{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Mediation analysis with duration data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook demonstrates mediation analysis when the\n", "mediator and outcome are duration variables, modeled\n", "using proportional hazards regression. These examples\n", "are based on simulated data." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:23.372609Z", "iopub.status.busy": "2024-04-19T16:36:23.372326Z", "iopub.status.idle": "2024-04-19T16:36:26.178879Z", "shell.execute_reply": "2024-04-19T16:36:26.178163Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import statsmodels.api as sm\n", "from statsmodels.stats.mediation import Mediation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Make the notebook reproducible." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:26.182517Z", "iopub.status.busy": "2024-04-19T16:36:26.181969Z", "iopub.status.idle": "2024-04-19T16:36:26.199584Z", "shell.execute_reply": "2024-04-19T16:36:26.199001Z" } }, "outputs": [], "source": [ "np.random.seed(3424)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specify a sample size." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:26.202956Z", "iopub.status.busy": "2024-04-19T16:36:26.201873Z", "iopub.status.idle": "2024-04-19T16:36:26.206030Z", "shell.execute_reply": "2024-04-19T16:36:26.205471Z" } }, "outputs": [], "source": [ "n = 1000" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generate an exposure variable." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:26.210159Z", "iopub.status.busy": "2024-04-19T16:36:26.209082Z", "iopub.status.idle": "2024-04-19T16:36:26.213412Z", "shell.execute_reply": "2024-04-19T16:36:26.212853Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "exp = np.random.normal(size=n)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generate a mediator variable." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:26.217436Z", "iopub.status.busy": "2024-04-19T16:36:26.216405Z", "iopub.status.idle": "2024-04-19T16:36:26.239551Z", "shell.execute_reply": "2024-04-19T16:36:26.238993Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def gen_mediator():\n", " mn = np.exp(exp)\n", " mtime0 = -mn * np.log(np.random.uniform(size=n))\n", " ctime = -2 * mn * np.log(np.random.uniform(size=n))\n", " mstatus = (ctime >= mtime0).astype(int)\n", " mtime = np.where(mtime0 <= ctime, mtime0, ctime)\n", " return mtime0, mtime, mstatus" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generate an outcome variable." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:26.242612Z", "iopub.status.busy": "2024-04-19T16:36:26.242394Z", "iopub.status.idle": "2024-04-19T16:36:26.248407Z", "shell.execute_reply": "2024-04-19T16:36:26.247840Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def gen_outcome(otype, mtime0):\n", " if otype == \"full\":\n", " lp = 0.5 * mtime0\n", " elif otype == \"no\":\n", " lp = exp\n", " else:\n", " lp = exp + mtime0\n", " mn = np.exp(-lp)\n", " ytime0 = -mn * np.log(np.random.uniform(size=n))\n", " ctime = -2 * mn * np.log(np.random.uniform(size=n))\n", " ystatus = (ctime >= ytime0).astype(int)\n", " ytime = np.where(ytime0 <= ctime, ytime0, ctime)\n", " return ytime, ystatus" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Build a dataframe containing all the relevant variables." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:26.252475Z", "iopub.status.busy": "2024-04-19T16:36:26.251420Z", "iopub.status.idle": "2024-04-19T16:36:26.262721Z", "shell.execute_reply": "2024-04-19T16:36:26.262133Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def build_df(ytime, ystatus, mtime0, mtime, mstatus):\n", " df = pd.DataFrame(\n", " {\n", " \"ytime\": ytime,\n", " \"ystatus\": ystatus,\n", " \"mtime\": mtime,\n", " \"mstatus\": mstatus,\n", " \"exp\": exp,\n", " }\n", " )\n", " return df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the full simulation and analysis, under a particular\n", "population structure of mediation." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:26.266758Z", "iopub.status.busy": "2024-04-19T16:36:26.265641Z", "iopub.status.idle": "2024-04-19T16:36:26.278487Z", "shell.execute_reply": "2024-04-19T16:36:26.277898Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def run(otype):\n", "\n", " mtime0, mtime, mstatus = gen_mediator()\n", " ytime, ystatus = gen_outcome(otype, mtime0)\n", " df = build_df(ytime, ystatus, mtime0, mtime, mstatus)\n", "\n", " outcome_model = sm.PHReg.from_formula(\n", " \"ytime ~ exp + mtime\", status=\"ystatus\", data=df\n", " )\n", " mediator_model = sm.PHReg.from_formula(\"mtime ~ exp\", status=\"mstatus\", data=df)\n", "\n", " med = Mediation(\n", " outcome_model,\n", " mediator_model,\n", " \"exp\",\n", " \"mtime\",\n", " outcome_predict_kwargs={\"pred_only\": True},\n", " )\n", " med_result = med.fit(n_rep=20)\n", " print(med_result.summary())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the example with full mediation" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:26.282419Z", "iopub.status.busy": "2024-04-19T16:36:26.281380Z", "iopub.status.idle": "2024-04-19T16:36:39.052871Z", "shell.execute_reply": "2024-04-19T16:36:39.052151Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Estimate Lower CI bound Upper CI bound P-value\n", "ACME (control) 0.742427 0.643339 0.862745 0.0\n", "ACME (treated) 0.742427 0.643339 0.862745 0.0\n", "ADE (control) 0.073017 -0.016189 0.155321 0.1\n", "ADE (treated) 0.073017 -0.016189 0.155321 0.1\n", "Total effect 0.815444 0.675214 0.919580 0.0\n", "Prop. mediated (control) 0.912695 0.814965 1.025747 0.0\n", "Prop. mediated (treated) 0.912695 0.814965 1.025747 0.0\n", "ACME (average) 0.742427 0.643339 0.862745 0.0\n", "ADE (average) 0.073017 -0.016189 0.155321 0.1\n", "Prop. mediated (average) 0.912695 0.814965 1.025747 0.0\n" ] } ], "source": [ "run(\"full\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the example with partial mediation" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:39.062376Z", "iopub.status.busy": "2024-04-19T16:36:39.057630Z", "iopub.status.idle": "2024-04-19T16:36:52.063069Z", "shell.execute_reply": "2024-04-19T16:36:52.062159Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Estimate Lower CI bound Upper CI bound P-value\n", "ACME (control) 0.987067 0.801560 1.192019 0.0\n", "ACME (treated) 0.987067 0.801560 1.192019 0.0\n", "ADE (control) 1.071734 0.964214 1.150352 0.0\n", "ADE (treated) 1.071734 0.964214 1.150352 0.0\n", "Total effect 2.058801 1.862231 2.288170 0.0\n", "Prop. mediated (control) 0.481807 0.417501 0.533773 0.0\n", "Prop. mediated (treated) 0.481807 0.417501 0.533773 0.0\n", "ACME (average) 0.987067 0.801560 1.192019 0.0\n", "ADE (average) 1.071734 0.964214 1.150352 0.0\n", "Prop. mediated (average) 0.481807 0.417501 0.533773 0.0\n" ] } ], "source": [ "run(\"partial\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the example with no mediation" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2024-04-19T16:36:52.071036Z", "iopub.status.busy": "2024-04-19T16:36:52.070731Z", "iopub.status.idle": "2024-04-19T16:37:04.830832Z", "shell.execute_reply": "2024-04-19T16:37:04.830135Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Estimate Lower CI bound Upper CI bound P-value\n", "ACME (control) 0.010200 -0.039434 0.065176 1.0\n", "ACME (treated) 0.010200 -0.039434 0.065176 1.0\n", "ADE (control) 0.902295 0.824526 0.984934 0.0\n", "ADE (treated) 0.902295 0.824526 0.984934 0.0\n", "Total effect 0.912495 0.834728 1.009958 0.0\n", "Prop. mediated (control) 0.003763 -0.044186 0.065520 1.0\n", "Prop. mediated (treated) 0.003763 -0.044186 0.065520 1.0\n", "ACME (average) 0.010200 -0.039434 0.065176 1.0\n", "ADE (average) 0.902295 0.824526 0.984934 0.0\n", "Prop. mediated (average) 0.003763 -0.044186 0.065520 1.0\n" ] } ], "source": [ "run(\"no\")" ] } ], "metadata": { "jupytext": { "cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 4 }