{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Mediation analysis with duration data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook demonstrates mediation analysis when the\n", "mediator and outcome are duration variables, modeled\n", "using proportional hazards regression. These examples\n", "are based on simulated data." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:22.347861Z", "iopub.status.busy": "2024-03-18T09:25:22.347571Z", "iopub.status.idle": "2024-03-18T09:25:24.943795Z", "shell.execute_reply": "2024-03-18T09:25:24.943092Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import statsmodels.api as sm\n", "from statsmodels.stats.mediation import Mediation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Make the notebook reproducible." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:24.951097Z", "iopub.status.busy": "2024-03-18T09:25:24.949068Z", "iopub.status.idle": "2024-03-18T09:25:24.956385Z", "shell.execute_reply": "2024-03-18T09:25:24.955791Z" } }, "outputs": [], "source": [ "np.random.seed(3424)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specify a sample size." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:24.962083Z", "iopub.status.busy": "2024-03-18T09:25:24.960315Z", "iopub.status.idle": "2024-03-18T09:25:24.966871Z", "shell.execute_reply": "2024-03-18T09:25:24.966286Z" } }, "outputs": [], "source": [ "n = 1000" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generate an exposure variable." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:24.972795Z", "iopub.status.busy": "2024-03-18T09:25:24.970884Z", "iopub.status.idle": "2024-03-18T09:25:24.978047Z", "shell.execute_reply": "2024-03-18T09:25:24.977433Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "exp = np.random.normal(size=n)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generate a mediator variable." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:24.983376Z", "iopub.status.busy": "2024-03-18T09:25:24.981605Z", "iopub.status.idle": "2024-03-18T09:25:24.991385Z", "shell.execute_reply": "2024-03-18T09:25:24.990792Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def gen_mediator():\n", " mn = np.exp(exp)\n", " mtime0 = -mn * np.log(np.random.uniform(size=n))\n", " ctime = -2 * mn * np.log(np.random.uniform(size=n))\n", " mstatus = (ctime >= mtime0).astype(int)\n", " mtime = np.where(mtime0 <= ctime, mtime0, ctime)\n", " return mtime0, mtime, mstatus" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generate an outcome variable." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:24.997047Z", "iopub.status.busy": "2024-03-18T09:25:24.995301Z", "iopub.status.idle": "2024-03-18T09:25:25.012822Z", "shell.execute_reply": "2024-03-18T09:25:25.012224Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def gen_outcome(otype, mtime0):\n", " if otype == \"full\":\n", " lp = 0.5 * mtime0\n", " elif otype == \"no\":\n", " lp = exp\n", " else:\n", " lp = exp + mtime0\n", " mn = np.exp(-lp)\n", " ytime0 = -mn * np.log(np.random.uniform(size=n))\n", " ctime = -2 * mn * np.log(np.random.uniform(size=n))\n", " ystatus = (ctime >= ytime0).astype(int)\n", " ytime = np.where(ytime0 <= ctime, ytime0, ctime)\n", " return ytime, ystatus" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Build a dataframe containing all the relevant variables." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:25.017901Z", "iopub.status.busy": "2024-03-18T09:25:25.016314Z", "iopub.status.idle": "2024-03-18T09:25:25.029911Z", "shell.execute_reply": "2024-03-18T09:25:25.029280Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def build_df(ytime, ystatus, mtime0, mtime, mstatus):\n", " df = pd.DataFrame(\n", " {\n", " \"ytime\": ytime,\n", " \"ystatus\": ystatus,\n", " \"mtime\": mtime,\n", " \"mstatus\": mstatus,\n", " \"exp\": exp,\n", " }\n", " )\n", " return df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the full simulation and analysis, under a particular\n", "population structure of mediation." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:25.035754Z", "iopub.status.busy": "2024-03-18T09:25:25.034005Z", "iopub.status.idle": "2024-03-18T09:25:25.045343Z", "shell.execute_reply": "2024-03-18T09:25:25.044527Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def run(otype):\n", "\n", " mtime0, mtime, mstatus = gen_mediator()\n", " ytime, ystatus = gen_outcome(otype, mtime0)\n", " df = build_df(ytime, ystatus, mtime0, mtime, mstatus)\n", "\n", " outcome_model = sm.PHReg.from_formula(\n", " \"ytime ~ exp + mtime\", status=\"ystatus\", data=df\n", " )\n", " mediator_model = sm.PHReg.from_formula(\"mtime ~ exp\", status=\"mstatus\", data=df)\n", "\n", " med = Mediation(\n", " outcome_model,\n", " mediator_model,\n", " \"exp\",\n", " \"mtime\",\n", " outcome_predict_kwargs={\"pred_only\": True},\n", " )\n", " med_result = med.fit(n_rep=20)\n", " print(med_result.summary())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the example with full mediation" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:25.055484Z", "iopub.status.busy": "2024-03-18T09:25:25.053888Z", "iopub.status.idle": "2024-03-18T09:25:36.719780Z", "shell.execute_reply": "2024-03-18T09:25:36.719009Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Estimate Lower CI bound Upper CI bound P-value\n", "ACME (control) 0.742427 0.643339 0.862745 0.0\n", "ACME (treated) 0.742427 0.643339 0.862745 0.0\n", "ADE (control) 0.073017 -0.016189 0.155321 0.1\n", "ADE (treated) 0.073017 -0.016189 0.155321 0.1\n", "Total effect 0.815444 0.675214 0.919580 0.0\n", "Prop. mediated (control) 0.912695 0.814965 1.025747 0.0\n", "Prop. mediated (treated) 0.912695 0.814965 1.025747 0.0\n", "ACME (average) 0.742427 0.643339 0.862745 0.0\n", "ADE (average) 0.073017 -0.016189 0.155321 0.1\n", "Prop. mediated (average) 0.912695 0.814965 1.025747 0.0\n" ] } ], "source": [ "run(\"full\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the example with partial mediation" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:36.728469Z", "iopub.status.busy": "2024-03-18T09:25:36.727699Z", "iopub.status.idle": "2024-03-18T09:25:48.494787Z", "shell.execute_reply": "2024-03-18T09:25:48.493973Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Estimate Lower CI bound Upper CI bound P-value\n", "ACME (control) 0.987067 0.801560 1.192019 0.0\n", "ACME (treated) 0.987067 0.801560 1.192019 0.0\n", "ADE (control) 1.071734 0.964214 1.150352 0.0\n", "ADE (treated) 1.071734 0.964214 1.150352 0.0\n", "Total effect 2.058801 1.862231 2.288170 0.0\n", "Prop. mediated (control) 0.481807 0.417501 0.533773 0.0\n", "Prop. mediated (treated) 0.481807 0.417501 0.533773 0.0\n", "ACME (average) 0.987067 0.801560 1.192019 0.0\n", "ADE (average) 1.071734 0.964214 1.150352 0.0\n", "Prop. mediated (average) 0.481807 0.417501 0.533773 0.0\n" ] } ], "source": [ "run(\"partial\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the example with no mediation" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2024-03-18T09:25:48.499112Z", "iopub.status.busy": "2024-03-18T09:25:48.498362Z", "iopub.status.idle": "2024-03-18T09:25:59.417637Z", "shell.execute_reply": "2024-03-18T09:25:59.416931Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Estimate Lower CI bound Upper CI bound P-value\n", "ACME (control) 0.010200 -0.039434 0.065176 1.0\n", "ACME (treated) 0.010200 -0.039434 0.065176 1.0\n", "ADE (control) 0.902295 0.824526 0.984934 0.0\n", "ADE (treated) 0.902295 0.824526 0.984934 0.0\n", "Total effect 0.912495 0.834728 1.009958 0.0\n", "Prop. mediated (control) 0.003763 -0.044186 0.065520 1.0\n", "Prop. mediated (treated) 0.003763 -0.044186 0.065520 1.0\n", "ACME (average) 0.010200 -0.039434 0.065176 1.0\n", "ADE (average) 0.902295 0.824526 0.984934 0.0\n", "Prop. mediated (average) 0.003763 -0.044186 0.065520 1.0\n" ] } ], "source": [ "run(\"no\")" ] } ], "metadata": { "jupytext": { "cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }