{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Mediation analysis with duration data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook demonstrates mediation analysis when the\n", "mediator and outcome are duration variables, modeled\n", "using proportional hazards regression. These examples\n", "are based on simulated data." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:54:52.860135Z", "iopub.status.busy": "2021-02-02T06:54:52.859438Z", "iopub.status.idle": "2021-02-02T06:54:54.192271Z", "shell.execute_reply": "2021-02-02T06:54:54.193268Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import statsmodels.api as sm\n", "from statsmodels.stats.mediation import Mediation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Make the notebook reproducible." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:54:54.197624Z", "iopub.status.busy": "2021-02-02T06:54:54.196935Z", "iopub.status.idle": "2021-02-02T06:54:54.201181Z", "shell.execute_reply": "2021-02-02T06:54:54.201567Z" } }, "outputs": [], "source": [ "np.random.seed(3424)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specify a sample size." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:54:54.205929Z", "iopub.status.busy": "2021-02-02T06:54:54.204254Z", "iopub.status.idle": "2021-02-02T06:54:54.210546Z", "shell.execute_reply": "2021-02-02T06:54:54.211770Z" } }, "outputs": [], "source": [ "n = 1000" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generate an exposure variable." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:54:54.218504Z", "iopub.status.busy": "2021-02-02T06:54:54.215291Z", "iopub.status.idle": "2021-02-02T06:54:54.222004Z", "shell.execute_reply": "2021-02-02T06:54:54.223544Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "exp = np.random.normal(size=n)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generate a mediator variable." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:54:54.233103Z", "iopub.status.busy": "2021-02-02T06:54:54.232145Z", "iopub.status.idle": "2021-02-02T06:54:54.236600Z", "shell.execute_reply": "2021-02-02T06:54:54.237795Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def gen_mediator():\n", " mn = np.exp(exp)\n", " mtime0 = -mn * np.log(np.random.uniform(size=n))\n", " ctime = -2 * mn * np.log(np.random.uniform(size=n))\n", " mstatus = (ctime >= mtime0).astype(np.int)\n", " mtime = np.where(mtime0 <= ctime, mtime0, ctime)\n", " return mtime0, mtime, mstatus" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Generate an outcome variable." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:54:54.247705Z", "iopub.status.busy": "2021-02-02T06:54:54.241267Z", "iopub.status.idle": "2021-02-02T06:54:54.251260Z", "shell.execute_reply": "2021-02-02T06:54:54.252485Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def gen_outcome(otype, mtime0):\n", " if otype == \"full\":\n", " lp = 0.5*mtime0\n", " elif otype == \"no\":\n", " lp = exp\n", " else:\n", " lp = exp + mtime0\n", " mn = np.exp(-lp)\n", " ytime0 = -mn * np.log(np.random.uniform(size=n))\n", " ctime = -2 * mn * np.log(np.random.uniform(size=n))\n", " ystatus = (ctime >= ytime0).astype(np.int)\n", " ytime = np.where(ytime0 <= ctime, ytime0, ctime)\n", " return ytime, ystatus" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Build a dataframe containing all the relevant variables." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:54:54.257909Z", "iopub.status.busy": "2021-02-02T06:54:54.256246Z", "iopub.status.idle": "2021-02-02T06:54:54.263636Z", "shell.execute_reply": "2021-02-02T06:54:54.264850Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def build_df(ytime, ystatus, mtime0, mtime, mstatus):\n", " df = pd.DataFrame({\"ytime\": ytime, \"ystatus\": ystatus,\n", " \"mtime\": mtime, \"mstatus\": mstatus,\n", " \"exp\": exp})\n", " return df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the full simulation and analysis, under a particular\n", "population structure of mediation." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:54:54.270290Z", "iopub.status.busy": "2021-02-02T06:54:54.268593Z", "iopub.status.idle": "2021-02-02T06:54:54.278241Z", "shell.execute_reply": "2021-02-02T06:54:54.279492Z" }, "lines_to_next_cell": 1 }, "outputs": [], "source": [ "def run(otype):\n", "\n", " mtime0, mtime, mstatus = gen_mediator()\n", " ytime, ystatus = gen_outcome(otype, mtime0)\n", " df = build_df(ytime, ystatus, mtime0, mtime, mstatus)\n", "\n", " outcome_model = sm.PHReg.from_formula(\"ytime ~ exp + mtime\", status=\"ystatus\", data=df)\n", " mediator_model = sm.PHReg.from_formula(\"mtime ~ exp\", status=\"mstatus\", data=df)\n", "\n", " med = Mediation(outcome_model, mediator_model, \"exp\", \"mtime\",\n", " outcome_predict_kwargs={\"pred_only\": True})\n", " med_result = med.fit(n_rep=20)\n", " print(med_result.summary())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the example with full mediation" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:54:54.284932Z", "iopub.status.busy": "2021-02-02T06:54:54.283253Z", "iopub.status.idle": "2021-02-02T06:55:01.848830Z", "shell.execute_reply": "2021-02-02T06:55:01.849452Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Estimate Lower CI bound Upper CI bound P-value\n", "ACME (control) 0.742427 0.643339 0.862745 0.0\n", "ACME (treated) 0.742427 0.643339 0.862745 0.0\n", "ADE (control) 0.073017 -0.016189 0.155321 0.1\n", "ADE (treated) 0.073017 -0.016189 0.155321 0.1\n", "Total effect 0.815444 0.675214 0.919580 0.0\n", "Prop. mediated (control) 0.912695 0.814965 1.025747 0.0\n", "Prop. mediated (treated) 0.912695 0.814965 1.025747 0.0\n", "ACME (average) 0.742427 0.643339 0.862745 0.0\n", "ADE (average) 0.073017 -0.016189 0.155321 0.1\n", "Prop. mediated (average) 0.912695 0.814965 1.025747 0.0\n" ] } ], "source": [ "run(\"full\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the example with partial mediation" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:55:01.853836Z", "iopub.status.busy": "2021-02-02T06:55:01.852873Z", "iopub.status.idle": "2021-02-02T06:55:09.960460Z", "shell.execute_reply": "2021-02-02T06:55:09.961034Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Estimate Lower CI bound Upper CI bound P-value\n", "ACME (control) 0.987067 0.801560 1.192019 0.0\n", "ACME (treated) 0.987067 0.801560 1.192019 0.0\n", "ADE (control) 1.071734 0.964214 1.150352 0.0\n", "ADE (treated) 1.071734 0.964214 1.150352 0.0\n", "Total effect 2.058801 1.862231 2.288170 0.0\n", "Prop. mediated (control) 0.481807 0.417501 0.533773 0.0\n", "Prop. mediated (treated) 0.481807 0.417501 0.533773 0.0\n", "ACME (average) 0.987067 0.801560 1.192019 0.0\n", "ADE (average) 1.071734 0.964214 1.150352 0.0\n", "Prop. mediated (average) 0.481807 0.417501 0.533773 0.0\n" ] } ], "source": [ "run(\"partial\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the example with no mediation" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2021-02-02T06:55:09.966237Z", "iopub.status.busy": "2021-02-02T06:55:09.965425Z", "iopub.status.idle": "2021-02-02T06:55:16.948160Z", "shell.execute_reply": "2021-02-02T06:55:16.947766Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Estimate Lower CI bound Upper CI bound P-value\n", "ACME (control) 0.010200 -0.039434 0.065176 1.0\n", "ACME (treated) 0.010200 -0.039434 0.065176 1.0\n", "ADE (control) 0.902295 0.824526 0.984934 0.0\n", "ADE (treated) 0.902295 0.824526 0.984934 0.0\n", "Total effect 0.912495 0.834728 1.009958 0.0\n", "Prop. mediated (control) 0.003763 -0.044186 0.065520 1.0\n", "Prop. mediated (treated) 0.003763 -0.044186 0.065520 1.0\n", "ACME (average) 0.010200 -0.039434 0.065176 1.0\n", "ADE (average) 0.902295 0.824526 0.984934 0.0\n", "Prop. mediated (average) 0.003763 -0.044186 0.065520 1.0\n" ] } ], "source": [ "run(\"no\")" ] } ], "metadata": { "jupytext": { "cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9" } }, "nbformat": 4, "nbformat_minor": 2 }