{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mediation analysis with duration data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook demonstrates mediation analysis when the\n",
    "mediator and outcome are duration variables, modeled\n",
    "using proportional hazards regression.  These examples\n",
    "are based on simulated data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:38:51.645311Z",
     "iopub.status.busy": "2026-07-29T17:38:51.645062Z",
     "iopub.status.idle": "2026-07-29T17:38:53.798908Z",
     "shell.execute_reply": "2026-07-29T17:38:53.797686Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import statsmodels.api as sm\n",
    "from statsmodels.stats.mediation import Mediation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Seed the random number generator so that the notebook is reproducible."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:38:53.801378Z",
     "iopub.status.busy": "2026-07-29T17:38:53.800927Z",
     "iopub.status.idle": "2026-07-29T17:38:53.808025Z",
     "shell.execute_reply": "2026-07-29T17:38:53.807147Z"
    }
   },
   "outputs": [],
   "source": [
    "rs = np.random.default_rng(3424)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Specify a sample size."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:38:53.810215Z",
     "iopub.status.busy": "2026-07-29T17:38:53.809923Z",
     "iopub.status.idle": "2026-07-29T17:38:53.816824Z",
     "shell.execute_reply": "2026-07-29T17:38:53.815800Z"
    }
   },
   "outputs": [],
   "source": [
    "n = 1000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Generate an exposure variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:38:53.821207Z",
     "iopub.status.busy": "2026-07-29T17:38:53.820731Z",
     "iopub.status.idle": "2026-07-29T17:38:53.826094Z",
     "shell.execute_reply": "2026-07-29T17:38:53.825325Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "exp = rs.normal(size=n)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Generate a mediator variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:38:53.828843Z",
     "iopub.status.busy": "2026-07-29T17:38:53.828579Z",
     "iopub.status.idle": "2026-07-29T17:38:53.843232Z",
     "shell.execute_reply": "2026-07-29T17:38:53.840806Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "def gen_mediator():\n",
    "    \"\"\"Simulate a right-censored mediator duration for every subject.\n",
    "\n",
    "    Uses the notebook-level ``exp`` (exposure), ``n`` (sample size) and\n",
    "    ``rs`` (random generator).  Event times are exponential with mean\n",
    "    ``np.exp(exp)``; censoring times are exponential with twice that mean.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple of ndarray\n",
    "        ``(mtime0, mtime, mstatus)``: the uncensored event times, the\n",
    "        observed times (event or censoring time, whichever is smaller),\n",
    "        and a 0/1 flag that is 1 when the event itself was observed.\n",
    "    \"\"\"\n",
    "    scale = np.exp(exp)\n",
    "    # Keep this draw order (events, then censoring) so that results for a\n",
    "    # given seed do not change.\n",
    "    event_time = -scale * np.log(rs.uniform(size=n))\n",
    "    censor_time = -2 * scale * np.log(rs.uniform(size=n))\n",
    "    observed = censor_time >= event_time\n",
    "    mtime = np.where(observed, event_time, censor_time)\n",
    "    return event_time, mtime, observed.astype(int)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Generate an outcome variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:38:53.846108Z",
     "iopub.status.busy": "2026-07-29T17:38:53.845847Z",
     "iopub.status.idle": "2026-07-29T17:38:53.859485Z",
     "shell.execute_reply": "2026-07-29T17:38:53.854068Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "def gen_outcome(otype, mtime0):\n",
    "    \"\"\"Simulate a right-censored outcome duration for every subject.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    otype : str\n",
    "        Mediation structure.  \"full\": the linear predictor uses only\n",
    "        the mediator; \"no\": it uses only the exposure; any other value:\n",
    "        it uses both (partial mediation).\n",
    "    mtime0 : ndarray\n",
    "        Uncensored mediator times, as returned by ``gen_mediator``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple of ndarray\n",
    "        ``(ytime, ystatus)``: observed outcome times and a 0/1 flag that\n",
    "        is 1 when the event itself was observed.\n",
    "    \"\"\"\n",
    "    if otype == \"no\":\n",
    "        linpred = exp\n",
    "    elif otype == \"full\":\n",
    "        linpred = 0.5 * mtime0\n",
    "    else:\n",
    "        linpred = exp + mtime0\n",
    "    # A larger linear predictor gives a shorter expected event time.\n",
    "    scale = np.exp(-linpred)\n",
    "    # Keep this draw order (events, then censoring) so that results for a\n",
    "    # given seed do not change.\n",
    "    event_time = -scale * np.log(rs.uniform(size=n))\n",
    "    censor_time = -2 * scale * np.log(rs.uniform(size=n))\n",
    "    observed = censor_time >= event_time\n",
    "    ytime = np.where(observed, event_time, censor_time)\n",
    "    return ytime, observed.astype(int)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Build a dataframe containing all the relevant variables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:38:53.861790Z",
     "iopub.status.busy": "2026-07-29T17:38:53.861530Z",
     "iopub.status.idle": "2026-07-29T17:38:53.871238Z",
     "shell.execute_reply": "2026-07-29T17:38:53.868807Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "def build_df(ytime, ystatus, mtime0, mtime, mstatus):\n",
    "    \"\"\"Collect the simulated variables into one data frame.\n",
    "\n",
    "    ``mtime0`` is accepted but not stored: only the observed mediator\n",
    "    time ``mtime`` is placed in the frame.  The exposure column is\n",
    "    taken from the notebook-level ``exp``.\n",
    "    \"\"\"\n",
    "    columns = {\n",
    "        \"ytime\": ytime,\n",
    "        \"ystatus\": ystatus,\n",
    "        \"mtime\": mtime,\n",
    "        \"mstatus\": mstatus,\n",
    "        \"exp\": exp,\n",
    "    }\n",
    "    return pd.DataFrame(columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the full simulation and analysis, under a particular\n",
    "population structure of mediation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:38:53.873418Z",
     "iopub.status.busy": "2026-07-29T17:38:53.873109Z",
     "iopub.status.idle": "2026-07-29T17:38:53.886649Z",
     "shell.execute_reply": "2026-07-29T17:38:53.883800Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "def run(otype, n_rep=20):\n",
    "    \"\"\"Simulate one data set and print its mediation analysis.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    otype : str\n",
    "        Mediation structure, forwarded to ``gen_outcome``.\n",
    "    n_rep : int, optional\n",
    "        Number of replications passed to ``Mediation.fit``.  The\n",
    "        default of 20 keeps the notebook quick to execute; pass a\n",
    "        larger value for more stable intervals and p-values.\n",
    "    \"\"\"\n",
    "    mtime0, mtime, mstatus = gen_mediator()\n",
    "    ytime, ystatus = gen_outcome(otype, mtime0)\n",
    "    df = build_df(ytime, ystatus, mtime0, mtime, mstatus)\n",
    "\n",
    "    # Both the outcome and the mediator are durations, so each gets a\n",
    "    # proportional hazards model.\n",
    "    outcome_model = sm.PHReg.from_formula(\n",
    "        \"ytime ~ exp + mtime\", status=\"ystatus\", data=df\n",
    "    )\n",
    "    mediator_model = sm.PHReg.from_formula(\"mtime ~ exp\", status=\"mstatus\", data=df)\n",
    "\n",
    "    med = Mediation(\n",
    "        outcome_model,\n",
    "        mediator_model,\n",
    "        \"exp\",\n",
    "        \"mtime\",\n",
    "        outcome_predict_kwargs={\"pred_only\": True},\n",
    "    )\n",
    "    med_result = med.fit(n_rep=n_rep, rng=rs)\n",
    "    print(med_result.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the example with full mediation: the exposure affects the\n",
    "outcome only through the mediator."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:38:53.889038Z",
     "iopub.status.busy": "2026-07-29T17:38:53.888766Z",
     "iopub.status.idle": "2026-07-29T17:39:00.309288Z",
     "shell.execute_reply": "2026-07-29T17:39:00.308005Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                          Estimate  Lower CI bound  Upper CI bound  P-value\n",
      "ACME (control)            0.783217        0.669836        0.901194      0.0\n",
      "ACME (treated)            0.783217        0.669836        0.901194      0.0\n",
      "ADE (control)             0.025815       -0.070741        0.111009      0.5\n",
      "ADE (treated)             0.025815       -0.070741        0.111009      0.5\n",
      "Total effect              0.809033        0.701157        0.924868      0.0\n",
      "Prop. mediated (control)  0.963520        0.870958        1.087129      0.0\n",
      "Prop. mediated (treated)  0.963520        0.870958        1.087129      0.0\n",
      "ACME (average)            0.783217        0.669836        0.901194      0.0\n",
      "ADE (average)             0.025815       -0.070741        0.111009      0.5\n",
      "Prop. mediated (average)  0.963520        0.870958        1.087129      0.0\n"
     ]
    }
   ],
   "source": [
    "run(\"full\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the example with partial mediation: the exposure affects the\n",
    "outcome both directly and through the mediator."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:39:00.312782Z",
     "iopub.status.busy": "2026-07-29T17:39:00.312449Z",
     "iopub.status.idle": "2026-07-29T17:39:07.089716Z",
     "shell.execute_reply": "2026-07-29T17:39:07.088932Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                          Estimate  Lower CI bound  Upper CI bound  P-value\n",
      "ACME (control)            1.202634        1.041637        1.492226      0.0\n",
      "ACME (treated)            1.202634        1.041637        1.492226      0.0\n",
      "ADE (control)             0.960218        0.858490        1.086519      0.0\n",
      "ADE (treated)             0.960218        0.858490        1.086519      0.0\n",
      "Total effect              2.162852        2.015172        2.425144      0.0\n",
      "Prop. mediated (control)  0.555877        0.497044        0.628792      0.0\n",
      "Prop. mediated (treated)  0.555877        0.497044        0.628792      0.0\n",
      "ACME (average)            1.202634        1.041637        1.492226      0.0\n",
      "ADE (average)             0.960218        0.858490        1.086519      0.0\n",
      "Prop. mediated (average)  0.555877        0.497044        0.628792      0.0\n"
     ]
    }
   ],
   "source": [
    "run(\"partial\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the example with no mediation: the exposure affects the\n",
    "outcome directly and the mediator plays no role."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-07-29T17:39:07.096356Z",
     "iopub.status.busy": "2026-07-29T17:39:07.093521Z",
     "iopub.status.idle": "2026-07-29T17:39:13.970618Z",
     "shell.execute_reply": "2026-07-29T17:39:13.967521Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                          Estimate  Lower CI bound  Upper CI bound  P-value\n",
      "ACME (control)            0.028193       -0.056908        0.094842      0.5\n",
      "ACME (treated)            0.028193       -0.056908        0.094842      0.5\n",
      "ADE (control)             0.937539        0.872684        1.036476      0.0\n",
      "ADE (treated)             0.937539        0.872684        1.036476      0.0\n",
      "Total effect              0.965732        0.874654        1.100320      0.0\n",
      "Prop. mediated (control)  0.046258       -0.064971        0.097052      0.5\n",
      "Prop. mediated (treated)  0.046258       -0.064971        0.097052      0.5\n",
      "ACME (average)            0.028193       -0.056908        0.094842      0.5\n",
      "ADE (average)             0.937539        0.872684        1.036476      0.0\n",
      "Prop. mediated (average)  0.046258       -0.064971        0.097052      0.5\n"
     ]
    }
   ],
   "source": [
    "run(\"no\")"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "cell_metadata_filter": "-all",
   "main_language": "python",
   "notebook_metadata_filter": "-all"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.14.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}