{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mediation analysis with duration data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook demonstrates mediation analysis when the\n",
    "mediator and outcome are duration variables, modeled\n",
    "using proportional hazards regression.  These examples\n",
    "are based on simulated data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:23.372609Z",
     "iopub.status.busy": "2024-04-19T16:36:23.372326Z",
     "iopub.status.idle": "2024-04-19T16:36:26.178879Z",
     "shell.execute_reply": "2024-04-19T16:36:26.178163Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import statsmodels.api as sm\n",
    "from statsmodels.stats.mediation import Mediation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make the notebook reproducible."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:26.182517Z",
     "iopub.status.busy": "2024-04-19T16:36:26.181969Z",
     "iopub.status.idle": "2024-04-19T16:36:26.199584Z",
     "shell.execute_reply": "2024-04-19T16:36:26.199001Z"
    }
   },
   "outputs": [],
   "source": [
    "np.random.seed(3424)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Specify a sample size."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:26.202956Z",
     "iopub.status.busy": "2024-04-19T16:36:26.201873Z",
     "iopub.status.idle": "2024-04-19T16:36:26.206030Z",
     "shell.execute_reply": "2024-04-19T16:36:26.205471Z"
    }
   },
   "outputs": [],
   "source": [
    "n = 1000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Generate an exposure variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:26.210159Z",
     "iopub.status.busy": "2024-04-19T16:36:26.209082Z",
     "iopub.status.idle": "2024-04-19T16:36:26.213412Z",
     "shell.execute_reply": "2024-04-19T16:36:26.212853Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "exp = np.random.normal(size=n)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Generate a mediator variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:26.217436Z",
     "iopub.status.busy": "2024-04-19T16:36:26.216405Z",
     "iopub.status.idle": "2024-04-19T16:36:26.239551Z",
     "shell.execute_reply": "2024-04-19T16:36:26.238993Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "def gen_mediator():\n",
    "    mn = np.exp(exp)\n",
    "    mtime0 = -mn * np.log(np.random.uniform(size=n))\n",
    "    ctime = -2 * mn * np.log(np.random.uniform(size=n))\n",
    "    mstatus = (ctime >= mtime0).astype(int)\n",
    "    mtime = np.where(mtime0 <= ctime, mtime0, ctime)\n",
    "    return mtime0, mtime, mstatus"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Generate an outcome variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:26.242612Z",
     "iopub.status.busy": "2024-04-19T16:36:26.242394Z",
     "iopub.status.idle": "2024-04-19T16:36:26.248407Z",
     "shell.execute_reply": "2024-04-19T16:36:26.247840Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "def gen_outcome(otype, mtime0):\n",
    "    if otype == \"full\":\n",
    "        lp = 0.5 * mtime0\n",
    "    elif otype == \"no\":\n",
    "        lp = exp\n",
    "    else:\n",
    "        lp = exp + mtime0\n",
    "    mn = np.exp(-lp)\n",
    "    ytime0 = -mn * np.log(np.random.uniform(size=n))\n",
    "    ctime = -2 * mn * np.log(np.random.uniform(size=n))\n",
    "    ystatus = (ctime >= ytime0).astype(int)\n",
    "    ytime = np.where(ytime0 <= ctime, ytime0, ctime)\n",
    "    return ytime, ystatus"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Build a dataframe containing all the relevant variables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:26.252475Z",
     "iopub.status.busy": "2024-04-19T16:36:26.251420Z",
     "iopub.status.idle": "2024-04-19T16:36:26.262721Z",
     "shell.execute_reply": "2024-04-19T16:36:26.262133Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "def build_df(ytime, ystatus, mtime0, mtime, mstatus):\n",
    "    df = pd.DataFrame(\n",
    "        {\n",
    "            \"ytime\": ytime,\n",
    "            \"ystatus\": ystatus,\n",
    "            \"mtime\": mtime,\n",
    "            \"mstatus\": mstatus,\n",
    "            \"exp\": exp,\n",
    "        }\n",
    "    )\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the full simulation and analysis, under a particular\n",
    "population structure of mediation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:26.266758Z",
     "iopub.status.busy": "2024-04-19T16:36:26.265641Z",
     "iopub.status.idle": "2024-04-19T16:36:26.278487Z",
     "shell.execute_reply": "2024-04-19T16:36:26.277898Z"
    },
    "lines_to_next_cell": 1
   },
   "outputs": [],
   "source": [
    "def run(otype):\n",
    "\n",
    "    mtime0, mtime, mstatus = gen_mediator()\n",
    "    ytime, ystatus = gen_outcome(otype, mtime0)\n",
    "    df = build_df(ytime, ystatus, mtime0, mtime, mstatus)\n",
    "\n",
    "    outcome_model = sm.PHReg.from_formula(\n",
    "        \"ytime ~ exp + mtime\", status=\"ystatus\", data=df\n",
    "    )\n",
    "    mediator_model = sm.PHReg.from_formula(\"mtime ~ exp\", status=\"mstatus\", data=df)\n",
    "\n",
    "    med = Mediation(\n",
    "        outcome_model,\n",
    "        mediator_model,\n",
    "        \"exp\",\n",
    "        \"mtime\",\n",
    "        outcome_predict_kwargs={\"pred_only\": True},\n",
    "    )\n",
    "    med_result = med.fit(n_rep=20)\n",
    "    print(med_result.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the example with full mediation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:26.282419Z",
     "iopub.status.busy": "2024-04-19T16:36:26.281380Z",
     "iopub.status.idle": "2024-04-19T16:36:39.052871Z",
     "shell.execute_reply": "2024-04-19T16:36:39.052151Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                          Estimate  Lower CI bound  Upper CI bound  P-value\n",
      "ACME (control)            0.742427        0.643339        0.862745      0.0\n",
      "ACME (treated)            0.742427        0.643339        0.862745      0.0\n",
      "ADE (control)             0.073017       -0.016189        0.155321      0.1\n",
      "ADE (treated)             0.073017       -0.016189        0.155321      0.1\n",
      "Total effect              0.815444        0.675214        0.919580      0.0\n",
      "Prop. mediated (control)  0.912695        0.814965        1.025747      0.0\n",
      "Prop. mediated (treated)  0.912695        0.814965        1.025747      0.0\n",
      "ACME (average)            0.742427        0.643339        0.862745      0.0\n",
      "ADE (average)             0.073017       -0.016189        0.155321      0.1\n",
      "Prop. mediated (average)  0.912695        0.814965        1.025747      0.0\n"
     ]
    }
   ],
   "source": [
    "run(\"full\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the example with partial mediation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:39.062376Z",
     "iopub.status.busy": "2024-04-19T16:36:39.057630Z",
     "iopub.status.idle": "2024-04-19T16:36:52.063069Z",
     "shell.execute_reply": "2024-04-19T16:36:52.062159Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                          Estimate  Lower CI bound  Upper CI bound  P-value\n",
      "ACME (control)            0.987067        0.801560        1.192019      0.0\n",
      "ACME (treated)            0.987067        0.801560        1.192019      0.0\n",
      "ADE (control)             1.071734        0.964214        1.150352      0.0\n",
      "ADE (treated)             1.071734        0.964214        1.150352      0.0\n",
      "Total effect              2.058801        1.862231        2.288170      0.0\n",
      "Prop. mediated (control)  0.481807        0.417501        0.533773      0.0\n",
      "Prop. mediated (treated)  0.481807        0.417501        0.533773      0.0\n",
      "ACME (average)            0.987067        0.801560        1.192019      0.0\n",
      "ADE (average)             1.071734        0.964214        1.150352      0.0\n",
      "Prop. mediated (average)  0.481807        0.417501        0.533773      0.0\n"
     ]
    }
   ],
   "source": [
    "run(\"partial\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the example with no mediation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-04-19T16:36:52.071036Z",
     "iopub.status.busy": "2024-04-19T16:36:52.070731Z",
     "iopub.status.idle": "2024-04-19T16:37:04.830832Z",
     "shell.execute_reply": "2024-04-19T16:37:04.830135Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                          Estimate  Lower CI bound  Upper CI bound  P-value\n",
      "ACME (control)            0.010200       -0.039434        0.065176      1.0\n",
      "ACME (treated)            0.010200       -0.039434        0.065176      1.0\n",
      "ADE (control)             0.902295        0.824526        0.984934      0.0\n",
      "ADE (treated)             0.902295        0.824526        0.984934      0.0\n",
      "Total effect              0.912495        0.834728        1.009958      0.0\n",
      "Prop. mediated (control)  0.003763       -0.044186        0.065520      1.0\n",
      "Prop. mediated (treated)  0.003763       -0.044186        0.065520      1.0\n",
      "ACME (average)            0.010200       -0.039434        0.065176      1.0\n",
      "ADE (average)             0.902295        0.824526        0.984934      0.0\n",
      "Prop. mediated (average)  0.003763       -0.044186        0.065520      1.0\n"
     ]
    }
   ],
   "source": [
    "run(\"no\")"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "cell_metadata_filter": "-all",
   "main_language": "python",
   "notebook_metadata_filter": "-all"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}