# Source code for statsmodels.graphics.agreement

'''
Bland-Altman mean-difference plots

Author: Joses Ho
License: BSD-3
'''

import numpy as np
from . import utils

def _kwds_with_defaults(kwds, **defaults):
    """Return a new dict holding `kwds` with missing keys taken from `defaults`.

    `kwds` may be None. The input mapping is never modified.
    """
    merged = dict(defaults)
    merged.update(kwds or {})
    return merged


def mean_diff_plot(m1, m2, sd_limit=1.96, ax=None, scatter_kwds=None,
                   mean_line_kwds=None, limit_lines_kwds=None):
    """
    Tukey's Mean Difference Plot.

    Tukey's Mean Difference Plot (also known as a Bland-Altman plot) is a
    graphical method to analyze the differences between two methods of
    measurement. The mean of the measures is plotted against their
    difference.

    For more information see
    https://en.wikipedia.org/wiki/Bland-Altman_plot

    Parameters
    ----------
    m1, m2 : pandas Series or array-like
        Paired measurements of equal length. They are converted with
        ``numpy.asarray`` and paired by position.
    sd_limit : float, default 1.96
        The limit of agreements expressed in terms of the standard
        deviation of the differences. If `md` is the mean of the
        differences, and `sd` is the standard deviation of those
        differences (``numpy.std`` with its default settings), then the
        limits of agreement that will be plotted will be

            md - sd_limit * sd, md + sd_limit * sd

        With the default of 1.96 the limits are expected to contain about
        95% of the differences when these are approximately normally
        distributed. If sd_limit = 0, no limits will be plotted, and the
        ylimit of the plot defaults to 3 standard deviations on either
        side of the mean.
    ax : matplotlib AxesSubplot instance, optional
        If `ax` is None, then a figure is created. If an axis instance is
        given, the mean difference plot is drawn on the axis.
    scatter_kwds : dict, optional
        Options to style the scatter plot. Accepts any keywords for the
        matplotlib Axes.scatter plotting method. The marker size ``s``
        defaults to 20.
    mean_line_kwds : dict, optional
        Options to style the mean difference line. Accepts any keywords
        for the matplotlib Axes.axhline plotting method. Defaults to a
        gray, dashed line of width 1.
    limit_lines_kwds : dict, optional
        Options to style the limit of agreement lines. Accepts any
        keywords for the matplotlib Axes.axhline plotting method. Defaults
        to gray, dotted lines of width 1.

    Returns
    -------
    fig : matplotlib Figure
        If `ax` is None, the created figure. Otherwise the figure to which
        `ax` is connected.

    Raises
    ------
    ValueError
        If `m1` and `m2` differ in length, or if `sd_limit` is negative.

    Notes
    -----
    The keyword dictionaries passed in are copied before defaults are
    filled in, so the caller's objects are left untouched.

    References
    ----------
    Bland JM, Altman DG (1986). "Statistical methods for assessing
    agreement between two methods of clinical measurement"

    Examples
    --------
    Load relevant libraries.

    >>> import statsmodels.api as sm
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt

    Making a mean difference plot.

    >>> # Seed the random number generator.
    >>> # This ensures that the results below are reproducible.
    >>> np.random.seed(9999)
    >>> m1 = np.random.random(20)
    >>> m2 = np.random.random(20)
    >>> f, ax = plt.subplots(1, figsize = (8,5))
    >>> sm.graphics.mean_diff_plot(m1, m2, ax = ax)
    >>> plt.show()

    .. plot:: plots/graphics-mean_diff_plot.py
    """
    # Validate before touching matplotlib so that bad input does not leave
    # an empty figure behind.
    if len(m1) != len(m2):
        raise ValueError('m1 does not have the same length as m2.')
    if sd_limit < 0:
        raise ValueError('sd_limit ({}) is less than 0.'.format(sd_limit))

    fig, ax = utils.create_mpl_ax(ax)

    # Convert once: plain lists do not support `-`, and pandas Series would
    # align on their index while the means below are paired by position.
    first = np.asarray(m1)
    second = np.asarray(m2)

    means = np.mean([first, second], axis=0)
    diffs = first - second
    mean_diff = np.mean(diffs)
    std_diff = np.std(diffs, axis=0)

    scatter_kwds = _kwds_with_defaults(scatter_kwds, s=20)
    # Each line style gets its own default; user-supplied keys always win.
    mean_line_kwds = _kwds_with_defaults(
        mean_line_kwds, color='gray', linewidth=1, linestyle='--')
    limit_lines_kwds = _kwds_with_defaults(
        limit_lines_kwds, color='gray', linewidth=1, linestyle=':')

    ax.scatter(means, diffs, **scatter_kwds)  # Plot the means against the diffs.
    ax.axhline(mean_diff, **mean_line_kwds)  # draw mean line.

    # Annotate mean line with mean difference.
    ax.annotate('mean diff:\n{}'.format(np.round(mean_diff, 2)),
                xy=(0.99, 0.5),
                horizontalalignment='right',
                verticalalignment='center',
                fontsize=14,
                xycoords='axes fraction')

    if sd_limit > 0:
        # Leave 50% headroom beyond the limits so the annotations fit.
        half_ylim = (1.5 * sd_limit) * std_diff
        ax.set_ylim(mean_diff - half_ylim,
                    mean_diff + half_ylim)

        limit_of_agreement = sd_limit * std_diff
        lower = mean_diff - limit_of_agreement
        upper = mean_diff + limit_of_agreement
        for lim in (lower, upper):
            ax.axhline(lim, **limit_lines_kwds)
        ax.annotate('-SD{}: {}'.format(sd_limit, np.round(lower, 2)),
                    xy=(0.99, 0.07),
                    horizontalalignment='right',
                    verticalalignment='bottom',
                    fontsize=14,
                    xycoords='axes fraction')
        ax.annotate('+SD{}: {}'.format(sd_limit, np.round(upper, 2)),
                    xy=(0.99, 0.92),
                    horizontalalignment='right',
                    fontsize=14,
                    xycoords='axes fraction')

    elif sd_limit == 0:
        half_ylim = 3 * std_diff
        ax.set_ylim(mean_diff - half_ylim,
                    mean_diff + half_ylim)

    ax.set_ylabel('Difference', fontsize=15)
    ax.set_xlabel('Means', fontsize=15)
    ax.tick_params(labelsize=13)
    fig.tight_layout()
    return fig