Source code for statsmodels.othermod.betareg

"""
Beta regression for modeling rates and proportions.

References
----------
Grün, Bettina, Ioannis Kosmidis, and Achim Zeileis. Extended beta regression
in R: Shaken, stirred, mixed, and partitioned. No. 2011-22. Working Papers in
Economics and Statistics, 2011.

Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer?
Maximum-likelihood regression with beta-distributed dependent variables."
Psychological methods 11.1 (2006): 54.
"""

import numpy as np
from scipy.special import gammaln as lgamma

from statsmodels.base.model import (
    GenericLikelihoodModel,
    GenericLikelihoodModelResults,
    _LLRMixin,
)
import statsmodels.base.wrapper as wrap
from statsmodels.formula._manager import FormulaManager
from statsmodels.formula.formulatools import advance_eval_env
from statsmodels.genmod import families
import statsmodels.regression.linear_model as lm
from statsmodels.tools.decorators import cache_readonly

LOGIT_LINK = families.links.Logit()
LOG_LINK = families.links.Log()

_init_example = """

    Beta regression with default of logit-link for exog and log-link
    for precision.

    >>> mod = BetaModel(endog, exog)
    >>> rslt = mod.fit()
    >>> print(rslt.summary())

    We can also specify a formula and a specific structure and use the
    identity-link for precision.

    >>> from sm.families.links import identity
    >>> Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
    >>> mod = BetaModel.from_formula('iyield ~ C(batch, Treatment(10)) + temp',
    ...                              dat, exog_precision=Z,
    ...                              link_precision=identity())

    In the case of proportion-data, we may think that the precision depends on
    the number of measurements. E.g for sequence data, on the number of
    sequence reads covering a site:

    >>> Z = patsy.dmatrix('~ coverage', df)
    >>> formula = 'methylation ~ disease + age + gender + coverage'
    >>> mod = BetaModel.from_formula(formula, df, Z)
    >>> rslt = mod.fit()

"""



[docs]
class BetaModel(GenericLikelihoodModel):
    __doc__ = """Beta Regression.

    The Model is parameterized by mean and precision. Both can depend on
    explanatory variables through link functions.

    Parameters
    ----------
    endog : array_like
        1d array of endogenous response variable.
    exog : array_like
        A nobs x k array where `nobs` is the number of observations and `k`
        is the number of regressors. An intercept is not included by default
        and should be added by the user (models specified using a formula
        include an intercept by default). See `statsmodels.tools.add_constant`.
    exog_precision : array_like
        2d array of variables for the precision.
    link : link
        Any link in sm.families.links for mean, should have range in
        interval [0, 1]. Default is logit-link.
    link_precision : link
        Any link in sm.families.links for precision, should have
        range in positive line. Default is log-link.
    **kwds : extra keywords
        Keyword options that will be handled by super classes.
        Not all general keywords will be supported in this class.

    Notes
    -----
    Status: experimental, new in 0.13.
    Core results are verified, but api can change and some extra results
    specific to Beta regression are missing.

    Examples
    --------
    {example}

    See Also
    --------
    :ref:`links`

    """.format(
        example=_init_example
    )

    def __init__(
        self,
        endog,
        exog,
        exog_precision=None,
        link=LOGIT_LINK,
        link_precision=LOG_LINK,
        **kwds,
    ):

        etmp = np.array(endog)
        assert np.all((0 < etmp) & (etmp < 1))
        if exog_precision is None:
            extra_names = ["precision"]
            exog_precision = np.ones((len(endog), 1), dtype="f")
        else:
            extra_names = [
                "precision-%s" % zc
                for zc in (
                    exog_precision.columns
                    if hasattr(exog_precision, "columns")
                    else range(1, exog_precision.shape[1] + 1)
                )
            ]

        kwds["extra_params_names"] = extra_names

        super().__init__(endog, exog, exog_precision=exog_precision, **kwds)
        self.link = link
        self.link_precision = link_precision
        # not needed, handled by super:
        # self.exog_precision = exog_precision
        # inherited df do not account for precision params
        self.nobs = self.endog.shape[0]
        self.k_extra = 1
        self.df_model = self.nparams - 2
        self.df_resid = self.nobs - self.nparams
        assert len(self.exog_precision) == len(self.endog)
        self.hess_type = "oim"
        if "exog_precision" not in self._init_keys:
            self._init_keys.extend(["exog_precision"])
        self._init_keys.extend(["link", "link_precision"])
        self._null_drop_keys = ["exog_precision"]
        del kwds["extra_params_names"]
        self._check_kwargs(kwds)
        self.results_class = BetaResults
        self.results_class_wrapper = BetaResultsWrapper


[docs]
    @classmethod
    def from_formula(cls, formula, data, exog_precision_formula=None, *args, **kwargs):
        if exog_precision_formula is not None:

            mgr = FormulaManager()
            if "subset" in kwargs:
                d = data.ix[kwargs["subset"]]
                Z = mgr.get_matrices(exog_precision_formula, d, pandas=False)
            else:
                Z = mgr.get_matrices(exog_precision_formula, data, pandas=False)
            kwargs["exog_precision"] = Z
        advance_eval_env(kwargs)
        return super().from_formula(formula, data, *args, **kwargs)


    def _get_exogs(self):
        return (self.exog, self.exog_precision)


[docs]
    def predict(self, params, exog=None, exog_precision=None, which="mean"):
        """Predict values for mean or precision

        Parameters
        ----------
        params : array_like
            The model parameters.
        exog : array_like
            Array of predictor variables for mean.
        exog_precision : array_like
            Array of predictor variables for precision parameter.
        which : str

            - "mean" : mean, conditional expectation E(endog | exog)
            - "precision" : predicted precision
            - "linear" : linear predictor for the mean function
            - "linear-precision" : linear predictor for the precision parameter

        Returns
        -------
        ndarray, predicted values
        """
        # compatibility with old names and misspelling
        if which == "linpred":
            which = "linear"
        if which in ["linpred_precision", "linear_precision"]:
            which = "linear-precision"

        k_mean = self.exog.shape[1]
        if which in ["mean", "linear"]:
            if exog is None:
                exog = self.exog
            params_mean = params[:k_mean]
            # Zparams = params[k_mean:]
            linpred = np.dot(exog, params_mean)
            if which == "mean":
                mu = self.link.inverse(linpred)
                res = mu
            else:
                res = linpred

        elif which in ["precision", "linear-precision"]:
            if exog_precision is None:
                exog_precision = self.exog_precision
            params_prec = params[k_mean:]
            linpred_prec = np.dot(exog_precision, params_prec)

            if which == "precision":
                phi = self.link_precision.inverse(linpred_prec)
                res = phi
            else:
                res = linpred_prec

        elif which == "var":
            res = self._predict_var(params, exog=exog, exog_precision=exog_precision)

        else:
            raise ValueError("which = %s is not available" % which)

        return res


    def _predict_precision(self, params, exog_precision=None):
        """Predict values for precision function for given exog_precision.

        Parameters
        ----------
        params : array_like
            The model parameters.
        exog_precision : array_like
            Array of predictor variables for precision.

        Returns
        -------
        Predicted precision.
        """
        if exog_precision is None:
            exog_precision = self.exog_precision

        k_mean = self.exog.shape[1]
        params_precision = params[k_mean:]
        linpred_prec = np.dot(exog_precision, params_precision)
        phi = self.link_precision.inverse(linpred_prec)

        return phi

    def _predict_var(self, params, exog=None, exog_precision=None):
        """predict values for conditional variance V(endog | exog)

        Parameters
        ----------
        params : array_like
            The model parameters.
        exog : array_like
            Array of predictor variables for mean.
        exog_precision : array_like
            Array of predictor variables for precision.

        Returns
        -------
        Predicted conditional variance.
        """
        mean = self.predict(params, exog=exog)
        precision = self._predict_precision(params, exog_precision=exog_precision)

        var_endog = mean * (1 - mean) / (1 + precision)
        return var_endog


[docs]
    def loglikeobs(self, params):
        """
        Loglikelihood for observations of the Beta regressionmodel.

        Parameters
        ----------
        params : ndarray
            The parameters of the model, coefficients for linear predictors
            of the mean and of the precision function.

        Returns
        -------
        loglike : ndarray
            The log likelihood for each observation of the model evaluated
            at `params`.
        """
        return self._llobs(self.endog, self.exog, self.exog_precision, params)


    def _llobs(self, endog, exog, exog_precision, params):
        """
        Loglikelihood for observations with data arguments.

        Parameters
        ----------
        endog : ndarray
            1d array of endogenous variable.
        exog : ndarray
            2d array of explanatory variables.
        exog_precision : ndarray
            2d array of explanatory variables for precision.
        params : ndarray
            The parameters of the model, coefficients for linear predictors
            of the mean and of the precision function.

        Returns
        -------
        loglike : ndarray
            The log likelihood for each observation of the model evaluated
            at `params`.
        """
        y, X, Z = endog, exog, exog_precision
        nz = Z.shape[1]

        params_mean = params[:-nz]
        params_prec = params[-nz:]
        linpred = np.dot(X, params_mean)
        linpred_prec = np.dot(Z, params_prec)

        mu = self.link.inverse(linpred)
        phi = self.link_precision.inverse(linpred_prec)

        eps_lb = 1e-200
        alpha = np.clip(mu * phi, eps_lb, np.inf)
        beta = np.clip((1 - mu) * phi, eps_lb, np.inf)

        ll = (
            lgamma(phi)
            - lgamma(alpha)
            - lgamma(beta)
            + (mu * phi - 1) * np.log(y)
            + (((1 - mu) * phi) - 1) * np.log(1 - y)
        )

        return ll


[docs]
    def score(self, params):
        """
        Returns the score vector of the log-likelihood.

        http://www.tandfonline.com/doi/pdf/10.1080/00949650903389993

        Parameters
        ----------
        params : ndarray
            Parameter at which score is evaluated.

        Returns
        -------
        score : ndarray
            First derivative of loglikelihood function.
        """
        sf1, sf2 = self.score_factor(params)

        d1 = np.dot(sf1, self.exog)
        d2 = np.dot(sf2, self.exog_precision)
        return np.concatenate((d1, d2))


    def _score_check(self, params):
        """Inherited score with finite differences

        Parameters
        ----------
        params : ndarray
            Parameter at which score is evaluated.

        Returns
        -------
        score based on numerical derivatives
        """
        return super().score(params)


[docs]
    def score_factor(self, params, endog=None):
        """Derivative of loglikelihood function w.r.t. linear predictors.

        This needs to be multiplied with the exog to obtain the score_obs.

        Parameters
        ----------
        params : ndarray
            Parameter at which score is evaluated.

        Returns
        -------
        score_factor : ndarray, 2-D
            A 2d weight vector used in the calculation of the score_obs.

        Notes
        -----
        The score_obs can be obtained from score_factor ``sf`` using

            - d1 = sf[:, :1] * exog
            - d2 = sf[:, 1:2] * exog_precision

        """
        from scipy import special

        digamma = special.psi

        y = self.endog if endog is None else endog
        X, Z = self.exog, self.exog_precision
        nz = Z.shape[1]
        Xparams = params[:-nz]
        Zparams = params[-nz:]

        # NO LINKS
        mu = self.link.inverse(np.dot(X, Xparams))
        phi = self.link_precision.inverse(np.dot(Z, Zparams))

        eps_lb = 1e-200  # lower bound for evaluating digamma, avoids -inf
        alpha = np.clip(mu * phi, eps_lb, np.inf)
        beta = np.clip((1 - mu) * phi, eps_lb, np.inf)

        ystar = np.log(y / (1.0 - y))
        dig_beta = digamma(beta)
        mustar = digamma(alpha) - dig_beta
        yt = np.log(1 - y)
        mut = dig_beta - digamma(phi)

        t = 1.0 / self.link.deriv(mu)
        h = 1.0 / self.link_precision.deriv(phi)
        sf1 = phi * t * (ystar - mustar)
        sf2 = h * (mu * (ystar - mustar) + yt - mut)

        return (sf1, sf2)



[docs]
    def score_hessian_factor(self, params, return_hessian=False, observed=True):
        """Derivatives of loglikelihood function w.r.t. linear predictors.

        This calculates score and hessian factors at the same time, because
        there is a large overlap in calculations.

        Parameters
        ----------
        params : ndarray
            Parameter at which score is evaluated.
        return_hessian : bool
            If False, then only score_factors are returned
            If True, the both score and hessian factors are returned
        observed : bool
            If True, then the observed Hessian is returned (default).
            If False, then the expected information matrix is returned.

        Returns
        -------
        score_factor : ndarray, 2-D
            A 2d weight vector used in the calculation of the score_obs.
        (-jbb, -jbg, -jgg) : tuple
            A tuple with 3 hessian factors, corresponding to the upper
            triangle of the Hessian matrix.
            TODO: check why there are minus
        """
        from scipy import special

        digamma = special.psi

        y, X, Z = self.endog, self.exog, self.exog_precision
        nz = Z.shape[1]
        Xparams = params[:-nz]
        Zparams = params[-nz:]

        # NO LINKS
        mu = self.link.inverse(np.dot(X, Xparams))
        phi = self.link_precision.inverse(np.dot(Z, Zparams))

        # We need to prevent mu = 0 and (1-mu) = 0 in digamma call
        eps_lb = 1e-200  # lower bound for evaluating digamma, avoids -inf
        alpha = np.clip(mu * phi, eps_lb, np.inf)
        beta = np.clip((1 - mu) * phi, eps_lb, np.inf)

        ystar = np.log(y / (1.0 - y))
        dig_beta = digamma(beta)
        mustar = digamma(alpha) - dig_beta
        yt = np.log(1 - y)
        mut = dig_beta - digamma(phi)

        t = 1.0 / self.link.deriv(mu)
        h = 1.0 / self.link_precision.deriv(phi)

        ymu_star = ystar - mustar
        sf1 = phi * t * ymu_star
        sf2 = h * (mu * ymu_star + yt - mut)

        if return_hessian:

            def trigamma(x):
                return special.polygamma(1, x)

            trig_beta = trigamma(beta)
            var_star = trigamma(alpha) + trig_beta
            var_t = trig_beta - trigamma(phi)

            c = -trig_beta
            s = self.link.deriv2(mu)
            q = self.link_precision.deriv2(phi)

            jbb = (phi * t) * var_star
            if observed:
                jbb += s * t**2 * ymu_star

            jbb *= t * phi

            jbg = phi * t * h * (mu * var_star + c)
            if observed:
                jbg -= ymu_star * t * h

            jgg = h**2 * (mu**2 * var_star + 2 * mu * c + var_t)
            if observed:
                jgg += (mu * ymu_star + yt - mut) * q * h**3  # **3 ?

            return (sf1, sf2), (-jbb, -jbg, -jgg)
        else:
            return (sf1, sf2)



[docs]
    def score_obs(self, params):
        """
        Score, first derivative of the loglikelihood for each observation.

        Parameters
        ----------
        params : ndarray
            Parameter at which score is evaluated.

        Returns
        -------
        score_obs : ndarray, 2d
            The first derivative of the loglikelihood function evaluated at
            params for each observation.
        """
        sf1, sf2 = self.score_factor(params)

        # elementwise product for each row (observation)
        d1 = sf1[:, None] * self.exog
        d2 = sf2[:, None] * self.exog_precision
        return np.column_stack((d1, d2))



[docs]
    def hessian(self, params, observed=None):
        """Hessian, second derivative of loglikelihood function

        Parameters
        ----------
        params : ndarray
            Parameter at which Hessian is evaluated.
        observed : bool
            If True, then the observed Hessian is returned (default).
            If False, then the expected information matrix is returned.

        Returns
        -------
        hessian : ndarray
            Hessian, i.e. observed information, or expected information matrix.
        """
        if self.hess_type == "eim":
            observed = False
        else:
            observed = True
        _, hf = self.score_hessian_factor(
            params, return_hessian=True, observed=observed
        )

        hf11, hf12, hf22 = hf

        # elementwise product for each row (observation)
        d11 = (self.exog.T * hf11).dot(self.exog)
        d12 = (self.exog.T * hf12).dot(self.exog_precision)
        d22 = (self.exog_precision.T * hf22).dot(self.exog_precision)
        return np.block([[d11, d12], [d12.T, d22]])



[docs]
    def hessian_factor(self, params, observed=True):
        """Derivatives of loglikelihood function w.r.t. linear predictors."""
        _, hf = self.score_hessian_factor(
            params, return_hessian=True, observed=observed
        )
        return hf


    def _start_params(self, niter=2, return_intermediate=False):
        """find starting values

        Parameters
        ----------
        niter : int
            Number of iterations of WLS approximation
        return_intermediate : bool
            If False (default), then only the preliminary parameter estimate
            will be returned.
            If True, then also the two results instances of the WLS estimate
            for mean parameters and for the precision parameters will be
            returned.

        Returns
        -------
        sp : ndarray
            start parameters for the optimization
        res_m2 : results instance (optional)
            Results instance for the WLS regression of the mean function.
        res_p2 : results instance (optional)
            Results instance for the WLS regression of the precision function.

        Notes
        -----
        This calculates a few iteration of weighted least squares. This is not
        a full scoring algorithm.
        """
        # WLS of the mean equation uses the implied weights (inverse variance),
        # WLS for the precision equations uses weights that only take
        # account of the link transformation of the precision endog.
        from statsmodels.regression.linear_model import OLS, WLS

        res_m = OLS(self.link(self.endog), self.exog).fit()
        fitted = self.link.inverse(res_m.fittedvalues)
        resid = self.endog - fitted

        prec_i = fitted * (1 - fitted) / np.maximum(np.abs(resid), 1e-2) ** 2 - 1
        res_p = OLS(self.link_precision(prec_i), self.exog_precision).fit()
        prec_fitted = self.link_precision.inverse(res_p.fittedvalues)
        # sp = np.concatenate((res_m.params, res_p.params))

        for _ in range(niter):
            y_var_inv = (1 + prec_fitted) / (fitted * (1 - fitted))
            # y_var = fitted * (1 - fitted) / (1 + prec_fitted)

            ylink_var_inv = y_var_inv / self.link.deriv(fitted) ** 2
            res_m2 = WLS(self.link(self.endog), self.exog, weights=ylink_var_inv).fit()
            fitted = self.link.inverse(res_m2.fittedvalues)
            resid2 = self.endog - fitted

            prec_i2 = fitted * (1 - fitted) / np.maximum(np.abs(resid2), 1e-2) ** 2 - 1
            w_p = 1.0 / self.link_precision.deriv(prec_fitted) ** 2
            res_p2 = WLS(
                self.link_precision(prec_i2), self.exog_precision, weights=w_p
            ).fit()
            prec_fitted = self.link_precision.inverse(res_p2.fittedvalues)
            sp2 = np.concatenate((res_m2.params, res_p2.params))

        if return_intermediate:
            return sp2, res_m2, res_p2

        return sp2


[docs]
    def fit(self, start_params=None, maxiter=1000, disp=False, method="bfgs", **kwds):
        """
        Fit the model by maximum likelihood.

        Parameters
        ----------
        start_params : array-like
            A vector of starting values for the regression
            coefficients.  If None, a default is chosen.
        maxiter : integer
            The maximum number of iterations
        disp : bool
            Show convergence stats.
        method : str
            The optimization method to use.
        kwds :
            Keyword arguments for the optimizer.

        Returns
        -------
        BetaResults instance.
        """

        if start_params is None:
            start_params = self._start_params()
        #           # http://www.ime.usp.br/~sferrari/beta.pdf suggests starting phi
        #           # on page 8

        if "cov_type" in kwds:
            # this is a workaround because we cannot tell super to use eim
            if kwds["cov_type"].lower() == "eim":
                self.hess_type = "eim"
                del kwds["cov_type"]
        else:
            self.hess_type = "oim"

        res = super().fit(
            start_params=start_params, maxiter=maxiter, method=method, disp=disp, **kwds
        )
        if not isinstance(res, BetaResultsWrapper):
            # currently GenericLikelihoodModel doe not add wrapper
            res = BetaResultsWrapper(res)
        return res


    def _deriv_mean_dparams(self, params):
        """
        Derivative of the expected endog with respect to the parameters.

        not verified yet

        Parameters
        ----------
        params : ndarray
            parameter at which score is evaluated

        Returns
        -------
        The value of the derivative of the expected endog with respect
        to the parameter vector.
        """
        link = self.link
        lin_pred = self.predict(params, which="linear")
        idl = link.inverse_deriv(lin_pred)
        dmat = self.exog * idl[:, None]
        return np.column_stack((dmat, np.zeros(self.exog_precision.shape)))

    def _deriv_score_obs_dendog(self, params):
        """derivative of score_obs w.r.t. endog

        Parameters
        ----------
        params : ndarray
            parameter at which score is evaluated

        Returns
        -------
        derivative : ndarray_2d
            The derivative of the score_obs with respect to endog.
        """
        from statsmodels.tools.numdiff import _approx_fprime_cs_scalar

        def f(y):
            if y.ndim == 2 and y.shape[1] == 1:
                y = y[:, 0]
            sf = self.score_factor(params, endog=y)
            return np.column_stack(sf)

        dsf = _approx_fprime_cs_scalar(self.endog[:, None], f)
        # deriv is 2d vector
        d1 = dsf[:, :1] * self.exog
        d2 = dsf[:, 1:2] * self.exog_precision

        return np.column_stack((d1, d2))


[docs]
    def get_distribution_params(self, params, exog=None, exog_precision=None):
        """
        Return distribution parameters converted from model prediction.

        Parameters
        ----------
        params : array_like
            The model parameters.
        exog : array_like
            Array of predictor variables for mean.
        exog_precision : array_like
            Array of predictor variables for mean.

        Returns
        -------
        (alpha, beta) : tuple of ndarrays
            Parameters for the scipy distribution to evaluate predictive
            distribution.
        """
        mean = self.predict(params, exog=exog)
        precision = self.predict(
            params, exog_precision=exog_precision, which="precision"
        )
        return precision * mean, precision * (1 - mean)



[docs]
    def get_distribution(self, params, exog=None, exog_precision=None):
        """
        Return a instance of the predictive distribution.

        Parameters
        ----------
        params : array_like
            The model parameters.
        exog : array_like
            Array of predictor variables for mean.
        exog_precision : array_like
            Array of predictor variables for mean.

        Returns
        -------
        Instance of a scipy frozen distribution based on estimated
        parameters.

        See Also
        --------
        predict

        Notes
        -----
        This function delegates to the predict method to handle exog and
        exog_precision, which in turn makes any required transformations.

        Due to the behavior of ``scipy.stats.distributions objects``, the
        returned random number generator must be called with ``gen.rvs(n)``
        where ``n`` is the number of observations in the data set used
        to fit the model.  If any other value is used for ``n``, misleading
        results will be produced.
        """
        from scipy import stats

        args = self.get_distribution_params(
            params, exog=exog, exog_precision=exog_precision
        )
        distr = stats.beta(*args)
        return distr





[docs]
class BetaResults(GenericLikelihoodModelResults, _LLRMixin):
    """Results class for Beta regression

    This class inherits from GenericLikelihoodModelResults and not all
    inherited methods might be appropriate in this case.
    """

    # GenericLikeihoodmodel doesn't define fittedvalues, residuals and similar
    @cache_readonly
    def fittedvalues(self):
        """In-sample predicted mean, conditional expectation."""
        return self.model.predict(self.params)

    @cache_readonly
    def fitted_precision(self):
        """In-sample predicted precision"""
        return self.model.predict(self.params, which="precision")

    @cache_readonly
    def resid(self):
        """Response residual"""
        return self.model.endog - self.fittedvalues

    @cache_readonly
    def resid_pearson(self):
        """Pearson standardize residual"""
        std = np.sqrt(self.model.predict(self.params, which="var"))
        return self.resid / std

    @cache_readonly
    def prsquared(self):
        """Cox-Snell Likelihood-Ratio pseudo-R-squared.

        1 - exp((llnull - .llf) * (2 / nobs))
        """
        return self.pseudo_rsquared(kind="lr")


[docs]
    def get_distribution_params(self, exog=None, exog_precision=None, transform=True):
        """
        Return distribution parameters converted from model prediction.

        Parameters
        ----------
        params : array_like
            The model parameters.
        exog : array_like
            Array of predictor variables for mean.
        transform : bool
            If transform is True and formulas have been used, then predictor
            ``exog`` is passed through the formula processing. Default is True.

        Returns
        -------
        (alpha, beta) : tuple of ndarrays
            Parameters for the scipy distribution to evaluate predictive
            distribution.
        """
        mean = self.predict(exog=exog, transform=transform)
        precision = self.predict(
            exog_precision=exog_precision, which="precision", transform=transform
        )
        return precision * mean, precision * (1 - mean)



[docs]
    def get_distribution(self, exog=None, exog_precision=None, transform=True):
        """
        Return a instance of the predictive distribution.

        Parameters
        ----------
        exog : array_like
            Array of predictor variables for mean.
        exog_precision : array_like
            Array of predictor variables for mean.
        transform : bool
            If transform is True and formulas have been used, then predictor
            ``exog`` is passed through the formula processing. Default is True.

        Returns
        -------
        Instance of a scipy frozen distribution based on estimated
        parameters.

        See Also
        --------
        predict

        Notes
        -----
        This function delegates to the predict method to handle exog and
        exog_precision, which in turn makes any required transformations.

        Due to the behavior of ``scipy.stats.distributions objects``, the
        returned random number generator must be called with ``gen.rvs(n)``
        where ``n`` is the number of observations in the data set used
        to fit the model.  If any other value is used for ``n``, misleading
        results will be produced.
        """
        from scipy import stats

        args = self.get_distribution_params(
            exog=exog, exog_precision=exog_precision, transform=transform
        )
        args = (np.asarray(arg) for arg in args)
        distr = stats.beta(*args)
        return distr



[docs]
    def get_influence(self):
        """
        Get an instance of MLEInfluence with influence and outlier measures

        Returns
        -------
        infl : MLEInfluence instance
            The instance has methods to calculate the main influence and
            outlier measures as attributes.

        See Also
        --------
        statsmodels.stats.outliers_influence.MLEInfluence

        Notes
        -----
        Support for mutli-link and multi-exog models is still experimental
        in MLEInfluence. Interface and some definitions might still change.

        Note: Difference to R betareg: Betareg has the same general leverage
        as this model. However, they use a linear approximation hat matrix
        to scale and studentize influence and residual statistics.
        MLEInfluence uses the generalized leverage as hat_matrix_diag.
        Additionally, MLEInfluence uses pearson residuals for residual
        analusis.

        References
        ----------
        todo

        """
        from statsmodels.stats.outliers_influence import MLEInfluence

        return MLEInfluence(self)



[docs]
    def bootstrap(self, *args, **kwargs):
        raise NotImplementedError




class BetaResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(BetaResultsWrapper, BetaResults)