# Source code for statsmodels.distributions.copula.copulas

"""

Which Archimedean is Best?
Extreme Value copulas formulas are based on Genest 2009

References
----------

Genest, C., 2009. Rank-based inference for bivariate extreme-value
copulas. The Annals of Statistics, 37(5), pp.2990-3022.

"""
from abc import ABC, abstractmethod

import numpy as np
from scipy import stats

from statsmodels.graphics import utils

[docs]
"""Multivariate copula distribution

Parameters
----------
copula : :class:Copula instance
An instance of :class:Copula, e.g. :class:GaussianCopula,
:class:FrankCopula, etc.
marginals : list of distribution instances
Marginal distributions.
copargs : tuple
Parameters for copula

Notes
-----
Status: experimental, argument handling may still change

"""
def __init__(self, copula, marginals, cop_args=()):

self.copula = copula

# no checking done on marginals
self.marginals = marginals
self.cop_args = cop_args
self.k_vars = len(marginals)

[docs]
def rvs(self, nobs=1, cop_args=None, marg_args=None, random_state=None):
"""Draw n in the half-open interval [0, 1).

Sample the joint distribution.

Parameters
----------
nobs : int, optional
Number of samples to generate in the parameter space.
Default is 1.
cop_args : tuple
Copula parameters. If None, then the copula parameters will be
taken from the cop_args attribute created when initiializing
the instance.
marg_args : list of tuples
Parameters for the marginal distributions. It can be None if none
of the marginal distributions have parameters, otherwise it needs
to be a list of tuples with the same length has the number of
marginal distributions. The list can contain empty tuples for
marginal distributions that do not take parameter arguments.
random_state : {None, int, numpy.random.Generator}, optional
If seed is None then the legacy singleton NumPy generator.
This will change after 0.13 to use a fresh NumPy Generator,
so you should explicitly pass a seeded Generator if you
need reproducible results.
If seed is an int, a new Generator instance is used,
seeded with seed.
If seed is already a Generator instance then that instance is
used.

Returns
-------
sample : array_like (n, d)
Sample from the joint distribution.

Notes
-----
The random samples are generated by creating a sample with uniform
margins from the copula, and using ppf to convert uniform margins
to the one specified by the marginal distribution.

--------
statsmodels.tools.rng_qrng.check_random_state
"""
if cop_args is None:
cop_args = self.cop_args
if marg_args is None:
marg_args = [()] * self.k_vars

sample = self.copula.rvs(nobs=nobs, args=cop_args,
random_state=random_state)

for i, dist in enumerate(self.marginals):
sample[:, i] = dist.ppf(0.5 + (1 - 1e-10) * (sample[:, i] - 0.5),
*marg_args[i])
return sample

[docs]
def cdf(self, y, cop_args=None, marg_args=None):
"""CDF of copula distribution.

Parameters
----------
y : array_like
Values of random variable at which to evaluate cdf.
If 2-dimensional, then components of multivariate random variable
need to be in columns
cop_args : tuple
Copula parameters. If None, then the copula parameters will be
taken from the cop_args attribute created when initiializing
the instance.
marg_args : list of tuples
Parameters for the marginal distributions. It can be None if none
of the marginal distributions have parameters, otherwise it needs
to be a list of tuples with the same length has the number of
marginal distributions. The list can contain empty tuples for
marginal distributions that do not take parameter arguments.

Returns
-------
cdf values

"""
y = np.asarray(y)
if cop_args is None:
cop_args = self.cop_args
if marg_args is None:
marg_args = [()] * y.shape[-1]

cdf_marg = []
for i in range(self.k_vars):
cdf_marg.append(self.marginals[i].cdf(y[..., i], *marg_args[i]))

u = np.column_stack(cdf_marg)
if y.ndim == 1:
u = u.squeeze()
return self.copula.cdf(u, cop_args)

[docs]
def pdf(self, y, cop_args=None, marg_args=None):
"""PDF of copula distribution.

Parameters
----------
y : array_like
Values of random variable at which to evaluate cdf.
If 2-dimensional, then components of multivariate random variable
need to be in columns
cop_args : tuple
Copula parameters. If None, then the copula parameters will be
taken from the cop_args attribute created when initiializing
the instance.
marg_args : list of tuples
Parameters for the marginal distributions. It can be None if none
of the marginal distributions have parameters, otherwise it needs
to be a list of tuples with the same length has the number of
marginal distributions. The list can contain empty tuples for
marginal distributions that do not take parameter arguments.

Returns
-------
pdf values
"""
return np.exp(self.logpdf(y, cop_args=cop_args, marg_args=marg_args))

[docs]
def logpdf(self, y, cop_args=None, marg_args=None):
"""Log-pdf of copula distribution.

Parameters
----------
y : array_like
Values of random variable at which to evaluate cdf.
If 2-dimensional, then components of multivariate random variable
need to be in columns
cop_args : tuple
Copula parameters. If None, then the copula parameters will be
taken from the cop_args attribute creating when initiializing
the instance.
marg_args : list of tuples
Parameters for the marginal distributions. It can be None if none
of the marginal distributions have parameters, otherwise it needs
to be a list of tuples with the same length has the number of
marginal distributions. The list can contain empty tuples for
marginal distributions that do not take parameter arguments.

Returns
-------
log-pdf values

"""
y = np.asarray(y)
if cop_args is None:
cop_args = self.cop_args
if marg_args is None:
marg_args = tuple([()] * y.shape[-1])

lpdf = 0.0
cdf_marg = []
for i in range(self.k_vars):
lpdf += self.marginals[i].logpdf(y[..., i], *marg_args[i])
cdf_marg.append(self.marginals[i].cdf(y[..., i], *marg_args[i]))

u = np.column_stack(cdf_marg)
if y.ndim == 1:
u = u.squeeze()

lpdf += self.copula.logpdf(u, cop_args)
return lpdf

class Copula(ABC):
r"""A generic Copula class meant for subclassing.

Notes
-----
A function :math:\phi on :math:[0, \infty] is the Laplace-Stieltjes
transform of a distribution function if and only if :math:\phi is
completely monotone and :math:\phi(0) = 1 [2]_.

The following algorithm for sampling a d-dimensional exchangeable
Archimedean copula with generator :math:\phi is due to Marshall, Olkin
(1988) [1]_, where :math:LS^{−1}(\phi) denotes the inverse
Laplace-Stieltjes transform of :math:\phi.

From a mixture representation with respect to :math:F, the following
algorithm may be derived for sampling Archimedean copulas, see [1]_.

1. Sample :math:V \sim F = LS^{−1}(\phi).
2. Sample i.i.d. :math:X_i \sim U[0,1], i \in \{1,...,d\}.
3. Return:math:(U_1,..., U_d), where :math:U_i = \phi(−\log(X_i)/V), i
\in \{1, ...,d\}.

Detailed properties of each copula can be found in [3]_.

Instances of the class can access the attributes: rng for the random
number generator (used for the seed).

**Subclassing**

When subclassing Copula to create a new copula, __init__ and
random must be redefined.

* __init__(theta): If the copula
does not take advantage of a theta, this parameter can be omitted.
* random(n, random_state): draw n from the copula.
* pdf(x): PDF from the copula.
* cdf(x): CDF from the copula.

References
----------
.. [1] Marshall AW, Olkin I. “Families of Multivariate Distributions”,
Journal of the American Statistical Association, 83, 834–841, 1988.
.. [2] Marius Hofert. "Sampling Archimedean copulas",
Universität Ulm, 2008.
.. rvs[3] Harry Joe. "Dependence Modeling with Copulas", Monographs on
Statistics and Applied Probability 134, 2015.

"""

def __init__(self, k_dim=2):
self.k_dim = k_dim

def rvs(self, nobs=1, args=(), random_state=None):
"""Draw n in the half-open interval [0, 1).

Marginals are uniformly distributed.

Parameters
----------
nobs : int, optional
Number of samples to generate from the copula. Default is 1.
args : tuple
Arguments for copula parameters. The number of arguments depends
on the copula.
random_state : {None, int, numpy.random.Generator}, optional
If seed is None then the legacy singleton NumPy generator.
This will change after 0.13 to use a fresh NumPy Generator,
so you should explicitly pass a seeded Generator if you
need reproducible results.
If seed is an int, a new Generator instance is used,
seeded with seed.
If seed is already a Generator instance then that instance is
used.

Returns
-------
sample : array_like (nobs, d)
Sample from the copula.

--------
statsmodels.tools.rng_qrng.check_random_state
"""
raise NotImplementedError

@abstractmethod
def pdf(self, u, args=()):
"""Probability density function of copula.

Parameters
----------
u : array_like, 2-D
Points of random variables in unit hypercube at which method is
evaluated.
The second (or last) dimension should be the same as the dimension
of the random variable, e.g. 2 for bivariate copula.
args : tuple
Arguments for copula parameters. The number of arguments depends
on the copula.

Returns
-------
pdf : ndarray, (nobs, k_dim)
Copula pdf evaluated at points u.
"""

def logpdf(self, u, args=()):
"""Log of copula pdf, loglikelihood.

Parameters
----------
u : array_like, 2-D
Points of random variables in unit hypercube at which method is
evaluated.
The second (or last) dimension should be the same as the dimension
of the random variable, e.g. 2 for bivariate copula.
args : tuple
Arguments for copula parameters. The number of arguments depends
on the copula.

Returns
-------
cdf : ndarray, (nobs, k_dim)
Copula log-pdf evaluated at points u.
"""
return np.log(self.pdf(u, *args))

@abstractmethod
def cdf(self, u, args=()):
"""Cumulative distribution function evaluated at points u.

Parameters
----------
u : array_like, 2-D
Points of random variables in unit hypercube at which method is
evaluated.
The second (or last) dimension should be the same as the dimension
of the random variable, e.g. 2 for bivariate copula.
args : tuple
Arguments for copula parameters. The number of arguments depends
on the copula.

Returns
-------
cdf : ndarray, (nobs, k_dim)
Copula cdf evaluated at points u.
"""

def plot_scatter(self, sample=None, nobs=500, random_state=None, ax=None):
"""Sample the copula and plot.

Parameters
----------
sample : array-like, optional
The sample to plot.  If not provided (the default), a sample
is generated.
nobs : int, optional
Number of samples to generate from the copula.
random_state : {None, int, numpy.random.Generator}, optional
If seed is None then the legacy singleton NumPy generator.
This will change after 0.13 to use a fresh NumPy Generator,
so you should explicitly pass a seeded Generator if you
need reproducible results.
If seed is an int, a new Generator instance is used,
seeded with seed.
If seed is already a Generator instance then that instance is
used.
ax : AxesSubplot, optional
If given, this subplot is used to plot in instead of a new figure
being created.

Returns
-------
fig : Figure
If ax is None, the created figure.  Otherwise the figure to which
ax is connected.
sample : array_like (n, d)
Sample from the copula.

--------
statsmodels.tools.rng_qrng.check_random_state
"""
if self.k_dim != 2:
raise ValueError("Can only plot 2-dimensional Copula.")

if sample is None:
sample = self.rvs(nobs=nobs, random_state=random_state)

fig, ax = utils.create_mpl_ax(ax)
ax.scatter(sample[:, 0], sample[:, 1])
ax.set_xlabel('u')
ax.set_ylabel('v')

return fig, sample

def plot_pdf(self, ticks_nbr=10, ax=None):
"""Plot the PDF.

Parameters
----------
ticks_nbr : int, optional
Number of color isolines for the PDF. Default is 10.
ax : AxesSubplot, optional
If given, this subplot is used to plot in instead of a new figure
being created.

Returns
-------
fig : Figure
If ax is None, the created figure.  Otherwise the figure to which
ax is connected.

"""
from matplotlib import pyplot as plt
if self.k_dim != 2:
import warnings
warnings.warn("Plotting 2-dimensional Copula.")

n_samples = 100

eps = 1e-4
uu, vv = np.meshgrid(np.linspace(eps, 1 - eps, n_samples),
np.linspace(eps, 1 - eps, n_samples))
points = np.vstack([uu.ravel(), vv.ravel()]).T

data = self.pdf(points).T.reshape(uu.shape)
min_ = np.nanpercentile(data, 5)
max_ = np.nanpercentile(data, 95)

fig, ax = utils.create_mpl_ax(ax)

vticks = np.linspace(min_, max_, num=ticks_nbr)
range_cbar = [min_, max_]
cs = ax.contourf(uu, vv, data, vticks,
antialiased=True, vmin=range_cbar[0],
vmax=range_cbar[1])

ax.set_xlabel("u")
ax.set_ylabel("v")
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_aspect('equal')
cbar = plt.colorbar(cs, ticks=vticks)
cbar.set_label('p')
fig.tight_layout()

return fig

def tau_simulated(self, nobs=1024, random_state=None):
"""Kendall's tau based on simulated samples.

Returns
-------
tau : float
Kendall's tau.

"""
x = self.rvs(nobs, random_state=random_state)
return stats.kendalltau(x[:, 0], x[:, 1])[0]

def fit_corr_param(self, data):
"""Copula correlation parameter using Kendall's tau of sample data.

Parameters
----------
data : array_like
Sample data used to fit theta using Kendall's tau.

Returns
-------
corr_param : float
Correlation parameter of the copula, theta in Archimedean and
pearson correlation in elliptical.
If k_dim > 2, then average tau is used.
"""
x = np.asarray(data)

if x.shape[1] == 2:
tau = stats.kendalltau(x[:, 0], x[:, 1])[0]
else:
k = self.k_dim
taus = [stats.kendalltau(x[..., i], x[..., j])[0]
for i in range(k) for j in range(i+1, k)]
tau = np.mean(taus)
return self._arg_from_tau(tau)

def _arg_from_tau(self, tau):
"""Compute correlation parameter from tau.

Parameters
----------
tau : float
Kendall's tau.

Returns
-------
corr_param : float
Correlation parameter of the copula, theta in Archimedean and
pearson correlation in elliptical.

"""
raise NotImplementedError


Last update: Feb 14, 2024