# Source code for statsmodels.graphics.functional

"""Module for functional boxplots."""
from statsmodels.compat.numpy import NP_LT_123

import numpy as np
from scipy.special import comb

from statsmodels.graphics.utils import _import_mpl
from statsmodels.multivariate.pca import PCA
from statsmodels.nonparametric.kernel_density import KDEMultivariate

try:
from scipy.optimize import brute, differential_evolution, fmin
have_de_optim = True
except ImportError:
from scipy.optimize import brute, fmin
have_de_optim = False
import itertools
from multiprocessing import Pool

from . import utils

__all__ = ['hdrboxplot', 'fboxplot', 'rainbowplot', 'banddepth']

class HdrResults:
"""Wrap results and pretty print them."""

def __init__(self, kwds):
self.__dict__.update(kwds)

def __repr__(self):
msg = ("HDR boxplot summary:\n"
"-> median:\n{}\n"
"-> 50% HDR (max, min):\n{}\n"
"-> 90% HDR (max, min):\n{}\n"
"-> Extra quantiles (max, min):\n{}\n"
"-> Outliers:\n{}\n"
"-> Outliers indices:\n{}\n"
).format(self.median, self.hdr_50, self.hdr_90,
self.extra_quantiles, self.outliers, self.outliers_idx)

return msg

def _inverse_transform(pca, data):
"""
Inverse transform on PCA.

Use PCA's `project` method by temporary replacing its factors with
`data`.

Parameters
----------
pca : statsmodels Principal Component Analysis instance
The PCA object to use.
data : sequence of ndarrays or 2-D ndarray
The vectors of functions to create a functional boxplot from.  If a
sequence of 1-D arrays, these should all be the same size.
The first axis is the function index, the second axis the one along
which the function is defined.  So ``data[0, :]`` is the first
functional curve.

Returns
-------
projection : ndarray
nobs by nvar array of the projection onto ncomp factors
"""
factors = pca.factors
pca.factors = data.reshape(-1, factors.shape[1])
projection = pca.project()
pca.factors = factors
return projection

def _curve_constrained(x, idx, sign, band, pca, ks_gaussian):
"""Find out if the curve is within the band.

The curve value at :attr:`idx` for a given PDF is only returned if
within bounds defined by the band. Otherwise, 1E6 is returned.

Parameters
----------
x : float
Curve in reduced space.
idx : int
Index value of the components to compute.
sign : int
Return positive or negative value.
band : list of float
PDF values `[min_pdf, max_pdf]` to be within.
pca : statsmodels Principal Component Analysis instance
The PCA object to use.
ks_gaussian : KDEMultivariate instance

Returns
-------
value : float
Curve value at `idx`.
"""
x = x.reshape(1, -1)
pdf = ks_gaussian.pdf(x)
if band[0] < pdf < band[1]:
value = sign * _inverse_transform(pca, x)[0][idx]
else:
value = 1E6
return value

def _min_max_band(args):
"""
Min and max values at `idx`.

Global optimization to find the extrema per component.

Parameters
----------
args: list
It is a list of an idx and other arguments as a tuple:
idx : int
Index value of the components to compute
The tuple contains:
band : list of float
PDF values `[min_pdf, max_pdf]` to be within.
pca : statsmodels Principal Component Analysis instance
The PCA object to use.
bounds : sequence
``(min, max)`` pair for each components
ks_gaussian : KDEMultivariate instance

Returns
-------
band : tuple of float
``(max, min)`` curve values at `idx`
"""
idx, (band, pca, bounds, ks_gaussian, use_brute, seed) = args
if have_de_optim and not use_brute:
max_ = differential_evolution(_curve_constrained, bounds=bounds,
args=(idx, -1, band, pca, ks_gaussian),
maxiter=7, seed=seed).x
min_ = differential_evolution(_curve_constrained, bounds=bounds,
args=(idx, 1, band, pca, ks_gaussian),
maxiter=7, seed=seed).x
else:
max_ = brute(_curve_constrained, ranges=bounds, finish=fmin,
args=(idx, -1, band, pca, ks_gaussian))

min_ = brute(_curve_constrained, ranges=bounds, finish=fmin,
args=(idx, 1, band, pca, ks_gaussian))

band = (_inverse_transform(pca, max_)[0][idx],
_inverse_transform(pca, min_)[0][idx])
return band