Source code for statsmodels.sandbox.tsa.fftarma

# -*- coding: utf-8 -*-
"""
Created on Mon Dec 14 19:53:25 2009

Author: josef-pktd

generate arma sample using fft with all the lfilter it looks slow
to get the ma representation first

apply arma filter (in ar representation) to time series to get white noise
but seems slow to be useful for fast estimation for nobs=10000

change/check: instead of using marep, use fft-transform of ar and ma
    separately, use ratio check theory is correct and example works
    DONE : feels much faster than lfilter
    -> use for estimation of ARMA
    -> use pade (scipy.misc) approximation to get starting polynomial
       from autocorrelation (is autocorrelation of AR(p) related to marep?)
       check if pade is fast, not for larger arrays ?
       maybe pade doesn't do the right thing for this, not tried yet
       scipy.pade([ 1.    ,  0.6,  0.25, 0.125, 0.0625, 0.1],2)
       raises LinAlgError: singular matrix
       also doesn't have roots inside unit circle ??
    -> even without initialization, it might be fast for estimation
    -> how do I enforce stationarity and invertibility,
       need helper function

get function drop imag if close to zero from numpy/scipy source, where?

"""

from __future__ import print_function
import numpy as np
import numpy.fft as fft
#import scipy.fftpack as fft
from scipy import signal
#from try_var_convolve import maxabs
from statsmodels.sandbox.archive.linalg_decomp_1 import OneTimeProperty
from statsmodels.tsa.arima_process import ArmaProcess


#trying to convert old experiments to a class


[docs]class ArmaFft(ArmaProcess): '''fft tools for arma processes This class contains several methods that are providing the same or similar returns to try out and test different implementations. Notes ----- TODO: check whether we don't want to fix maxlags, and create new instance if maxlag changes. usage for different lengths of timeseries ? or fix frequency and length for fft check default frequencies w, terminology norw n_or_w some ffts are currently done without padding with zeros returns for spectral density methods needs checking, is it always the power spectrum hw*hw.conj() normalization of the power spectrum, spectral density: not checked yet, for example no variance of underlying process is used ''' def __init__(self, ar, ma, n): #duplicates now that are subclassing ArmaProcess super(ArmaFft, self).__init__(ar, ma) self.ar = np.asarray(ar) self.ma = np.asarray(ma) self.nobs = n #could make the polynomials into cached attributes self.arpoly = np.polynomial.Polynomial(ar) self.mapoly = np.polynomial.Polynomial(ma) self.nar = len(ar) #1d only currently self.nma = len(ma)
[docs] def padarr(self, arr, maxlag, atend=True): '''pad 1d array with zeros at end to have length maxlag function that is a method, no self used Parameters ---------- arr : array_like, 1d array that will be padded with zeros maxlag : int length of array after padding atend : boolean If True (default), then the zeros are added to the end, otherwise to the front of the array Returns ------- arrp : ndarray zero-padded array Notes ----- This is mainly written to extend coefficient arrays for the lag-polynomials. It returns a copy. ''' if atend: return np.r_[arr, np.zeros(maxlag-len(arr))] else: return np.r_[np.zeros(maxlag-len(arr)), arr]
[docs] def pad(self, maxlag): '''construct AR and MA polynomials that are zero-padded to a common length Parameters ---------- maxlag : int new length of lag-polynomials Returns ------- ar : ndarray extended AR polynomial coefficients ma : ndarray extended AR polynomial coefficients ''' arpad = np.r_[self.ar, np.zeros(maxlag-self.nar)] mapad = np.r_[self.ma, np.zeros(maxlag-self.nma)] return arpad, mapad
[docs] def fftar(self, n=None): '''Fourier transform of AR polynomial, zero-padded at end to n Parameters ---------- n : int length of array after zero-padding Returns ------- fftar : ndarray fft of zero-padded ar polynomial ''' if n is None: n = len(self.ar) return fft.fft(self.padarr(self.ar, n))
[docs] def fftma(self, n): '''Fourier transform of MA polynomial, zero-padded at end to n Parameters ---------- n : int length of array after zero-padding Returns ------- fftar : ndarray fft of zero-padded ar polynomial ''' if n is None: n = len(self.ar) return fft.fft(self.padarr(self.ma, n))
#@OneTimeProperty # not while still debugging things
[docs] def fftarma(self, n=None): '''Fourier transform of ARMA polynomial, zero-padded at end to n The Fourier transform of the ARMA process is calculated as the ratio of the fft of the MA polynomial divided by the fft of the AR polynomial. Parameters ---------- n : int length of array after zero-padding Returns ------- fftarma : ndarray fft of zero-padded arma polynomial ''' if n is None: n = self.nobs return (self.fftma(n) / self.fftar(n))
[docs] def spd(self, npos): '''raw spectral density, returns Fourier transform n is number of points in positive spectrum, the actual number of points is twice as large. different from other spd methods with fft ''' n = npos w = fft.fftfreq(2*n) * 2 * np.pi hw = self.fftarma(2*n) #not sure, need to check normalization #return (hw*hw.conj()).real[n//2-1:] * 0.5 / np.pi #doesn't show in plot return (hw*hw.conj()).real * 0.5 / np.pi, w
[docs] def spdshift(self, n): '''power spectral density using fftshift currently returns two-sided according to fft frequencies, use first half ''' #size = s1+s2-1 mapadded = self.padarr(self.ma, n) arpadded = self.padarr(self.ar, n) hw = fft.fft(fft.fftshift(mapadded)) / fft.fft(fft.fftshift(arpadded)) #return np.abs(spd)[n//2-1:] w = fft.fftfreq(n) * 2 * np.pi wslice = slice(n//2-1, None, None) #return (hw*hw.conj()).real[wslice], w[wslice] return (hw*hw.conj()).real, w
[docs] def spddirect(self, n): '''power spectral density using padding to length n done by fft currently returns two-sided according to fft frequencies, use first half ''' #size = s1+s2-1 #abs looks wrong hw = fft.fft(self.ma, n) / fft.fft(self.ar, n) w = fft.fftfreq(n) * 2 * np.pi wslice = slice(None, n//2, None) #return (np.abs(hw)**2)[wslice], w[wslice] return (np.abs(hw)**2) * 0.5/np.pi, w
def _spddirect2(self, n): '''this looks bad, maybe with an fftshift ''' #size = s1+s2-1 hw = (fft.fft(np.r_[self.ma[::-1],self.ma], n) / fft.fft(np.r_[self.ar[::-1],self.ar], n)) return (hw*hw.conj()) #.real[n//2-1:]
[docs] def spdroots(self, w): '''spectral density for frequency using polynomial roots builds two arrays (number of roots, number of frequencies) ''' return self._spdroots(self.arroots, self.maroots, w)
def _spdroots(self, arroots, maroots, w): '''spectral density for frequency using polynomial roots builds two arrays (number of roots, number of frequencies) Parameters ---------- arroots : ndarray roots of ar (denominator) lag-polynomial maroots : ndarray roots of ma (numerator) lag-polynomial w : array_like frequencies for which spd is calculated Notes ----- this should go into a function ''' w = np.atleast_2d(w).T cosw = np.cos(w) #Greene 5th edt. p626, section 20.2.7.a. maroots = 1./maroots arroots = 1./arroots num = 1 + maroots**2 - 2* maroots * cosw den = 1 + arroots**2 - 2* arroots * cosw #print 'num.shape, den.shape', num.shape, den.shape hw = 0.5 / np.pi * num.prod(-1) / den.prod(-1) #or use expsumlog return np.squeeze(hw), w.squeeze()
[docs] def spdpoly(self, w, nma=50): '''spectral density from MA polynomial representation for ARMA process References ---------- Cochrane, section 8.3.3 ''' mpoly = np.polynomial.Polynomial(self.arma2ma(nma)) hw = mpoly(np.exp(1j * w)) spd = np.real_if_close(hw * hw.conj() * 0.5/np.pi) return spd, w
[docs] def filter(self, x): ''' filter a timeseries with the ARMA filter padding with zero is missing, in example I needed the padding to get initial conditions identical to direct filter Initial filtered observations differ from filter2 and signal.lfilter, but at end they are the same. See Also -------- tsa.filters.fftconvolve ''' n = x.shape[0] if n == self.fftarma: fftarma = self.fftarma else: fftarma = self.fftma(n) / self.fftar(n) tmpfft = fftarma * fft.fft(x) return fft.ifft(tmpfft)
[docs] def filter2(self, x, pad=0): '''filter a time series using fftconvolve3 with ARMA filter padding of x currently works only if x is 1d in example it produces same observations at beginning as lfilter even without padding. TODO: this returns 1 additional observation at the end ''' from statsmodels.tsa.filters import fftconvolve3 if not pad: pass elif pad == 'auto': #just guessing how much padding x = self.padarr(x, x.shape[0] + 2*(self.nma+self.nar), atend=False) else: x = self.padarr(x, x.shape[0] + int(pad), atend=False) return fftconvolve3(x, self.ma, self.ar)
[docs] def acf2spdfreq(self, acovf, nfreq=100, w=None): ''' not really a method just for comparison, not efficient for large n or long acf this is also similarly use in tsa.stattools.periodogram with window ''' if w is None: w = np.linspace(0, np.pi, nfreq)[:, None] nac = len(acovf) hw = 0.5 / np.pi * (acovf[0] + 2 * (acovf[1:] * np.cos(w*np.arange(1,nac))).sum(1)) return hw
[docs] def invpowerspd(self, n): '''autocovariance from spectral density scaling is correct, but n needs to be large for numerical accuracy maybe padding with zero in fft would be faster without slicing it returns 2-sided autocovariance with fftshift >>> ArmaFft([1, -0.5], [1., 0.4], 40).invpowerspd(2**8)[:10] array([ 2.08 , 1.44 , 0.72 , 0.36 , 0.18 , 0.09 , 0.045 , 0.0225 , 0.01125 , 0.005625]) >>> ArmaFft([1, -0.5], [1., 0.4], 40).acovf(10) array([ 2.08 , 1.44 , 0.72 , 0.36 , 0.18 , 0.09 , 0.045 , 0.0225 , 0.01125 , 0.005625]) ''' hw = self.fftarma(n) return np.real_if_close(fft.ifft(hw*hw.conj()), tol=200)[:n]
[docs] def spdmapoly(self, w, twosided=False): '''ma only, need division for ar, use LagPolynomial ''' if w is None: w = np.linspace(0, np.pi, nfreq) return 0.5 / np.pi * self.mapoly(np.exp(w*1j))
[docs] def plot4(self, fig=None, nobs=100, nacf=20, nfreq=100): rvs = self.generate_sample(nsample=100, burnin=500) acf = self.acf(nacf)[:nacf] #TODO: check return length pacf = self.pacf(nacf) w = np.linspace(0, np.pi, nfreq) spdr, wr = self.spdroots(w) if fig is None: import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(2,2,1) ax.plot(rvs) ax.set_title('Random Sample \nar=%s, ma=%s' % (self.ar, self.ma)) ax = fig.add_subplot(2,2,2) ax.plot(acf) ax.set_title('Autocorrelation \nar=%s, ma=%rs' % (self.ar, self.ma)) ax = fig.add_subplot(2,2,3) ax.plot(wr, spdr) ax.set_title('Power Spectrum \nar=%s, ma=%s' % (self.ar, self.ma)) ax = fig.add_subplot(2,2,4) ax.plot(pacf) ax.set_title('Partial Autocorrelation \nar=%s, ma=%s' % (self.ar, self.ma)) return fig
def spdar1(ar, w): if np.ndim(ar) == 0: rho = ar else: rho = -ar[1] return 0.5 / np.pi /(1 + rho*rho - 2 * rho * np.cos(w)) if __name__ == '__main__': def maxabs(x,y): return np.max(np.abs(x-y)) nobs = 200 #10000 ar = [1, 0.0] ma = [1, 0.0] ar2 = np.zeros(nobs) ar2[:2] = [1, -0.9] uni = np.zeros(nobs) uni[0]=1. #arrep = signal.lfilter(ma, ar, ar2) #marep = signal.lfilter([1],arrep, uni) # same faster: arcomb = np.convolve(ar, ar2, mode='same') marep = signal.lfilter(ma,arcomb, uni) #[len(ma):] print(marep[:10]) mafr = fft.fft(marep) rvs = np.random.normal(size=nobs) datafr = fft.fft(rvs) y = fft.ifft(mafr*datafr) print(np.corrcoef(np.c_[y[2:], y[1:-1], y[:-2]],rowvar=0)) arrep = signal.lfilter([1],marep, uni) print(arrep[:20]) # roundtrip to ar arfr = fft.fft(arrep) yfr = fft.fft(y) x = fft.ifft(arfr*yfr).real #imag part is e-15 # the next two are equal, roundtrip works print(x[:5]) print(rvs[:5]) print(np.corrcoef(np.c_[x[2:], x[1:-1], x[:-2]],rowvar=0)) # ARMA filter using fft with ratio of fft of ma/ar lag polynomial # seems much faster than using lfilter #padding, note arcomb is already full length arcombp = np.zeros(nobs) arcombp[:len(arcomb)] = arcomb map_ = np.zeros(nobs) #rename: map was shadowing builtin map_[:len(ma)] = ma ar0fr = fft.fft(arcombp) ma0fr = fft.fft(map_) y2 = fft.ifft(ma0fr/ar0fr*datafr) #the next two are (almost) equal in real part, almost zero but different in imag print(y2[:10]) print(y[:10]) print(maxabs(y, y2)) # from chfdiscrete #1.1282071239631782e-014 ar = [1, -0.4] ma = [1, 0.2] arma1 = ArmaFft([1, -0.5,0,0,0,00, -0.7, 0.3], [1, 0.8], nobs) nfreq = nobs w = np.linspace(0, np.pi, nfreq) w2 = np.linspace(0, 2*np.pi, nfreq) import matplotlib.pyplot as plt plt.close('all') plt.figure() spd1, w1 = arma1.spd(2**10) print(spd1.shape) _ = plt.plot(spd1) plt.title('spd fft complex') plt.figure() spd2, w2 = arma1.spdshift(2**10) print(spd2.shape) _ = plt.plot(w2, spd2) plt.title('spd fft shift') plt.figure() spd3, w3 = arma1.spddirect(2**10) print(spd3.shape) _ = plt.plot(w3, spd3) plt.title('spd fft direct') plt.figure() spd3b = arma1._spddirect2(2**10) print(spd3b.shape) _ = plt.plot(spd3b) plt.title('spd fft direct mirrored') plt.figure() spdr, wr = arma1.spdroots(w) print(spdr.shape) plt.plot(w, spdr) plt.title('spd from roots') plt.figure() spdar1_ = spdar1(arma1.ar, w) print(spdar1_.shape) _ = plt.plot(w, spdar1_) plt.title('spd ar1') plt.figure() wper, spdper = arma1.periodogram(nfreq) print(spdper.shape) _ = plt.plot(w, spdper) plt.title('periodogram') startup = 1000 rvs = arma1.generate_sample(startup+10000)[startup:] import matplotlib.mlab as mlb plt.figure() sdm, wm = mlb.psd(x) print('sdm.shape', sdm.shape) sdm = sdm.ravel() plt.plot(wm, sdm) plt.title('matplotlib') from nitime.algorithms import LD_AR_est #yule_AR_est(s, order, Nfreqs) wnt, spdnt = LD_AR_est(rvs, 10, 512) plt.figure() print('spdnt.shape', spdnt.shape) _ = plt.plot(spdnt.ravel()) print(spdnt[:10]) plt.title('nitime') fig = plt.figure() arma1.plot4(fig) #plt.show()