Source code for statsmodels.sandbox.tsa.movstat

"""using scipy signal and numpy correlate to calculate some time series
statistics

original developer notes

see also scikits.timeseries  (movstat is partially inspired by it)
added 2009-08-29
timeseries moving stats are in c, autocorrelation similar to here
I thought I saw moving stats somewhere in python, maybe not)


TODO

moving statistics
- filters do not handle boundary conditions nicely (correctly ?)
e.g. minimum order filter uses 0 for out of bounds value
-> append and prepend with last resp. first value
- enhance for nd arrays, with axis = 0



Note: Equivalence for 1D signals
>>> np.all(signal.correlate(x,[1,1,1],'valid')==np.correlate(x,[1,1,1]))
True
>>> np.all(ndimage.filters.correlate(x,[1,1,1], origin = -1)[:-3+1]==np.correlate(x,[1,1,1]))
True

# multidimensional, but, it looks like it uses common filter across time series, no VAR
ndimage.filters.correlate(np.vstack([x,x]),np.array([[1,1,1],[0,0,0]]), origin = 1)
ndimage.filters.correlate(x,[1,1,1],origin = 1))
ndimage.filters.correlate(np.vstack([x,x]),np.array([[0.5,0.5,0.5],[0.5,0.5,0.5]]), \
origin = 1)

>>> np.all(ndimage.filters.correlate(np.vstack([x,x]),np.array([[1,1,1],[0,0,0]]), origin = 1)[0]==\
ndimage.filters.correlate(x,[1,1,1],origin = 1))
True
>>> np.all(ndimage.filters.correlate(np.vstack([x,x]),np.array([[0.5,0.5,0.5],[0.5,0.5,0.5]]), \
origin = 1)[0]==ndimage.filters.correlate(x,[1,1,1],origin = 1))


update
2009-09-06: cosmetic changes, rearrangements
"""

import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal
from scipy import signal


def expandarr(x, k):
    # make it work for 2D or nD with axis
    kadd = k
    if np.ndim(x) == 2:
        kadd = (kadd, np.shape(x)[1])
    return np.r_[np.ones(kadd) * x[0], x, np.ones(kadd) * x[-1]]


[docs] def movorder(x, order="med", windsize=3, lag="lagged"): """moving order statistics Parameters ---------- x : ndarray time series data order : float or 'med', 'min', 'max' which order statistic to calculate windsize : int window size lag : 'lagged', 'centered', or 'leading' location of window relative to current position Returns ------- filtered array """ # if windsize is even should it raise ValueError if lag == "lagged": lead = windsize // 2 elif lag == "centered": lead = 0 elif lag == "leading": lead = -windsize // 2 + 1 else: raise ValueError if np.isfinite(order): # if np.isnumber(order): ord = order # note: ord is a builtin function elif order == "med": ord = (windsize - 1) / 2 elif order == "min": ord = 0 elif order == "max": ord = windsize - 1 else: raise ValueError # return signal.order_filter(x,np.ones(windsize),ord)[:-lead] xext = expandarr(x, windsize) # np.r_[np.ones(windsize)*x[0],x,np.ones(windsize)*x[-1]] return signal.order_filter(xext, np.ones(windsize), ord)[ windsize - lead : -(windsize + lead) ]
def check_movorder(): """graphical test for movorder""" import matplotlib.pylab as plt x = np.arange(1, 10) xo = movorder(x, order="max") assert_array_equal(xo, x) x = np.arange(10, 1, -1) xo = movorder(x, order="min") assert_array_equal(xo, x) assert_array_equal(movorder(x, order="min", lag="centered")[:-1], x[1:]) tt = np.linspace(0, 2 * np.pi, 15) x = np.sin(tt) + 1 xo = movorder(x, order="max") plt.figure() plt.plot(tt, x, ".-", tt, xo, ".-") plt.title("moving max lagged") xo = movorder(x, order="max", lag="centered") plt.figure() plt.plot(tt, x, ".-", tt, xo, ".-") plt.title("moving max centered") xo = movorder(x, order="max", lag="leading") plt.figure() plt.plot(tt, x, ".-", tt, xo, ".-") plt.title("moving max leading") # identity filter # >>> signal.order_filter(x,np.ones(1),0) # array([ 1., 2., 3., 4., 5., 6., 7., 8., 9.]) # median filter # signal.medfilt(np.sin(x), kernel_size=3) # >>> plt.figure() # <matplotlib.figure.Figure object at 0x069BBB50> # >>> x=np.linspace(0,3,100);plt.plot(x,np.sin(x),x,signal.medfilt(np.sin(x), kernel_size=3)) # remove old version # def movmeanvar(x, windowsize=3, valid='same'): # ''' # this should also work along axis or at least for columns # ''' # n = x.shape[0] # x = expandarr(x, windowsize - 1) # takeslice = slice(windowsize-1, n + windowsize-1) # avgkern = (np.ones(windowsize)/float(windowsize)) # m = np.correlate(x, avgkern, 'same')# [takeslice] # print(m.shape) # print(x.shape) # xm = x - m # v = np.correlate(x*x, avgkern, 'same') - m**2 # v1 = np.correlate(xm*xm, avgkern, valid) # not correct for var of window # #>>> np.correlate(xm*xm,np.array([1,1,1])/3.0,'valid')-np.correlate(xm*xm,np.array([1,1,1])/3.0,'valid')**2 # return m[takeslice], v[takeslice], v1
[docs] def movmean(x, windowsize=3, lag="lagged"): """moving window mean Parameters ---------- x : ndarray time series data windsize : int window size lag : 'lagged', 'centered', or 'leading' location of window relative to current position Returns ------- mk : ndarray moving mean, with same shape as x Notes ----- for leading and lagging the data array x is extended by the closest value of the array """ return movmoment(x, 1, windowsize=windowsize, lag=lag)
[docs] def movvar(x, windowsize=3, lag="lagged"): """moving window variance Parameters ---------- x : ndarray time series data windsize : int window size lag : 'lagged', 'centered', or 'leading' location of window relative to current position Returns ------- mk : ndarray moving variance, with same shape as x """ m1 = movmoment(x, 1, windowsize=windowsize, lag=lag) m2 = movmoment(x, 2, windowsize=windowsize, lag=lag) return m2 - m1 * m1
[docs] def movmoment(x, k, windowsize=3, lag="lagged"): """non-central moment Parameters ---------- x : ndarray time series data windsize : int window size lag : 'lagged', 'centered', or 'leading' location of window relative to current position Returns ------- mk : ndarray k-th moving non-central moment, with same shape as x Notes ----- If data x is 2d, then moving moment is calculated for each column. """ windsize = windowsize # if windsize is even should it raise ValueError if lag == "lagged": # lead = -0 + windsize # windsize//2 lead = -0 # + (windsize-1) + windsize//2 sl = slice((windsize - 1) or None, -2 * (windsize - 1) or None) elif lag == "centered": lead = -windsize // 2 # 0#-1 #+ #(windsize-1) sl = slice( (windsize - 1) + windsize // 2 or None, -(windsize - 1) - windsize // 2 or None, ) elif lag == "leading": # lead = -windsize +1#+1 #+ (windsize-1)#//2 +1 lead = -windsize + 2 # -windsize//2 +1 sl = slice( 2 * (windsize - 1) + 1 + lead or None, -(2 * (windsize - 1) + lead) + 1 or None, ) else: raise ValueError avgkern = np.ones(windowsize) / float(windowsize) xext = expandarr(x, windsize - 1) # Note: expandarr increases the array size by 2*(windsize-1) # sl = slice(2*(windsize-1)+1+lead or None, -(2*(windsize-1)+lead)+1 or None) print(sl) if xext.ndim == 1: return np.correlate(xext**k, avgkern, "full")[sl] # return np.correlate(xext**k, avgkern, 'same')[windsize-lead:-(windsize+lead)] else: print(xext.shape) print(avgkern[:, None].shape) # try first with 2d along columns, possibly ndim with axis return signal.correlate(xext**k, avgkern[:, None], "full")[sl, :]
# x=0.5**np.arange(10);xm=x-x.mean();a=np.correlate(xm,[1],'full') # x=0.5**np.arange(3);np.correlate(x,x,'same') # >>> x=0.5**np.arange(10);xm=x-x.mean();a=np.correlate(xm,xo,'full') # # >>> xo=np.ones(10);d=np.correlate(xo,xo,'full') # >>> xo # xo=np.ones(10);d=np.correlate(xo,xo,'full') # >>> x=np.ones(10);xo=x-x.mean();a=np.correlate(xo,xo,'full') # >>> xo=np.ones(10);d=np.correlate(xo,xo,'full') # >>> d # array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 9., # 8., 7., 6., 5., 4., 3., 2., 1.]) # def ccovf(): # pass # # x=0.5**np.arange(10);xm=x-x.mean();a=np.correlate(xm,xo,'full') __all__ = ["movmean", "movmoment", "movorder", "movvar"] if __name__ == "__main__": print("\ncheckin moving mean and variance") nobs = 10 x = np.arange(nobs) ws = 3 ave = np.array([0.0, 1 / 3.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 26 / 3.0, 9]) va = np.array( [ [0.0, 0.0], [0.22222222, 0.88888889], [0.66666667, 2.66666667], [0.66666667, 2.66666667], [0.66666667, 2.66666667], [0.66666667, 2.66666667], [0.66666667, 2.66666667], [0.66666667, 2.66666667], [0.66666667, 2.66666667], [0.66666667, 2.66666667], [0.22222222, 0.88888889], [0.0, 0.0], ] ) ave2d = np.c_[ave, 2 * ave] print(movmean(x, windowsize=ws, lag="lagged")) print(movvar(x, windowsize=ws, lag="lagged")) print([np.var(x[i - ws : i]) for i in range(ws, nobs)]) m1 = movmoment(x, 1, windowsize=3, lag="lagged") m2 = movmoment(x, 2, windowsize=3, lag="lagged") print(m1) print(m2) print(m2 - m1 * m1) # this implicitly also tests moment assert_array_almost_equal(va[ws - 1 :, 0], movvar(x, windowsize=3, lag="leading")) assert_array_almost_equal( va[ws // 2 : -ws // 2 + 1, 0], movvar(x, windowsize=3, lag="centered") ) assert_array_almost_equal(va[: -ws + 1, 0], movvar(x, windowsize=ws, lag="lagged")) print("\nchecking moving moment for 2d (columns only)") x2d = np.c_[x, 2 * x] print(movmoment(x2d, 1, windowsize=3, lag="centered")) print(movmean(x2d, windowsize=ws, lag="lagged")) print(movvar(x2d, windowsize=ws, lag="lagged")) assert_array_almost_equal(va[ws - 1 :, :], movvar(x2d, windowsize=3, lag="leading")) assert_array_almost_equal( va[ws // 2 : -ws // 2 + 1, :], movvar(x2d, windowsize=3, lag="centered") ) assert_array_almost_equal( va[: -ws + 1, :], movvar(x2d, windowsize=ws, lag="lagged") ) assert_array_almost_equal( ave2d[ws - 1 :], movmoment(x2d, 1, windowsize=3, lag="leading") ) assert_array_almost_equal( ave2d[ws // 2 : -ws // 2 + 1], movmoment(x2d, 1, windowsize=3, lag="centered") ) assert_array_almost_equal( ave2d[: -ws + 1], movmean(x2d, windowsize=ws, lag="lagged") ) from scipy import ndimage print(ndimage.filters.correlate1d(x2d, np.array([1, 1, 1]) / 3.0, axis=0)) # regression test check xg = np.array( [ 0.0, 0.1, 0.3, 0.6, 1.0, 1.5, 2.1, 2.8, 3.6, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5, 22.5, 23.5, 24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 30.5, 31.5, 32.5, 33.5, 34.5, 35.5, 36.5, 37.5, 38.5, 39.5, 40.5, 41.5, 42.5, 43.5, 44.5, 45.5, 46.5, 47.5, 48.5, 49.5, 50.5, 51.5, 52.5, 53.5, 54.5, 55.5, 56.5, 57.5, 58.5, 59.5, 60.5, 61.5, 62.5, 63.5, 64.5, 65.5, 66.5, 67.5, 68.5, 69.5, 70.5, 71.5, 72.5, 73.5, 74.5, 75.5, 76.5, 77.5, 78.5, 79.5, 80.5, 81.5, 82.5, 83.5, 84.5, 85.5, 86.5, 87.5, 88.5, 89.5, 90.5, 91.5, 92.5, 93.5, 94.5, ] ) assert_array_almost_equal(xg, movmean(np.arange(100), 10, "lagged")) xd = np.array( [ 0.3, 0.6, 1.0, 1.5, 2.1, 2.8, 3.6, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5, 22.5, 23.5, 24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 30.5, 31.5, 32.5, 33.5, 34.5, 35.5, 36.5, 37.5, 38.5, 39.5, 40.5, 41.5, 42.5, 43.5, 44.5, 45.5, 46.5, 47.5, 48.5, 49.5, 50.5, 51.5, 52.5, 53.5, 54.5, 55.5, 56.5, 57.5, 58.5, 59.5, 60.5, 61.5, 62.5, 63.5, 64.5, 65.5, 66.5, 67.5, 68.5, 69.5, 70.5, 71.5, 72.5, 73.5, 74.5, 75.5, 76.5, 77.5, 78.5, 79.5, 80.5, 81.5, 82.5, 83.5, 84.5, 85.5, 86.5, 87.5, 88.5, 89.5, 90.5, 91.5, 92.5, 93.5, 94.5, 95.4, 96.2, 96.9, 97.5, 98.0, 98.4, 98.7, 98.9, 99.0, ] ) assert_array_almost_equal(xd, movmean(np.arange(100), 10, "leading")) xc = np.array( [ 1.36363636, 1.90909091, 2.54545455, 3.27272727, 4.09090909, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 94.90909091, 95.72727273, 96.45454545, 97.09090909, 97.63636364, ] ) assert_array_almost_equal(xc, movmean(np.arange(100), 11, "centered"))