# HG changeset patch
# User kantor@josh-kantors-macbook.local
# Date 1215305707 25200
# Node ID 3cf04038566212388e801244bdb4f54e34f69dd2
# Parent 23384509345e7f7edb557152ac8531d0bde5ef65
changed linear_filter/linear_forecast to autoregressive_fit/autoregressive_forecast, added a doctest
diff -r 23384509345e -r 3cf040385662 sage/finance/all.py
from stock import Stock |
2 | 2 | |
3 | 3 | from markov_multifractal import MarkovSwitchingMultifractal |
4 | 4 | |
5 | | from time_series import TimeSeries, linear_filter |
| 5 | from time_series import TimeSeries, autoregressive_fit |
6 | 6 | |
7 | 7 | from fractal import (stationary_gaussian_simulation, |
8 | 8 | fractional_gaussian_noise_simulation, |
diff -r 23384509345e -r 3cf040385662 sage/finance/time_series.pyx
cdef class TimeSeries: |
501 | 501 | memcpy(v._values + i*T._length, T._values, sizeof(double)*T._length) |
502 | 502 | return v |
503 | 503 | |
504 | | def linear_filter(self, M): |
| 504 | |
 | 505 | def autoregressive_fit(self, M): |
505 | 506 | """ |
506 | | Return a linear filter using the first M autocovariance values |
507 | | of self. This is useful in forcasting by taking a weighted |
508 | | average of previous values of a time series. |
 | 507 | This method fits the time series to an autoregressive process |
 | 508 | of order M. That is, we assume the process is given by |
 | 509 | $X_t-\mu=a_1(X_{t-1}-\mu)+a_2(X_{t-2}-\mu)+...+a_M(X_{t-M}-\mu)+Z_t$, |
 | 510 | where $\mu$ is the mean of the process and $Z_t$ is noise. |
 | 511 | This method returns estimates for $a_1,...,a_M$. |
509 | 512 | |
 | 513 | The method works by solving the Yule-Walker equations |
 | 514 | $\Gamma a=\gamma$, where $\gamma=(\gamma(1),...,\gamma(M))$ and |
 | 515 | $a=(a_1,...,a_M)$, with $\gamma(i)$ the autocovariance of lag $i$ |
 | 516 | and $\Gamma_{ij}=\gamma(|i-j|)$. |
| 517 | |
| 518 | |
510 | 519 | WARNING: The input sequence is assumed to be stationary, which |
511 | 520 | means that the autocovariance $\langle y_j y_k \rangle$ depends |
512 | 521 | only on the difference $|j-k|$. |
… |
… |
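For readers outside Sage, here is a minimal sketch of the Yule-Walker solve described in the docstring above, using numpy; the names yule_walker and acvs are illustrative, not part of the Sage API.

    import numpy as np

    def yule_walker(acvs):
        # acvs[k] is the autocovariance at lag k, for k = 0, ..., M.
        # Build the symmetric Toeplitz matrix Gamma_{ij} = gamma(|i - j|)
        # and solve Gamma a = gamma for the coefficients a_1, ..., a_M.
        M = len(acvs) - 1
        Gamma = np.array([[acvs[abs(i - j)] for j in range(M)] for i in range(M)])
        gamma = np.array(acvs[1:])
        return np.linalg.solve(Gamma, gamma)

For the exact autocovariances of an AR(2) process such as $X_t=X_{t-1}-0.5X_{t-2}+Z_t$ (the recurrence used in the doctest below), this solve with M=2 returns coefficients close to (1, -0.5), which is what the doctest checks using sample autocovariances.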
cdef class TimeSeries: |
515 | 525 | M -- integer |
516 | 526 | |
517 | 527 | OUTPUT: |
518 | | TimeSeries -- the weights in the linear filter. |
| 528 | TimeSeries -- the coefficients of the autoregressive process. |
519 | 529 | |
520 | 530 | EXAMPLES: |
521 | 531 | sage: set_random_seed(0) |
522 | 532 | sage: v = finance.TimeSeries(10^4).randomize('normal').sums() |
523 | | sage: F = v.linear_filter(100) |
| 533 | sage: F = v.autoregressive_fit(100) |
524 | 534 | sage: v |
525 | 535 | [0.6767, 0.2756, 0.6332, 0.0469, -0.8897 ... 87.6759, 87.6825, 87.4120, 87.6639, 86.3194] |
526 | | sage: v.linear_forecast(F) |
| 536 | sage: v.autoregressive_forecast(F) |
527 | 537 | 86.017728504280015 |
528 | 538 | sage: F |
529 | 539 | [1.0148, -0.0029, -0.0105, 0.0067, -0.0232 ... -0.0106, -0.0068, 0.0085, -0.0131, 0.0092] |
| 540 | |
| 541 | sage: set_random_seed(0) |
 | 542 | sage: t = finance.TimeSeries(2000) |
 | 543 | sage: z = finance.TimeSeries(2000) |
 | 544 | sage: z.randomize('normal', 1) |
| 545 | [1.6767, 0.5989, 1.3576, 0.4136, 0.0635 ... 1.0057, -1.1467, 1.2809, 1.5705, 1.1095] |
 | 546 | sage: t[0] = 1 |
 | 547 | sage: t[1] = 2 |
 | 548 | sage: for i in range(2, 2000): |
 | 549 | ...     t[i] = t[i-1] - 0.5*t[i-2] + z[i] |
 | 550 | ... |
 | 551 | sage: c = t[0:-1].autoregressive_fit(2)  # recovers the recurrence relation |
 | 552 | sage: c  # should be close to [1, -0.5] |
| 553 | [1.0371, -0.5199] |
| 554 | |
| 555 | |
530 | 556 | """ |
531 | 557 | acvs = [self.autocovariance(i) for i in range(M+1)] |
532 | | return linear_filter(acvs) |
| 558 | return autoregressive_fit(acvs) |
533 | 559 | |
534 | | def linear_forecast(self, filter): |
| 560 | def autoregressive_forecast(self, filter): |
535 | 561 | """ |
536 | | Given a linear filter as output by the linear_filter command, |
 | 562 | Given the autoregression coefficients, as output by the autoregressive_fit command, |
537 | 563 | compute the forecast for the next term in the series. |
538 | 564 | |
539 | 565 | INPUT: |
540 | | filter -- a time series output by the linear filter command. |
 | 566 | filter -- a time series of autoregression coefficients, as output by the autoregressive_fit command. |
541 | 567 | |
542 | 568 | EXAMPLES: |
543 | 569 | sage: set_random_seed(0) |
544 | 570 | sage: v = finance.TimeSeries(100).randomize('normal').sums() |
545 | | sage: F = v[:-1].linear_filter(5); F |
| 571 | sage: F = v[:-1].autoregressive_fit(5); F |
546 | 572 | [1.0019, -0.0524, -0.0643, 0.1323, -0.0539] |
547 | | sage: v.linear_forecast(F) |
| 573 | sage: v.autoregressive_forecast(F) |
548 | 574 | 11.782029861181114 |
549 | 575 | sage: v |
550 | 576 | [0.6767, 0.2756, 0.6332, 0.0469, -0.8897 ... 9.2447, 9.6709, 10.4037, 10.4836, 12.1960] |
… |
… |
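For reference, the forecast itself is just a dot product of the fitted coefficients with the most recent values, newest first. A minimal pure-Python sketch, ignoring the mean-centering in the model above (ar_forecast is an illustrative name, not the Sage API):

    def ar_forecast(series, coeffs):
        # Predict the next value as a_1*X_t + a_2*X_{t-1} + ... + a_M*X_{t-M+1}.
        M = len(coeffs)
        recent = series[-1:-M-1:-1]  # the last M values, newest first
        return sum(a * x for a, x in zip(coeffs, recent))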
cdef class TimeSeries: |
1432 | 1458 | Another example with a small Hurst exponent (notice how bad the estimate is...): |
1433 | 1459 | sage: fbm = finance.fractional_brownian_motion_simulation(0.2,0.1,10^5,1)[0].sums() |
1434 | 1460 | sage: fbm.hurst_exponent() |
1435 | | 0.30450273560706259 |
| 1461 | 0.304502... |
| 1462 | |
1436 | 1463 | |
1437 | 1464 | The above example illustrates that this is not a very good |
1438 | 1465 | estimate of the Hurst exponent. |
… |
… |
def unpickle_time_series_v1(v, Py_ssize_ |
2117 | 2144 | |
2118 | 2145 | |
2119 | 2146 | |
2120 | | def linear_filter(acvs): |
| 2147 | def autoregressive_fit(acvs): |
2121 | 2148 | """ |
2122 | | Create a linear filter with given autocovariance sequence. |
 | 2149 | Given a sequence of lagged autocovariances $\gamma(0),...,\gamma(M)$, produce |
 | 2150 | coefficients $a_1,...,a_M$ so that the first $M$ autocovariance coefficients |
 | 2151 | of the autoregressive process $X_t=a_1X_{t-1}+...+a_MX_{t-M}+Z_t$ |
 | 2152 | are the same as the input sequence. |
| 2153 | |
 | 2154 | The function works by solving the Yule-Walker equations |
 | 2155 | $\Gamma a=\gamma$, where $\gamma=(\gamma(1),...,\gamma(M))$ and |
 | 2156 | $a=(a_1,...,a_M)$, with $\gamma(i)$ the autocovariance of lag $i$ |
 | 2157 | and $\Gamma_{ij}=\gamma(|i-j|)$. |
| 2158 | |
2123 | 2159 | |
2124 | 2160 | EXAMPLES: |
2125 | 2161 | |
… |
… |
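The lagged autocovariances this function consumes can be estimated from a sample. A minimal numpy sketch under the usual $1/n$ convention (sample_autocovariances is an illustrative name; within Sage one would call TimeSeries.autocovariance, as the autoregressive_fit method above does):

    import numpy as np

    def sample_autocovariances(x, M):
        # gamma(k) = (1/n) * sum_t (x_t - mean) * (x_{t+k} - mean), for k = 0, ..., M.
        x = np.asarray(x, dtype=float)
        n = len(x)
        xc = x - x.mean()
        return [float(np.dot(xc[:n - k], xc[k:])) / n for k in range(M + 1)]

Feeding the result to the hypothetical yule_walker sketch earlier should reproduce, up to numerical details, what finance.autoregressive_fit does with the autocovariances in the doctests below.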
def linear_filter(acvs): |
2155 | 2191 | Note: ac looks like a line -- one could fit a line to it to extrapolate |
2156 | 2192 | many more approximate autocovariances. |
2157 | 2193 | |
2158 | | We compute the linear filter given by the above autocovariances: |
2159 | | sage: F = finance.linear_filter(ac); F |
| 2194 | We compute the autoregression coefficients matching the above autocovariances: |
| 2195 | sage: F = finance.autoregressive_fit(ac); F |
2160 | 2196 | [0.9982, -0.0002, -0.0002, 0.0003, 0.0001 ... 0.0002, -0.0002, -0.0000, -0.0002, -0.0014] |
2161 | 2197 | |
2162 | 2198 | Note that the sum is close to 1. |
… |
… |
def linear_filter(acvs): |
2169 | 2205 | [0.0013, 0.0059, 0.0066, 0.0068, 0.0184 ... 6.8004, 6.8009, 6.8063, 6.8090, 6.8339] |
2170 | 2206 | |
2171 | 2207 | And we forecast the very last value using our autoregressive model; the forecast is close: |
2172 | | sage: y2[:-1].linear_forecast(F) |
| 2208 | sage: y2[:-1].autoregressive_forecast(F) |
2173 | 2209 | 6.7836741372407... |
2174 | 2210 | |
2175 | 2211 | In fact it is closer than we would get by forecasting using an |
2176 | 2212 | autoregressive fit made from all the autocovariances of our sequence: |
2177 | | sage: y2[:-1].linear_forecast(y2[:-1].linear_filter(len(y2))) |
| 2213 | sage: y2[:-1].autoregressive_forecast(y2[:-1].autoregressive_fit(len(y2))) |
2178 | 2214 | 6.7701687056683... |
2179 | 2215 | |
2180 | 2216 | We record the last 20 forecasts, always using all correct values up to the |
2181 | 2217 | one we are forecasting: |
2182 | | sage: s1 = sum([(y2[:-i].linear_forecast(F)-y2[-i])^2 for i in range(1,20)]) |
| 2218 | sage: s1 = sum([(y2[:-i].autoregressive_forecast(F)-y2[-i])^2 for i in range(1,20)]) |
2183 | 2219 | |
2184 | 2220 | We do the same, but using the autocovariances of the sample sequence: |
2185 | | sage: F2 = y2[:-100].linear_filter(len(F)) |
2186 | | sage: s2 = sum([(y2[:-i].linear_forecast(F2)-y2[-i])^2 for i in range(1,20)]) |
| 2221 | sage: F2 = y2[:-100].autoregressive_fit(len(F)) |
| 2222 | sage: s2 = sum([(y2[:-i].autoregressive_forecast(F2)-y2[-i])^2 for i in range(1,20)]) |
2187 | 2223 | |
2188 | 2224 | Our model gives us something that is 15 percent better in this case: |
2189 | 2225 | sage: s2/s1 |
… |
… |
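For reference, the error comparison in these doctests is a rolling one-step-ahead evaluation. A minimal sketch, reusing the hypothetical ar_forecast from earlier (range(1, 20) mirrors the doctests):

    def rolling_forecast_sse(series, coeffs, k=20):
        # Sum of squared one-step-ahead errors near the end of the series,
        # always forecasting from the true history up to that point.
        return sum((ar_forecast(series[:-i], coeffs) - series[-i]) ** 2
                   for i in range(1, k))

The ratio s2/s1 in the doctest then compares this error for the fixed coefficients F against coefficients refit from each sample.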
def linear_filter(acvs): |
2195 | 2231 | sage: y_out = finance.multifractal_cascade_random_walk_simulation(3700,0.02,0.01,0.01,1000,100) |
2196 | 2232 | sage: s1 = []; s2 = [] |
2197 | 2233 | sage: for v in y_out: |
2198 | | ... s1.append(sum([(v[:-i].linear_forecast(F)-v[-i])^2 for i in range(1,20)])) |
2199 | | ... F2 = v[:-len(F)].linear_filter(len(F)) |
2200 | | ... s2.append(sum([(v[:-i].linear_forecast(F2)-v[-i])^2 for i in range(1,20)])) |
| 2234 | ... s1.append(sum([(v[:-i].autoregressive_forecast(F)-v[-i])^2 for i in range(1,20)])) |
| 2235 | ... F2 = v[:-len(F)].autoregressive_fit(len(F)) |
| 2236 | ... s2.append(sum([(v[:-i].autoregressive_forecast(F2)-v[-i])^2 for i in range(1,20)])) |
2201 | 2237 | ... |
2202 | 2238 | |
2203 | 2239 | We find that overall the model beats naive linear forecasting by 35 percent! |