Ticket #3356: sage-3356-part1.patch

File sage-3356-part1.patch, 11.8 KB (added by was, 14 years ago)
  • sage/finance/markov_multifractal.py

    # HG changeset patch
    # User William Stein <wstein@gmail.com>
    # Date 1212477179 25200
    # Node ID b76f5b58a6addd3c0a302b79d7103d90aed4a435
    # Parent  d52073110418946594e1ae19a69cdb16e74633e0
    trac #3356
    
    diff -r d52073110418 -r b76f5b58a6ad sage/finance/markov_multifractal.py
    a b import math 
    1515import math
    1616import random
    1717
    18 from time_series import TimeSeries
     18import time_series
     19
     20import markov_multifractal_cython
    1921
    2022class MarkovSwitchingMultifractal:
    2123    def __init__(self, kbar, m0, sigma, gamma_kbar, b):
    class MarkovSwitchingMultifractal: 
    197199        sigma = self.sigma()/100.0
    198200
    199201        # r is the time series of returns
    200         r = TimeSeries(T)
     202        r = time_series.TimeSeries(T)
    201203
    202204        # Generate T Gaussian random numbers with mean 0
    203205        # and variance 1.
    204206        import scipy.stats
    205207        eps = scipy.stats.norm().rvs(T)
    206208
    207         # Generate uniform distribution between 0 and 1
     209        # Generate uniform distribution around 0 between -1 and 1
    208210        uniform = scipy.stats.uniform().rvs(kbar*T)
    209211
    210212        # The gamma_k
    class MarkovSwitchingMultifractal: 
    221223                   
    222224        return r
    223225           
    224        
     226    def simulations(self, n, k=1):
     227        """
     228        Return k simulations of length n using this Markov switching
     229        multifractal model for n time steps.
     230
     231        INPUT:
     232            n -- positive integer; number of steps
     233            k -- positive integer; number of simulations.
     234           
     235        OUTPUT:
     236            list -- a list of TimeSeries objects.
     237
     238        EXAMPLES:
     239            sage: cad_usd = finance.MarkovSwitchingMultifractal(10,1.278,0.262,0.644,2.11); cad_usd
     240            Markov switching multifractal model with m0 = 1.278, sigma = 0.262, b = 2.11, and gamma_10 = 0.644
     241        """
     242        return markov_multifractal_cython.simulation(n, k,
     243                   self.__m0, self.__sigma, self.__b,
     244                   self.__gamma_kbar, self.__kbar)
    225245       
    226246
    227247   
  • new file sage/finance/markov_multifractal_cython.pyx

    diff -r d52073110418 -r b76f5b58a6ad sage/finance/markov_multifractal_cython.pyx
    - +  
     1"""
     2Markov Switching Multfractal model
     3
     4Cython code
     5"""
     6
     7cdef extern from "stdlib.h":
     8    long random()
     9
     10from time_series cimport TimeSeries
     11
     12def simulation(Py_ssize_t n, Py_ssize_t k,
     13               double m0, double sigma, double b,
     14               double gamma_kbar, int kbar):
     15    cdef double m1 = 2 - m0
     16    cdef Py_ssize_t i, j
     17    cdef TimeSeries t
     18    cdef TimeSeries markov_state_vector = TimeSeries(kbar)
     19
     20    sigma = sigma / 100.0  # model's sigma is a percent
     21
     22    # output list of simulations
     23    S = []
     24
     25    for i from 0 <= i < k:
     26        # Initalize the model
     27        for j from 0 <= j < kbar:
     28            markov_state_vector._values[j] = m0 if random()%2 else m1
     29        t = TimeSeries(n)
     30       
     31       
     32   
     33
     34   
     35   
  • new file sage/finance/time_series.pxd

    diff -r d52073110418 -r b76f5b58a6ad sage/finance/time_series.pxd
    - +  
     1cdef class TimeSeries:
     2    cdef double* _values
     3    cdef Py_ssize_t _length
  • sage/finance/time_series.pyx

    diff -r d52073110418 -r b76f5b58a6ad sage/finance/time_series.pyx
    a b cdef extern from "string.h": 
    4545cdef extern from "string.h":
    4646    void* memcpy(void* dst, void* src, size_t len)
    4747
     48cdef extern from "stdlib.h":
     49    long random()
     50    long RAND_MAX
     51
     52cdef inline float ranf():
     53    """
     54    Return random number uniformly distributed in 0..1.
     55    """
     56    return (<float> random())/RAND_MAX
     57   
    4858from sage.rings.integer import Integer
    4959from sage.rings.real_double import RDF
    5060from sage.modules.real_double_vector cimport RealDoubleVectorSpaceElement
     61
    5162
    5263max_print = 10
    5364digits = 4
    5465   
    5566cdef class TimeSeries:
    56     cdef double* _values
    57     cdef Py_ssize_t _length
    58    
    5967    def __new__(self, values=None):
    6068        """
    6169        Create new empty uninitialized time series.
    cdef class TimeSeries: 
    410418            [1.0000, 5.0000, 1.0000, 2.0000, -5.0000]
    411419        """
    412420        if not isinstance(right, TimeSeries):
    413             right = TimeSeries(right)
     421            raise TypeError, "right must be a time series"
    414422        if not isinstance(left, TimeSeries):
    415             left = TimeSeries(left)
     423            raise TypeError, "right must be a time series"
    416424        cdef TimeSeries R = right
    417425        cdef TimeSeries L = left
    418426        cdef TimeSeries t = new_time_series(L._length + R._length)
    cdef class TimeSeries: 
    734742        return v
    735743
    736744
    737     def plot(self, points=False, **kwds):
     745    def plot(self, Py_ssize_t plot_points=1000, points=False, **kwds):
    738746        """
    739747        Return a plot of this time series as a line or points through
    740748        (i,T(i)), where i ranges over nonnegative integers up to the
    741749        length of self.
    742750
    743751        INPUT:
     752            plot_points -- (default: 1000) 0 or positive integer; only
     753                           plot the given number of equally spaced
     754                           points in the time series; if 0, plot all points
    744755            points -- bool (default: False) -- if True, return just the
    745756                      points of the time series
    746757            **kwds -- passed to the line or point command
    cdef class TimeSeries: 
    754765            sage: v.plot() + v.plot(points=True, rgbcolor='red',pointsize=50)
    755766        """
    756767        from sage.plot.all import line, point
    757         v = self.list()
    758         w = list(enumerate(v))
     768        cdef Py_ssize_t s
     769
     770        if self._length < plot_points:
     771            plot_points = 0
     772       
     773        if plot_points > 0:
     774            s = self._length/plot_points
     775            if plot_points <= 0:
     776                raise ValueError, "plot_points must be a positive integer"
     777            v = self.scale_time(s).list()
     778        else:
     779            s = 1
     780            v = self.list()
     781        w = [(i * s, y) for i,y in enumerate(v)]
    759782        if points:
    760783            L = point(w, **kwds)           
    761784        else:
    cdef class TimeSeries: 
    763786        L.ymin(min(v))
    764787        L.ymax(max(v))
    765788        L.xmin(0)
    766         L.xmax(len(v))
     789        L.xmax(len(v)*s)
    767790        return L
    768791
    769792    def simple_moving_average(self, Py_ssize_t k):
    cdef class TimeSeries: 
    10281051        else:
    10291052            return s
    10301053
    1031     def histogram(self, Py_ssize_t bins=50):
     1054    def histogram(self, Py_ssize_t bins=50, bint normalize=False):
    10321055        """
    10331056        Return the frequency histogram of the values in
    10341057        this time series divided up into the given
    cdef class TimeSeries: 
    10561079            raise ValueError, "bins must be positive"
    10571080       
    10581081        cdef double mn = self.min(), mx = self.max()
    1059         cdef double r = mx - mn + 0.001, step = r/bins
     1082        cdef double r = mx - mn, step = r/bins
     1083
     1084        if r == 0:
     1085            raise ValueError, "bins have 0 width"
    10601086
    10611087        v = [(mn + j*step, mn + (j+1)*step) for j in range(bins)]
     1088        if self._length == 0:
     1089            return [], v
     1090       
    10621091        if step == 0:
    10631092            counts = [0]*bins
    10641093            counts[0] = [self._length]
    cdef class TimeSeries: 
    10741103        for i from 0 <= i < self._length:
    10751104            cnts[<Py_ssize_t>floor((self._values[i] - mn)/step)] += 1
    10761105
    1077         counts = [cnts[i] for i in range(bins)]
     1106        b = 1.0/(self._length * step)
     1107        if normalize:
     1108            counts = [cnts[i]*b for i in range(bins)]
     1109        else:
     1110            counts = [cnts[i] for i in range(bins)]
    10781111        sage_free(cnts)
    10791112
    10801113        return counts, v
    10811114
    1082     def plot_histogram(self, bins=50, **kwds):
     1115    def plot_histogram(self, bins=50, normalize=True, **kwds):
    10831116        """
    10841117        Return histogram plot of this time series with given number of bins.
    1085        
     1118               
    10861119        INPUT:
    10871120            bins -- positive integer (default: 50)
     1121            normalize -- whether to normalize so the total area in the
     1122                         bars of the histogram is 1.
    10881123            **kwds -- passed to the bar_chart function
    10891124        OUTPUT:
    10901125            a histogram plot
    cdef class TimeSeries: 
    10941129            sage: v.plot_histogram(bins=10)
    10951130        """
    10961131        from sage.plot.all import bar_chart, polygon
    1097         counts, intervals = self.histogram(bins)
     1132        counts, intervals = self.histogram(bins, normalize=normalize)
    10981133        s = 0
    10991134        for i, (x0,x1) in enumerate(intervals):
    11001135            s += polygon([(x0,0), (x0,counts[i]), (x1,counts[i]), (x1,0)], **kwds)
    cdef class TimeSeries: 
    11151150        import numpy
    11161151        #TODO: make this faster by accessing raw memory?
    11171152        return numpy.array(self.list(), dtype=float)
    1118            
     1153
     1154    def randomize(self, distribution='uniform', loc=0, scale=1, **kwds):
     1155        """
     1156        INPUT:
     1157            distribution -- 'uniform'
     1158                            'normal'
     1159                            'semicircle'
     1160        """
     1161        if distribution == 'uniform':
     1162            self._randomize_uniform(loc, scale)
     1163        elif distribution == 'normal':
     1164            self._randomize_normal(loc, scale)
     1165        elif distribution == 'semicircle':
     1166            self._randomize_semicircle(loc)
     1167        else:
     1168            raise NotImplementedError
     1169
     1170    def _randomize_uniform(self, double left, double right):
     1171        if left >= right:
     1172            raise ValueError, "left must be less than right"
     1173        cdef Py_ssize_t k
     1174        cdef double d = right - left
     1175        for k from 0 <= k < self._length:
     1176            self._values[k] = ranf() * d - left
     1177
     1178    def _randomize_normal(self, double m, double s):
     1179        """
     1180        INPUT:
     1181            m -- mean
     1182            s -- standard deviation
     1183
     1184        EXAMPLES:
     1185        """
     1186        # Ported from http://users.tkk.fi/~nbeijar/soft/terrain/source_o2/boxmuller.c
     1187        # This the box muller algorithm.
     1188        cdef double x1, x2, w, y1, y2
     1189        cdef Py_ssize_t k
     1190        for k from 0 <= k < self._length:
     1191            while 1:
     1192                x1 = 2.0 * ranf() - 1.0
     1193                x2 = 2.0 * ranf() - 1.0
     1194                w = x1 * x1 + x2 * x2
     1195                if w < 1.0: break
     1196            w = sqrt( (-2.0 * log( w ) ) / w )
     1197            y1 = x1 * w
     1198            y2 = x2 * w
     1199            self._values[k] = m + y1*s
     1200            k += 1
     1201            if k < self._length:
     1202                self._values[k] = m + y2*s
     1203
     1204    def _randomize_semicircle(self, double center):
     1205        cdef Py_ssize_t k
     1206        cdef double x, y, s, d = 2, left = center - 1, z
     1207        z = d*d
     1208        s = 1.5707963267948966192  # pi/2
     1209        for k from 0 <= k < self._length:
     1210            while 1:
     1211                x = ranf() * d - 1
     1212                y = ranf() * s
     1213                if y*y + x*x < 1:
     1214                    break
     1215            self._values[k] = x + center
    11191216
    11201217cdef new_time_series(Py_ssize_t length):
    11211218    """
  • setup.py

    diff -r d52073110418 -r b76f5b58a6ad setup.py
    a b symmetrica = Extension('sage.libs.symmet 
    477477
    478478time_series = Extension('sage.finance.time_series',['sage/finance/time_series.pyx'])
    479479
     480markov_multifractal = Extension('sage.finance.markov_multifractal_cython',
     481                                ['sage/finance/markov_multifractal_cython.pyx'])
     482
     483
    480484#####################################################
    481485
    482486ext_modules = [ \
    ext_modules = [ \ 
    590594    symmetrica,
    591595
    592596    time_series,
     597   
     598    markov_multifractal,
    593599
    594600    Extension('sage.media.channels',
    595601              sources = ['sage/media/channels.pyx']), \