Ticket #3773: sage-3773-part1.patch

File sage-3773-part1.patch, 11.3 KB (added by was, 13 years ago)
  • sage/stats/hmm/all.py

    # HG changeset patch
    # User William Stein <wstein@gmail.com>
    # Date 1217898137 25200
    # Node ID 25f2c4eaa11cd7189175a2dddffb57cbbcba10f1
    # Parent  1dfe749d493139b7326fb0e18d10466cd9e7b04b
    Added support for Gaussian Mixture Models.
    
    diff -r 1dfe749d4931 -r 25f2c4eaa11c sage/stats/hmm/all.py
    a b  
    77#############################################################################
    88
    99from hmm  import DiscreteHiddenMarkovModel
    10 from chmm import GaussianHiddenMarkovModel
     10from chmm import GaussianHiddenMarkovModel, GaussianMixtureHiddenMarkovModel
  • sage/stats/hmm/chmm.pyx

    diff -r 1dfe749d4931 -r 25f2c4eaa11c sage/stats/hmm/chmm.pyx
    a b include "misc.pxi" 
    1919include "misc.pxi"
    2020
    2121from sage.misc.randstate import random
     22from sage.misc.flatten   import flatten
    2223
    2324from sage.finance.time_series cimport TimeSeries
    2425
     26cdef extern from "math.h":
     27    double sqrt(double)
     28   
    2529cdef class ContinuousHiddenMarkovModel(HiddenMarkovModel):
    2630    """
    2731    Abstract base class for continuous hidden Markov models.
    cdef class GaussianHiddenMarkovModel(Con 
    97101    INPUT:
    98102        A  -- matrix; the transition matrix (n x n)
    99103        B  -- list of n pairs (mu, sigma) that define the
    100               Gaussian distributions associated to each state
     104              Gaussian distributions associated to each state,
     105              where mu is the mean and sigma the standard deviation.
    101106        pi -- list of floats that sums to 1.0; these are
    102107              the initial probabilities of each hidden state
    103108        name -- (default: None); a string
    cdef class GaussianHiddenMarkovModel(Con 
    146151        """
    147152        ContinuousHiddenMarkovModel.__init__(self, A, B, pi, name=name)
    148153
    149         # Set number of outputs.  This is 1 here because each
    150         # output is a single Gaussian distribution.
     154        # Set number of outputs. 
    151155        self.m.M = 1
    152 
    153156        # Set the model type to continuous
    154157        self.m.model_type = GHMM_kContinuousHMM
    155 
    156158        # 1 transition matrix
    157159        self.m.cos   =  1
    158160        # Set that no a prior model probabilities are set.
    159161        self.m.prior = -1
    160162        # Dimension is 1
    161163        self.m.dim   =  1
     164        self._initialize_state(pi)
     165        self.m.class_change = NULL
     166        self.initialized = True
    162167
     168    def _initialize_state(self, pi):
    163169        # Allocate and initialize states
    164170        cdef ghmm_cstate* states = <ghmm_cstate*> safe_malloc(sizeof(ghmm_cstate) * self.m.N)
    165171        cdef ghmm_cstate* state
    cdef class GaussianHiddenMarkovModel(Con 
    179185            e.type      = 0  # normal
    180186            e.dimension = 1
    181187            e.mean.val  = mu
    182             e.variance.val = sigma
     188            e.variance.val = sigma*sigma  # variance! not standard deviation
    183189            # fixing of emissions is deactivated by default           
    184190            e.fixed     = 0
    185191            e.sigmacd   = NULL
    cdef class GaussianHiddenMarkovModel(Con 
    212218                state.in_a[0][j]  = v[j]
    213219
    214220            #########################################################               
    215 
    216 
    217221        # Set states
    218222        self.m.s = states
    219 
    220         self.m.class_change = NULL
    221 
    222         self.initialized = True
    223223
    224224    def __reduce__(self):
    225225        """
    cdef class GaussianHiddenMarkovModel(Con 
    338338
    339339        return 0
    340340
     341    def fix_emission_state(self, Py_ssize_t i, bint fixed=True):
     342        """
     343        Sets the i-th emission state to be either fixed or not fixed.
     344        If it is fixed, then running the Baum-Welch algorithm will not
     345        change it.
     346       
     347        INPUT:
     348            i -- nonnegative integer < self.m.N
     349            fixed -- bool
     350
     351        EXAMPLES:
     352        We run Baum-Welch once without fixing the emission states:
     353            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0])
     354            sage: m.baum_welch([0,1])
     355            sage: m
     356            Gaussian Hidden Markov Model with 2 States
     357            Transition matrix:
     358            [0.0 1.0]
     359            [0.1 0.9]
     360            Emission parameters:
     361            [(0.0, 0.01), (1.0, 0.01)]
     362            Initial probabilities: [1.0, 0.0]
     363
     364        Now we run Baum-Welch with the emission states fixed.  Notice that they don't change.
     365            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0])
     366            sage: m.fix_emission_state(0); m.fix_emission_state(1)
     367            sage: m.baum_welch([0,1])
     368            sage: m
     369            Gaussian Hidden Markov Model with 2 States
     370            Transition matrix:
     371            [0.000368587006957    0.999631412993]
     372            [              0.1               0.9]
     373            Emission parameters:
     374            [(0.0, 1.0), (1.0, 1.0)]
     375            Initial probabilities: [1.0, 0.0]
     376        """
     377        if i < 0 or i >= self.m.N:
     378            raise IndexError, "index out of range"
     379        self.m.s[i].e.fixed = fixed
     380
     381    def fix_hidden_state(self, Py_ssize_t i, bint fixed=True):
     382        """
     383        Sets the i-th hidden state to be either fixed or not fixed.
     384        If it is fixed, then running the Baum-Welch algorithm will not
     385        change it.
     386       
     387        INPUT:
     388            i -- nonnegative integer < self.m.N
     389            fixed -- bool
     390
     391        EXAMPLES:
     392        """
     393        if i < 0 or i >= self.m.N:
     394            raise IndexError, "index out of range"
     395        self.m.s[i].fix = fixed
     396
    341397    def __repr__(self):
    342398        """
    343399        Return string representation of this Continuous HMM.
    cdef class GaussianHiddenMarkovModel(Con 
    401457
    402458        OUTPUT:
    403459            list of tuples (mu, sigma) that define Gaussian distributions associated to each state.
     460            Here mu is the mean and sigma the standard deviation.
    404461       
    405462        EXAMPLES:
    406463            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(1.5,2),(-1,3)], [1,0], 'NAME')
    cdef class GaussianHiddenMarkovModel(Con 
    408465            [(1.5, 2.0), (-1.0, 3.0)]
    409466        """
    410467        cdef Py_ssize_t i       
    411         return [(self.m.s[i].e.mean.val, self.m.s[i].e.variance.val) for i in range(self.m.N)]
     468        return [(self.m.s[i].e.mean.val, sqrt(self.m.s[i].e.variance.val)) for i in range(self.m.N)]
    412469
    413470    def normalize(self):
    414471        """
    cdef class GaussianHiddenMarkovModel(Con 
    626683            Transition matrix:
    627684            [1.0]
    628685            Emission parameters:
    629             [(1.0, 0.0001)]
     686            [(1.0, 0.01)]
    630687            Initial probabilities: [1.0]
    631688
    632689        Training sequences of length 0 are gracefully ignored:
    def unpickle_gaussian_hmm_v0(A, B, pi, n 
    717774        Initial probabilities: [1.0]
    718775    """
    719776    return GaussianHiddenMarkovModel(A,B,pi,name)
     777
     778
     779cdef class GaussianMixtureHiddenMarkovModel(GaussianHiddenMarkovModel):
     780    """
     781    GaussianMixtureHiddenMarkovModel(A, B, pi, name)
     782
     783    INPUT:
     784        A  -- matrix; the transition matrix (n x n)
     785        B  -- list of lists of pairs (w, (mu, sigma)) that define the
     786              Gaussian mixture associated to each state, where w is
     787              the weight, mu is the mean and sigma the standard
     788              deviation.
     789        pi -- list of floats that sums to 1.0; these are
     790              the initial probabilities of each hidden state
     791        name -- (default: None); a string
     792    """
     793    def __init__(self, A, B, pi, name=None):
     794        """
     795        EXAMPLES:
     796        """
     797        # Turn B into a list of lists
     798        B = [flatten(x) for x in B]
     799        m = max([len(x) for x in B])
     800        if m == 0:
     801            raise ValueError, "number of Gaussian mixtures must be positive"
     802        B = [x + [0]*(m-len(x)) for x in B]
     803        GaussianHiddenMarkovModel.__init__(self, A, B, pi)
     804        print m//3
     805        self.m.M = m//3
     806        # Set number of outputs. 
     807
     808    def _initialize_state(self, pi):
     809        # Allocate and initialize states
     810        cdef ghmm_cstate* states = <ghmm_cstate*> safe_malloc(sizeof(ghmm_cstate) * self.m.N)
     811        cdef ghmm_cstate* state
     812        cdef ghmm_c_emission* e
     813        cdef Py_ssize_t i, j, k, M, n
     814
     815        for i in range(self.m.N):
     816            # Parameters of Gaussian distributions
     817            v = self.B[i]
     818            M = len(v)//3   # number of distinct Gaussians
     819
     820            # Get a reference to the i-th state for convenience of the notation below.
     821            state = &(states[i])
     822            state.M     = M
     823            state.pi    = pi[i]
     824            state.desc  = NULL
     825            state.fix   = 0
     826            e           = <ghmm_c_emission*> safe_malloc(sizeof(ghmm_c_emission)*M)
     827            weights     = []
     828           
     829            for n in range(M):
     830                e[n].type      = 0  # Gaussian
     831                e[n].dimension = 1
     832                mu             = v[n*3+1]
     833                sigma          = v[n*3+2]
     834                weights.append(  v[n*3] )
     835                e[n].mean.val     = mu
     836                e[n].variance.val = sigma*sigma  # variance! not standard deviation
     837               
     838                # fixing of emissions is deactivated by default           
     839                e[n].fixed     = 0
     840                e[n].sigmacd   = NULL
     841                e[n].sigmainv  = NULL
     842               
     843            state.e     = e
     844            state.c     = to_double_array(weights)
     845
     846            #########################################################
     847            # Initialize state transition data.
     848            # NOTE: This code is similar to a block of code in hmm.pyx.
     849           
     850            # Set "out" probabilities, i.e., the probabilities to
     851            # transition to another hidden state from this state.
     852            v = self.A[i]
     853            k = self.m.N
     854            state.out_states = k
     855            state.out_id = <int*> safe_malloc(sizeof(int)*k)
     856            state.out_a  = ighmm_cmatrix_alloc(1, k)
     857            for j in range(k):
     858                state.out_id[j] = j
     859                state.out_a[0][j]  = v[j]
     860
     861            # Set "in" probabilities
     862            v = self.A.column(i)
     863            state.in_states = k
     864            state.in_id = <int*> safe_malloc(sizeof(int)*k)
     865            state.in_a  = ighmm_cmatrix_alloc(1, k)
     866            for j in range(k):
     867                state.in_id[j] = j
     868                state.in_a[0][j]  = v[j]
     869
     870            #########################################################               
     871        # Set states
     872        self.m.s = states
     873
     874    def emission_parameters(self):
     875        """
     876        Return the emission parameters list.
     877
     878        OUTPUT:
     879           list of lists of tuples (weight, (mu, sigma))
     880p
     881        EXAMPLES:
     882            sage: m = hmm.GaussianMixtureHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[(0.5,(0.0,1.0)), (0.1,(1,10000))],[(1,(1,1))]], [1,0])
     883            sage: m.emission_parameters()
     884            [[(0.5, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))],
     885             [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.0))]]
     886        """
     887        cdef Py_ssize_t i,j
     888       
     889        return [[(self.m.s[i].c[j], (self.m.s[i].e[j].mean.val, sqrt(self.m.s[i].e[j].variance.val)))
     890                 for j in range(self.m.s[i].M)]  for i in range(self.m.N)]
     891
     892