Ticket #3773: sage-3773-part3.patch

File sage-3773-part3.patch, 14.0 KB (added by was, 13 years ago)
  • sage/stats/hmm/chmm.pyx

    # HG changeset patch
    # User William Stein <wstein@gmail.com>
    # Date 1218120013 25200
    # Node ID 61bf3243f0aca6aa5e1f3d9d59d701e59a3cb468
    # Parent  83635164ac474bbfc37e21e843bc62b41216d0e1
    trac #3773 -- part 3 -- Add fixing of emissions for all models; get doctest coverage back to 100%; fix more bugs by doing so.
    
    diff -r 83635164ac47 -r 61bf3243f0ac sage/stats/hmm/chmm.pyx
    a b cdef class GaussianHiddenMarkovModel(Con 
    128128        [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)]
    129129        Initial probabilities: [1.0, 0.0, 0.0]
    130130    """
    131     def __init__(self, A, B, pi, name=None):
     131    def __init__(self, A, B, pi=None, name=None):
    132132        """
    133133        EXAMPLES:
    134134        We make a very simple model:
    cdef class GaussianHiddenMarkovModel(Con 
    166166        self.initialized = True
    167167
    168168    def _initialize_state(self, pi):
    169         # Allocate and initialize states
     169        """
     170        Allocate and initialize states.
     171
     172        INPUT:
     173            pi -- initial probabilities
     174
     175        All other inputs are set as self.A and self.B.
     176
     177        EXAMPLES:
     178        This function is called implicitly during object creation.  It should
     179        never be called directly by the user, unless they want to LEAKE MEMORY.
     180
     181            sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(1,10)], [1]) # indirect test
     182        """
    170183        cdef ghmm_cstate* states = <ghmm_cstate*> safe_malloc(sizeof(ghmm_cstate) * self.m.N)
    171184        cdef ghmm_cstate* state
    172185        cdef ghmm_c_emission* e
    cdef class GaussianHiddenMarkovModel(Con 
    338351
    339352        return 0
    340353
    341     def fix_emission_state(self, Py_ssize_t i, bint fixed=True):
     354    def fix_emissions(self, Py_ssize_t i, bint fixed=True):
    342355        """
    343         Sets the i-th emission state to be either fixed or not fixed.
    344         If it is fixed, then running the Baum-Welch algorithm will not
    345         change it.
     356        Sets the Gaussian emission parameters for the i-th state to be
     357        either fixed or not fixed.  If it is fixed, then running the
     358        Baum-Welch algorithm will not change the emission parameters
     359        for the i-th state.
    346360       
    347361        INPUT:
    348362            i -- nonnegative integer < self.m.N
    cdef class GaussianHiddenMarkovModel(Con 
    363377
    364378        Now we run Baum-Welch with the emission states fixed.  Notice that they don't change.
    365379            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0])
    366             sage: m.fix_emission_state(0); m.fix_emission_state(1)
     380            sage: m.fix_emissions(0); m.fix_emissions(1)
    367381            sage: m.baum_welch([0,1])
    368382            sage: m
    369383            Gaussian Hidden Markov Model with 2 States
    cdef class GaussianHiddenMarkovModel(Con 
    377391        if i < 0 or i >= self.m.N:
    378392            raise IndexError, "index out of range"
    379393        self.m.s[i].e.fixed = fixed
    380 
    381     def fix_hidden_state(self, Py_ssize_t i, bint fixed=True):
    382         """
    383         Sets the i-th hidden state to be either fixed or not fixed.
    384         If it is fixed, then running the Baum-Welch algorithm will not
    385         change it.
    386        
    387         INPUT:
    388             i -- nonnegative integer < self.m.N
    389             fixed -- bool
    390 
    391         EXAMPLES:
    392         """
    393         if i < 0 or i >= self.m.N:
    394             raise IndexError, "index out of range"
    395         self.m.s[i].fix = fixed
    396394
    397395    def __repr__(self):
    398396        """
    cdef class GaussianMixtureHiddenMarkovMo 
    789787        pi -- list of floats that sums to 1.0; these are
    790788              the initial probabilities of each hidden state
    791789        name -- (default: None); a string
     790
     791    EXAMPLES:
     792        sage: A  = [[0.5,0.5],[0.5,0.5]]
     793        sage: B  = [[(0.5,(0.0,1.0)), (0.1,(1,10000))],[(1,(1,1)), (0,(0,0.1))]]
     794        sage: pi = [1,0]
     795        sage: hmm.GaussianMixtureHiddenMarkovModel(A, B, pi)
     796        Gaussian Hidden Markov Model with 2 States
     797        Transition matrix:
     798        [0.5 0.5]
     799        [0.5 0.5]
     800        Emission parameters:
     801        [[(0.5, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.10000000000000001))]]
     802        Initial probabilities: [1.0, 0.0]
     803
     804    TESTS:
     805    We test that standard deviations must be positive:
     806        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(0,0))]], [1])
     807        Traceback (most recent call last):
     808        ...
     809        ValueError: sigma must be positive (if weight is nonzero)
     810
     811    We test that number of mixtures must be positive:
     812        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[]], [1])
     813        Traceback (most recent call last):
     814        ...
     815        ValueError: number of Gaussian mixtures must be positive
    792816    """
    793     def __init__(self, A, B, pi, name=None):
     817    def __init__(self, A, B, pi=None, name=None):
    794818        """
    795819        EXAMPLES:
     820            sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(0,1))]], [1], name='simple')
     821            Gaussian Hidden Markov Model simple with 1 States
     822            Transition matrix:
     823            [1.0]
     824            Emission parameters:
     825            [[(1.0, (0.0, 1.0))]]
     826            Initial probabilities: [1.0]
    796827        """
    797828        # Turn B into a list of lists
    798829        B = [flatten(x) for x in B]
    cdef class GaussianMixtureHiddenMarkovMo 
    800831        if m == 0:
    801832            raise ValueError, "number of Gaussian mixtures must be positive"
    802833        B = [x + [0]*(m-len(x)) for x in B]
    803         GaussianHiddenMarkovModel.__init__(self, A, B, pi)
    804         print m//3
     834        GaussianHiddenMarkovModel.__init__(self, A, B, pi, name=name)
    805835        self.m.M = m//3
    806836        # Set number of outputs. 
    807837
    808838    def _initialize_state(self, pi):
    809         # Allocate and initialize states
     839        """
     840        Allocate and initialize states.
     841
     842        INPUT:
     843            pi -- initial probabilities
     844
     845        All other inputs are set as self.A and self.B.
     846
     847        EXAMPLES:
     848        This function is called implicitly during object creation.  It should
     849        never be called directly by the user, unless they want to LEAKE MEMORY.
     850
     851            sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(1,10))]], [1]) # indirect test
     852        """
    810853        cdef ghmm_cstate* states = <ghmm_cstate*> safe_malloc(sizeof(ghmm_cstate) * self.m.N)
    811854        cdef ghmm_cstate* state
    812855        cdef ghmm_c_emission* e
    cdef class GaussianMixtureHiddenMarkovMo 
    831874                e[n].dimension = 1
    832875                mu             = v[n*3+1]
    833876                sigma          = v[n*3+2]
     877                if sigma <= 0 and v[n*3]:
     878                    raise ValueError, "sigma must be positive (if weight is nonzero)"
    834879                weights.append(  v[n*3] )
    835880                e[n].mean.val     = mu
    836881                e[n].variance.val = sigma*sigma  # variance! not standard deviation
    p 
    890935                 for j in range(self.m.s[i].M)]  for i in range(self.m.N)]
    891936
    892937       
     938
     939    def fix_emissions(self, Py_ssize_t i, Py_ssize_t j, bint fixed=True):
     940        """
     941        Sets the j-th Gaussian of the emission parameters for the i-th
     942        state to be either fixed or not fixed.  If it is fixed, then
     943        running the Baum-Welch algorithm will not change the emission
     944        parameters for the i-th state.
     945       
     946        INPUT:
     947            i -- nonnegative integer < self.m.N
     948            j -- nonnegative integer < self.m.M
     949            fixed -- bool
     950
     951        EXAMPLES:
     952        We run Baum-Welch once without fixing the emission states:
     953            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0])
     954            sage: m.baum_welch([0,1])
     955            sage: m
     956            Gaussian Hidden Markov Model with 2 States
     957            Transition matrix:
     958            [0.0 1.0]
     959            [0.1 0.9]
     960            Emission parameters:
     961            [(0.0, 0.01), (1.0, 0.01)]
     962            Initial probabilities: [1.0, 0.0]
     963
     964        Now we run Baum-Welch with the emission states fixed.  Notice that they don't change.
     965            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0])
     966            sage: m.fix_emissions(0); m.fix_emissions(1)
     967            sage: m.baum_welch([0,1])
     968            sage: m
     969            Gaussian Hidden Markov Model with 2 States
     970            Transition matrix:
     971            [0.000368587006957    0.999631412993]
     972            [              0.1               0.9]
     973            Emission parameters:
     974            [(0.0, 1.0), (1.0, 1.0)]
     975            Initial probabilities: [1.0, 0.0]
     976        """
     977        if i < 0 or i >= self.m.N:
     978            raise IndexError, "index i out of range"
     979        if j < 0 or j >= self.m.M:
     980            raise IndexError, "index j out of range"
     981        self.m.s[i].e[j].fixed = fixed
  • sage/stats/hmm/hmm.pyx

    diff -r 83635164ac47 -r 61bf3243f0ac sage/stats/hmm/hmm.pyx
    a b Hidden Markov Models 
    22Hidden Markov Models
    33
    44AUTHOR: William Stein
    5 
    6 EXAMPLES:
    7 
    8 TODO:
    9    * make models pickleable (i.e., all parameters should be obtainable
    10      using functions to make this easy).
    11    * continuous hmm's
    125"""
    136
    147#############################################################################
    cdef class HiddenMarkovModel: 
    4437        sage: sage.stats.hmm.hmm.HiddenMarkovModel([[0.4,0.6],[1,0]], [[1,0],[0.5,0.5]], [0.5,0.5])
    4538        <sage.stats.hmm.hmm.HiddenMarkovModel object at ...>
    4639    """
    47     def __init__(self, A, B, pi):
     40    def __init__(self, A, B, pi=None):
    4841        """
    4942        INPUT:
    5043            A -- matrix or list
    cdef class HiddenMarkovModel: 
    8477            B = B.change_ring(RDF)
    8578
    8679        # Make sure the initial probabilities are all floats.
    87         self.pi = [float(x) for x in pi]
    88         if len(self.pi) != A.nrows():
    89             raise ValueError, "length of pi must equal number of rows of A"
     80        if pi is None:
     81            if A.nrows() == 0:
     82                self.pi = []
     83            else:
     84                self.pi = [1.0/A.nrows()]*A.nrows()
     85        else:
     86            self.pi = [float(x) for x in pi]
     87            if len(self.pi) != A.nrows():
     88                raise ValueError, "length of pi must equal number of rows of A"
    9089
    9190        # Record the now validated matrices A and B as attributes.
    9291        # They get used later as attributes in the constructors for
    cdef class HiddenMarkovModel: 
    9796
    9897cdef class DiscreteHiddenMarkovModel(HiddenMarkovModel):
    9998   
    100     def __init__(self, A, B, pi, emission_symbols=None, name=None):
     99    def __init__(self, A, B, pi=None, emission_symbols=None, name=None):
    101100        """
    102101        INPUTS:
    103102            A  -- square matrix of doubles; the state change probabilities
    cdef class DiscreteHiddenMarkovModel(Hid 
    292291        Deallocate memory allocated by the HMM.
    293292
    294293        EXAMPLES:
    295             sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])  # indirect doctest
    296             sage: del a
     294            sage: m = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])  # indirect doctest
     295            sage: del m
    297296        """
    298297        if self.initialized:
    299298            ghmm_dmodel_free(&self.m)
     299
     300    def fix_emissions(self, Py_ssize_t i, bint fixed=True):
     301        """
     302        Sets the i-th emission parameters to be either fixed or not
     303        fixed.  If it is fixed, then running the Baum-Welch algorithm
     304        will not change the emission parameters for the i-th state.
     305       
     306        INPUT:
     307            i -- nonnegative integer < self.m.N
     308            fixed -- bool
     309
     310        EXAMPLES:
     311        First without calling fix_emissions:
     312            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]],[[.5,.5],[.5,.5]])
     313            sage: m.baum_welch([0,0,0,1,1,1])
     314            sage: m.emission_matrix()
     315            [              1.0               0.0]
     316            [3.92881039079e-05    0.999960711896]
     317
     318        We call fix_emissions on the first state and notice that the first
     319        row of the emission matrix does not change:
     320            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]],[[.5,.5],[.5,.5]])
     321            sage: m.fix_emissions(0)
     322            sage: m.baum_welch([0,0,0,1,1,1])
     323            sage: m.emission_matrix()
     324            [              0.5               0.5]
     325            [0.000542712675606    0.999457287324]
     326
     327        We call fix_emissions on the second state and notice that the second
     328        row of the emission matrix does not change:
     329            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]],[[.5,.5],[.5,.5]])
     330            sage: m.fix_emissions(1)
     331            sage: m.baum_welch([0,0,0,1,1,1])
     332            sage: m.emission_matrix()
     333            [   0.999999904763 9.52366620142e-08]
     334            [              0.5               0.5]
     335
     336        TESTS:
     337        Make sure that out of range indices are handled correctly with an IndexError.
     338            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]],[[.5,.5],[.5,.5]])
     339            sage: m.fix_emissions(2)
     340            Traceback (most recent call last):
     341            ...
     342            IndexError: index out of range
     343            sage: m.fix_emissions(-1)
     344            Traceback (most recent call last):
     345            ...
     346            IndexError: index out of range
     347        """
     348        if i < 0 or i >= self.m.N:
     349            raise IndexError, "index out of range"
     350        self.m.s[i].fix = fixed
    300351
    301352    def __repr__(self):
    302353        """
    cdef class DiscreteHiddenMarkovModel(Hid 
    574625                    Viterbi path.
    575626            float -- log of the probability that the sequence of hidden
    576627                     states actually produced the given sequence seq.
    577                      [[TODO: I do not understand precisely what this means.]]
    578628
    579629        EXAMPLES:
    580630            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[0.9,0.1],[0.1,0.9]], [0.5,0.5])