Ticket #3726: sage-3726-part6.patch

File sage-3726-part6.patch, 34.1 KB (added by was, 11 years ago)
  • sage/stats/hmm/all.py

    # HG changeset patch
    # User William Stein <wstein@gmail.com>
    # Date 1217736309 25200
    # Node ID 7bf1791b998d105d792d49376d46b9bef19ab838
    # Parent  0ec7e93289ae6ddd0ec6de054b4748ca25d32355
    trac #3726 -- got all doctest coverage to 100% and generally improved documentation throughout the code.
    
    diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/all.py
    a b from hmm import DiscreteHiddenMarkovMod 
     1#############################################################################
     2#       Copyright (C) 2008 William Stein <wstein@gmail.com>
     3#  Distributed under the terms of the GNU General Public License (GPL),
     4#  version 2 or any later version at your option.
     5#  The full text of the GPL is available at:
     6#                  http://www.gnu.org/licenses/
     7#############################################################################
     8
    19from hmm  import DiscreteHiddenMarkovModel
    210from chmm import GaussianHiddenMarkovModel
  • sage/stats/hmm/chmm.pxd

    diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/chmm.pxd
    a b  
     1#############################################################################
     2#       Copyright (C) 2008 William Stein <wstein@gmail.com>
     3#  Distributed under the terms of the GNU General Public License (GPL),
     4#  version 2 or any later version at your option.
     5#  The full text of the GPL is available at:
     6#                  http://www.gnu.org/licenses/
     7#############################################################################
    18
    29from hmm cimport HiddenMarkovModel
    310
    cdef extern from "ghmm/smodel.h": 
    193200
    194201
    195202    int ghmm_cmodel_free (ghmm_cmodel ** smo)
    196    
     203
     204    # Normalizes initial and transition probabilities and mixture weights.
     205    # 0 on success / -1 on error
     206    int ghmm_cmodel_normalize(ghmm_cmodel *smo)   
    197207       
    198208cdef class ContinuousHiddenMarkovModel(HiddenMarkovModel):
    199209    cdef ghmm_cmodel* m
  • sage/stats/hmm/chmm.pyx

    diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/chmm.pyx
    a b  
     1"""
     2Continuous Hidden Markov Models
    13
     4AUTHORS:
     5    -- William Stein (2008)
     6    -- The authors of GHMM http://ghmm.sourceforge.net/
     7"""
     8
     9#############################################################################
     10#       Copyright (C) 2008 William Stein <wstein@gmail.com>
     11#  Distributed under the terms of the GNU General Public License (GPL),
     12#  version 2 or any later version at your option.
     13#  The full text of the GPL is available at:
     14#                  http://www.gnu.org/licenses/
     15#############################################################################
    216
    317include "../../ext/stdsage.pxi"
    418
    519include "misc.pxi"
    620
    721cdef class ContinuousHiddenMarkovModel(HiddenMarkovModel):
    8     def __init__(self, A, B, pi, name=None):
     22    """
     23    Abstract base class for continuous hidden Markov models.
     24
     25       
     26    INPUT:
     27        A -- square matrix (or list of lists)
     28        B -- list or matrix with numbers of rows equal to number of rows of A;
     29             each row defines the emissions
     30        pi -- list
     31        name -- (default: None); a string
     32
     33    EXAMPLES:
     34        sage: sage.stats.hmm.chmm.ContinuousHiddenMarkovModel([[1.0]], [(-0.0,10.0)], [1], "model")
     35        <sage.stats.hmm.chmm.ContinuousHiddenMarkovModel object at ...>
     36    """
     37    def __init__(self, A, B, pi, name):
     38        """
     39        Constructor for continuous Hidden markov model abstract base class.
     40
     41        EXAMPLES:
     42        This class is an abstract base class, so shouldn't ever be constructed by users.
     43            sage: sage.stats.hmm.chmm.ContinuousHiddenMarkovModel([[1.0]], [(0.0,1.0)], [1], None)
     44            <sage.stats.hmm.chmm.ContinuousHiddenMarkovModel object at ...>
     45        """
    946        self.initialized = False
    1047        HiddenMarkovModel.__init__(self, A, B, pi)
    1148        self.m = <ghmm_cmodel*> safe_malloc(sizeof(ghmm_cmodel))
    1249        # Set number of states
    1350        self.m.N = self.A.nrows()
     51        # Assign model identifier (the name) if specified
     52        if name is not None:
     53            name = str(name)
     54            self.m.name = <char*> safe_malloc(len(name))
     55            strcpy(self.m.name, name)
     56        else:
     57            self.m.name = NULL
     58           
     59
     60    def name(self):
     61        """
     62        Return the name of this model.
     63
     64        OUTPUT:
     65            string or None
     66
     67        EXAMPLES:
     68            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0], 'Test Model')
     69            sage: m.name()
     70            'Test Model'
     71
     72        If the model is not explicitly named then this function returns None:
     73            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0])
     74            sage: m.name()
     75            sage: m.name() is None
     76            True
     77        """
     78        if self.m.name:
     79            s = str(self.m.name)
     80            return s
     81        else:
     82            return None
     83           
    1484
    1585cdef class GaussianHiddenMarkovModel(ContinuousHiddenMarkovModel):
    16     """
     86    r"""
    1787    Create a Gaussian Hidden Markov Model.  The probability
    1888    distribution associated with each state is a Gaussian
    1989    distribution.
    cdef class GaussianHiddenMarkovModel(Con 
    2696              Gaussian distributions associated to each state
    2797        pi -- list of floats that sums to 1.0; these are
    2898              the initial probabilities of each hidden state
    29         name -- (default: None);
     99        name -- (default: None); a string
    30100
    31101    EXAMPLES:
    32102    Define the transition matrix:
    cdef class GaussianHiddenMarkovModel(Con 
    38108    The initial probabilities per state:
    39109        sage: pi = [1,0,0]
    40110
    41     Create the continuous Gaussian hidden Markov model:
    42         sage: m = hmm.GaussianHiddenMarkovModel(A, B, pi); m
     111    We create the continuous Gaussian hidden Markov model defined by $A,B,\pi$:
     112        sage: hmm.GaussianHiddenMarkovModel(A, B, pi)
     113        Gaussian Hidden Markov Model with 3 States
     114        Transition matrix:
     115        [0.0 1.0 0.0]
     116        [0.5 0.0 0.5]
     117        [0.3 0.3 0.4]
     118        Emission parameters:
     119        [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)]
     120        Initial probabilities: [1.0, 0.0, 0.0]
    43121    """
    44122    def __init__(self, A, B, pi, name=None):
    45         ContinuousHiddenMarkovModel.__init__(self, A, B, pi)
     123        """
     124        EXAMPLES:
     125        We make a very simple model:
     126            sage: hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1], 'simple')
     127            Gaussian Hidden Markov Model simple with 1 States
     128            Transition matrix:
     129            [1.0]
     130            Emission parameters:
     131            [(0.0, 1.0)]
     132            Initial probabilities: [1.0]
     133
     134        We test a degenerate case:
     135            sage: hmm.GaussianHiddenMarkovModel([], [], [], 'simple')
     136            Gaussian Hidden Markov Model simple with 0 States
     137            Transition matrix:
     138            []
     139            Emission parameters:
     140            []
     141            Initial probabilities: []
     142        """
     143        ContinuousHiddenMarkovModel.__init__(self, A, B, pi, name=name)
    46144
    47145        # Set number of outputs.  This is 1 here because each
    48146        # output is a single Gaussian distribution.
    cdef class GaussianHiddenMarkovModel(Con 
    51149        # Set the model type to continuous
    52150        self.m.model_type = GHMM_kContinuousHMM
    53151
    54         # Assign model identifier if specified
    55         if name is not None:
    56             name = str(name)
    57             self.m.name = name
    58         else:
    59             self.m.name = NULL
    60            
    61152        # 1 transition matrix
    62153        self.m.cos   =  1
    63154        # Set that no a prior model probabilities are set.
    cdef class GaussianHiddenMarkovModel(Con 
    69160        cdef ghmm_cstate* states = <ghmm_cstate*> safe_malloc(sizeof(ghmm_cstate) * self.m.N)
    70161        cdef ghmm_cstate* state
    71162        cdef ghmm_c_emission* e
     163        cdef Py_ssize_t i, j, k
    72164
    73165        for i in range(self.m.N):
    74166            # Parameters of normal distribution
    cdef class GaussianHiddenMarkovModel(Con 
    78170            state.M     = 1
    79171            state.pi    = pi[i]
    80172            state.desc  = NULL
    81             state.out_states = 0
    82             state.in_states = 0
    83173            e = <ghmm_c_emission*> safe_malloc(sizeof(ghmm_c_emission))
    84174            e.type      = 0  # normal
    85175            e.dimension = 1
    cdef class GaussianHiddenMarkovModel(Con 
    91181            e.sigmainv  = NULL
    92182            state.e     = e
    93183            state.c     = to_double_array([1.0])
    94             state.in_a  = ighmm_cmatrix_alloc(1, self.m.N)
    95             state.out_a = ighmm_cmatrix_alloc(1, self.m.N)
     184
     185            #########################################################
     186            # Initialize state transition data.
     187            # NOTE: This code is similar to a block of code in hmm.pyx.
     188           
     189            # Set "out" probabilities, i.e., the probabilities to
     190            # transition to another hidden state from this state.
     191            v = self.A[i]
     192            k = self.m.N
     193            state.out_states = k
     194            state.out_id = <int*> safe_malloc(sizeof(int)*k)
     195            state.out_a  = ighmm_cmatrix_alloc(1, k)
     196            for j in range(k):
     197                state.out_id[j] = j
     198                state.out_a[0][j]  = v[j]
     199
     200            # Set "in" probabilities
     201            v = self.A.column(i)
     202            state.in_states = k
     203            state.in_id = <int*> safe_malloc(sizeof(int)*k)
     204            state.in_a  = ighmm_cmatrix_alloc(1, k)
     205            for j in range(k):
     206                state.in_id[j] = j
     207                state.in_a[0][j]  = v[j]
     208
     209            #########################################################               
     210
    96211
    97212        # Set states
    98213        self.m.s = states
    cdef class GaussianHiddenMarkovModel(Con 
    102217        self.initialized = True
    103218
    104219    def __dealloc__(self):
     220        """
     221        Dealloc the memory used by this Gaussian HMM, but only if the
     222        HMM was successfully initialized.
     223
     224        EXAMPLES: 
     225            sage: m = hmm.GaussianHiddenMarkovModel([[1.0]], [(0.0,1.0)], [1])   # implicit doctest
     226            sage: del m
     227        """
    105228        if self.initialized:
    106229            ghmm_cmodel_free(&self.m)
     230
     231    def __copy__(self):
     232        """
     233        Return a copy of this Gaussian HMM.
     234       
     235        EXAMPLES:
     236            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0], 'NAME')
     237            sage: copy(m)
     238            Gaussian Hidden Markov Model NAME with 2 States
     239            Transition matrix:
     240            [0.4 0.6]
     241            [0.1 0.9]
     242            Emission parameters:
     243            [(0.0, 1.0), (1.0, 1.0)]
     244            Initial probabilities: [1.0, 0.0]
     245        """
     246       
     247        return GaussianHiddenMarkovModel(self.transition_matrix(), self.emission_parameters(),
     248                                         self.initial_probabilities(), self.name())
     249
     250    def __cmp__(self, other):
     251        """
     252        Compare two Gaussian HMM's.
     253
     254        INPUT:
     255            self, other -- objects; if other is not a Gaussian HMM compare types.
     256        OUTPUT:
     257            -1,0,1
     258
     259        The transition matrices are compared, then the emission
     260        parameters, and the initial probabilities.  The names are not
     261        compared, so two GHMM's with the same parameters but different
     262        names compare to be equal.
     263
     264        EXAMPLES:
     265            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 1")
     266            sage: m.__cmp__(m)
     267            0
     268
     269        Note that the name doesn't matter:
     270            sage: n = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 2")
     271            sage: m.__cmp__(n)
     272            0
     273
     274        Normalizing fixes the initial probabilities, hence m and n are no longer equal.
     275            sage: n.normalize()
     276            sage: m.__cmp__(n)
     277            1
     278        """
     279        if not isinstance(other, GaussianHiddenMarkovModel):
     280            return cmp(type(self), type(other))
     281
     282        if self is other: return 0  # easy special case
     283       
     284        cdef GaussianHiddenMarkovModel o = other
     285        if self.m.N < o.m.N:
     286            return -1
     287        elif self.m.N > o.m.N:
     288            return 1
     289        cdef Py_ssize_t i, j
     290
     291        # The code below is somewhat long and tedious because I want it to be
     292        # very fast.  All it does is explicitly loop through the transition
     293        # matrix, emission parameters and initial state probabilities checking
     294        # that they agree, and if not returning -1 or 1.
     295        # Compare transition matrices
     296        for i from 0 <= i < self.m.N:
     297            for j from 0 <= j < self.m.s[i].out_states:
     298                if self.m.s[i].out_a[0][j] < o.m.s[i].out_a[0][j]:
     299                    return -1
     300                elif self.m.s[i].out_a[0][j] > o.m.s[i].out_a[0][j]:
     301                    return 1
     302
     303        # Compare emissions parameters
     304        for i from 0 <= i < self.m.N:
     305            if self.m.s[i].e.mean.val < o.m.s[i].e.mean.val:
     306                return -1
     307            elif self.m.s[i].e.mean.val > o.m.s[i].e.mean.val:
     308                return 1
     309            if self.m.s[i].e.variance.val < o.m.s[i].e.variance.val:
     310                return -1
     311            elif self.m.s[i].e.variance.val > o.m.s[i].e.variance.val:
     312                return 1
     313           
     314        # Compare initial state probabilities
     315        for 0 <= i < self.m.N:
     316            if self.m.s[i].pi < o.m.s[i].pi:
     317                return -1
     318            elif self.m.s[i].pi > o.m.s[i].pi:
     319                return 1
     320
     321        return 0
    107322
    108323    def __repr__(self):
    109324        """
    cdef class GaussianHiddenMarkovModel(Con 
    114329
    115330        EXAMPLES:
    116331            sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [1,0,0])
    117             sage: a.__repr__()
    118             "Discrete Hidden Markov Model (2 states, 2 outputs)\nInitial probabilities: [0.5, 0.5]\nTransition matrix:\n[0.1 0.9]\n[0.1 0.9]\nEmission matrix:\n[0.9 0.1]\n[0.1 0.9]\nEmission symbols: [3/4, 'abc']"
     332            sage: m.__repr__()
     333            'Gaussian Hidden Markov Model with 3 States\nTransition matrix:\n[0.0 1.0 0.0]\n[0.5 0.0 0.5]\n[0.3 0.3 0.4]\nEmission parameters:\n[(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)]\nInitial probabilities: [1.0, 0.0, 0.0]'
    119334        """
    120         s = "Gaussian Hidden Markov Model%s (%s states, %s outputs)"%(
     335        s = "Gaussian Hidden Markov Model%s with %s States"%(
    121336            ' ' + self.m.name if self.m.name else '',
    122             self.m.N, self.m.M)
    123         s += '\nInitial probabilities: %s'%self.initial_probabilities()
     337            self.m.N)
    124338        s += '\nTransition matrix:\n%s'%self.transition_matrix()
    125339        s += '\nEmission parameters:\n%s'%self.emission_parameters()
     340        s += '\nInitial probabilities: %s'%self.initial_probabilities()
    126341        return s
    127342
    128343    def initial_probabilities(self):
    cdef class GaussianHiddenMarkovModel(Con 
    135350        EXAMPLES:
    136351            sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [0.4,0.3,0.3])
    137352            sage: m.initial_probabilities()
    138             [0.4, 0.3, 0.3]
     353            [0.40000000000000002, 0.29999999999999999, 0.29999999999999999]
    139354        """
    140355        cdef Py_ssize_t i
    141356        return [self.m.s[i].pi for i in range(self.m.N)]
    142357   
    143     def transition_matrix(self, list_only=True):
     358    def transition_matrix(self):
    144359        """
    145360        Return the hidden state transition matrix.
     361
     362        OUTPUT:
     363            matrix whose rows give the transition probabilities of the
     364            Hidden Markov Model states.
    146365       
    147366        EXAMPLES:
    148367            sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [1,0,0])
    149368            sage: m.transition_matrix()
    150             [0.9 0.1]
    151             [0.9 0.1]
     369            [0.0 1.0 0.0]
     370            [0.5 0.0 0.5]
     371            [0.3 0.3 0.4]
    152372        """
    153373        cdef Py_ssize_t i, j
     374        # Update the state of the "immutable" matrix A, then return a reference to it.
    154375        for i from 0 <= i < self.m.N:
    155376            for j from 0 <= j < self.m.s[i].out_states:
    156377                self.A.set_unsafe_double(i,j,self.m.s[i].out_a[0][j])
    cdef class GaussianHiddenMarkovModel(Con 
    158379
    159380    def emission_parameters(self):
    160381        """
    161         Return the emission probability matrix.
     382        Return the emission parameters list.
     383
     384        OUTPUT:
     385            list of tuples (mu, sigma) that define Gaussian distributions associated to each state.
    162386       
    163387        EXAMPLES:
    164             sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [0.1,0.4,0.5])
     388            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(1.5,2),(-1,3)], [1,0], 'NAME')
    165389            sage: m.emission_parameters()
    166             [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000...)]
     390            [(1.5, 2.0), (-1.0, 3.0)]
    167391        """
    168         cdef Py_ssize_t i, j
    169         v = []
    170         for i from 0 <= i < self.m.N:
    171             v.append((self.m.s[i].e.mean.val, self.m.s[i].e.variance.val))
    172         return v
     392        cdef Py_ssize_t i       
     393        return [(self.m.s[i].e.mean.val, self.m.s[i].e.variance.val) for i in range(self.m.N)]
     394
     395    def normalize(self):
     396        """
     397        Normalize the transition and emission probabilities, if
     398        applicable.  This changes self in place.
     399
     400        EXAMPLES:
     401            sage: m = hmm.GaussianHiddenMarkovModel([[1.0,1.2],[0,0.1]], [(0.0,1.0),(1,1)], [1,2])
     402            sage: m.normalize()
     403            sage: m
     404            Gaussian Hidden Markov Model with 2 States
     405            Transition matrix:
     406            [0.454545454545 0.545454545455]
     407            [           0.0            1.0]
     408            Emission parameters:
     409            [(0.0, 1.0), (1.0, 1.0)]
     410            Initial probabilities: [0.33333333333333331, 0.66666666666666663]
     411        """
     412        if ghmm_cmodel_normalize(self.m):
     413            raise RuntimeError, "error normalizing model (note that model may still have been partly changed)"
  • sage/stats/hmm/hmm.pxd

    diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/hmm.pxd
    a b from sage.matrix.matrix_real_double_dens 
     1#############################################################################
     2#       Copyright (C) 2008 William Stein <wstein@gmail.com>
     3#  Distributed under the terms of the GNU General Public License (GPL),
     4#  version 2 or any later version at your option.
     5#  The full text of the GPL is available at:
     6#                  http://www.gnu.org/licenses/
     7#############################################################################
     8
    19from sage.matrix.matrix_real_double_dense cimport Matrix_real_double_dense
    210
    311cdef extern from "ghmm/ghmm.h":
  • sage/stats/hmm/hmm.pyx

    diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/hmm.pyx
    a b TODO: 
    1111   * continuous hmm's
    1212"""
    1313
     14#############################################################################
     15#       Copyright (C) 2008 William Stein <wstein@gmail.com>
     16#  Distributed under the terms of the GNU General Public License (GPL),
     17#  version 2 or any later version at your option.
     18#  The full text of the GPL is available at:
     19#                  http://www.gnu.org/licenses/
     20#############################################################################
     21
    1422import math
    1523
    1624from sage.matrix.all import is_Matrix, matrix
    include "misc.pxi" 
    2230include "misc.pxi"
    2331
    2432cdef class HiddenMarkovModel:
     33    """
     34    Abstract base class for all Hidden Markov Models.
     35
     36    INPUT:
     37        A -- matrix or list
     38        B -- matrix or list
     39        pi -- list of floats
     40
     41    EXAMPLES:
     42    One shouldn't directly call this constructor since this class is an abstract
     43    base class. 
     44        sage: sage.stats.hmm.hmm.HiddenMarkovModel([[0.4,0.6],[1,0]], [[1,0],[0.5,0.5]], [0.5,0.5])
     45        <sage.stats.hmm.hmm.HiddenMarkovModel object at ...>
     46    """
    2547    def __init__(self, A, B, pi):
     48        """
     49        INPUT:
     50            A -- matrix or list
     51            B -- matrix or list
     52            pi -- list of floats
     53
     54        EXAMPLES:
     55            sage: hmm.DiscreteHiddenMarkovModel([[1]], [[0.0,1.0]], [1])
     56            Discrete Hidden Markov Model with 1 States and 2 Emissions
     57            Transition matrix:
     58            [1.0]
     59            Emission matrix:
     60            [0.0 1.0]
     61            Initial probabilities: [1.0]
     62        """
     63        # Convert A to a matrix
    2664        if not is_Matrix(A):
    27             A = matrix(RDF, len(A), len(A[0]), A)
     65            n = len(A)
     66            A = matrix(RDF, n, len(A[0]) if n > 0 else 0, A)
     67
     68        # Convert B to a matrix           
    2869        if not is_Matrix(B):
    29             B = matrix(RDF, len(B), len(B[0]), B)
     70            n = len(B)
     71            B = matrix(RDF, n, len(B[0]) if n > 0 else 0, B)
     72
     73        # Some consistency checks
    3074        if not A.is_square():
     75            print A.parent()
    3176            raise ValueError, "A must be square"
    3277        if A.nrows() != B.nrows():
    3378            raise ValueError, "number of rows of A and B must be the same"
     79
     80        # Make sure A and B are over RDF.
    3481        if A.base_ring() != RDF:
    3582            A = A.change_ring(RDF)
    3683        if B.base_ring() != RDF:
    3784            B = B.change_ring(RDF)
    3885
     86        # Make sure the initial probabilities are all floats.
    3987        self.pi = [float(x) for x in pi]
    4088        if len(self.pi) != A.nrows():
    4189            raise ValueError, "length of pi must equal number of rows of A"
    4290
     91        # Record the now validated matrices A and B as attributes.
     92        # They get used later as attributes in the constructors for
     93        # derived classes.
    4394        self.A = A
    4495        self.B = B
    4596
    cdef class DiscreteHiddenMarkovModel(Hid 
    56107            name -- (optional) name of the model
    57108
    58109        EXAMPLES:
    59         We create a discrete HMM with 2 internal states on an alphabet of
    60         size 2.
    61        
     110        We create a discrete HMM with 2 internal states on an alphabet of size 2.
    62111            sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])
    63            
    64112        """
    65         self.initialized = False
     113        # memory has not all been setup yet.
     114        self.initialized = False 
     115
     116        # This sets self.A, self.B and pi after doing appropriate coercions, etc.
    66117        HiddenMarkovModel.__init__(self, A, B, pi)
    67118
    68         cdef Py_ssize_t i, j, k
    69119        self.set_emission_symbols(emission_symbols)
    70120
    71121        self.m = <ghmm_dmodel*> safe_malloc(sizeof(ghmm_dmodel))
    cdef class DiscreteHiddenMarkovModel(Hid 
    101151        silent_states = []
    102152        tmp_order     = []
    103153       
    104         for i in range(self.m.N):
     154        cdef Py_ssize_t i, j, k
     155       
     156        for i from 0 <= i < self.m.N:
    105157            v = self.B[i]
    106158
    107159            # Get a reference to the i-th state for convenience of the notation below.
    cdef class DiscreteHiddenMarkovModel(Hid 
    127179
    128180            silent_states.append( 1 if sum(v) == 0 else 0)
    129181
    130             # Set out probabilities, i.e., the probabilities that each
    131             # symbol will be emitted from this state.
     182            # Set "out" probabilities, i.e., the probabilities to
     183            # transition to another hidden state from this state.
    132184            v = self.A[i]
    133             nz = v.nonzero_positions()
    134             k = len(nz)
     185            k = self.m.N
    135186            state.out_states = k
    136187            state.out_id = <int*> safe_malloc(sizeof(int)*k)
    137188            state.out_a  = <double*> safe_malloc(sizeof(double)*k)
    138             for j in range(k):
    139                 state.out_id[j] = nz[j]
    140                 state.out_a[j]  = v[nz[j]]
     189            for j from 0 <= j < k:
     190                state.out_id[j] = j
     191                state.out_a[j]  = v[j]
    141192
    142193            # Set "in" probabilities
    143194            v = self.A.column(i)
    144             nz = v.nonzero_positions()
    145             k = len(nz)
    146195            state.in_states = k
    147196            state.in_id = <int*> safe_malloc(sizeof(int)*k)
    148197            state.in_a  = <double*> safe_malloc(sizeof(double)*k)
    149             for j in range(k):
    150                 state.in_id[j] = nz[j]
    151                 state.in_a[j]  = v[nz[j]]
     198            for j from 0 <= j < k:
     199                state.in_id[j] = j
     200                state.in_a[j]  = v[j]
    152201               
    153202            state.fix = 0
    154203               
    155204        self.m.s = states
    156205        self.initialized = True
    157206
    158 ##         if sum(silent_states) > 0:
    159 ##             self.m.model_type |= GHMM_kSilentStates
    160 ##             self.m.silent = to_int_array(silent_states)
    161 ##         self.m.maxorder = max(tmp_order)
    162 ##         if self.m.maxorder > 0:
    163 ##             self.m.model_type |= GHMM_kHigherOrderEmissions
    164 ##             self.m.order = to_int_array(tmp_order)
    165 ##         # Initialize lookup table for powers of the alphabet size,
    166 ##         # which speeds up models with higher order states.
    167 ##         powLookUp = [1] * (self.m.maxorder+2)
    168 ##         for i in range(1,len(powLookUp)):
    169 ##             powLookUp[i] = powLookUp[i-1] * self.m.M
    170 ##         self.m.pow_lookup = to_int_array(powLookUp)
    171 ##         self.initialized = True
     207    def __cmp__(self, other):
     208        """
     209        Compare two Discrete HMM's.
     210
     211        INPUT:
     212            self, other -- objects; if other is not a Discrete HMM compare types.
     213        OUTPUT:
     214            -1,0,1
     215
     216        The transition matrices are compared, then the emission
     217        parameters, and the initial probabilities.  The names are not
     218        compared, so two GHMM's with the same parameters but different
     219        names compare to be equal.
     220
     221        EXAMPLES:
     222            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2])
     223            sage: m.__cmp__(m)
     224            0
     225
     226        Note that the name doesn't matter:
     227            sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2])
     228            sage: m.__cmp__(n)
     229            0
     230
     231        Normalizing fixes the initial probabilities, hence m and n are no longer equal.
     232            sage: n.normalize()
     233            sage: m.__cmp__(n)
     234            1
     235        """
     236        if not isinstance(other, DiscreteHiddenMarkovModel):
     237            return cmp(type(self), type(other))
     238
     239        if self is other: return 0  # easy special case
    172240       
     241        cdef DiscreteHiddenMarkovModel o = other
     242        if self.m.N < o.m.N:
     243            return -1
     244        elif self.m.N > o.m.N:
     245            return 1
     246        cdef Py_ssize_t i, j
     247
     248        # This code is similar to code in chmm.pyx, but with several small differences.
     249       
     250        # Compare transition matrices
     251        for i from 0 <= i < self.m.N:
     252            for j from 0 <= j < self.m.s[i].out_states:
     253                if self.m.s[i].out_a[j] < o.m.s[i].out_a[j]:
     254                    return -1
     255                elif self.m.s[i].out_a[j] > o.m.s[i].out_a[j]:
     256                    return 1
     257
     258        # Compare emission matrices
     259        for i from 0 <= i < self.m.N:
     260            for j from 0 <= j < self.B._ncols:
     261                if self.m.s[i].b[j] < o.m.s[i].b[j]:
     262                    return -1
     263                elif self.m.s[i].b[j] > o.m.s[i].b[j]:
     264                    return 1
     265               
     266        # Compare initial state probabilities
     267        for 0 <= i < self.m.N:
     268            if self.m.s[i].pi < o.m.s[i].pi:
     269                return -1
     270            elif self.m.s[i].pi > o.m.s[i].pi:
     271                return 1
     272
     273        return 0
     274
    173275    def __dealloc__(self):
     276        """
     277        Deallocate memory allocated by the HMM.
     278
     279        EXAMPLES:
     280            sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])  # indirect doctest
     281            sage: del a
     282        """
    174283        if self.initialized:
    175284            ghmm_dmodel_free(&self.m)
    176285
    cdef class DiscreteHiddenMarkovModel(Hid 
    184293        EXAMPLES:
    185294            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[0.9,0.1],[0.1,0.9]], [0.5,0.5], [3/4, 'abc'])
    186295            sage: a.__repr__()
    187             "Discrete Hidden Markov Model (2 states, 2 outputs)\nInitial probabilities: [0.5, 0.5]\nTransition matrix:\n[0.1 0.9]\n[0.1 0.9]\nEmission matrix:\n[0.9 0.1]\n[0.1 0.9]\nEmission symbols: [3/4, 'abc']"
     296            "Discrete Hidden Markov Model with 2 States and 2 Emissions\nTransition matrix:\n[0.1 0.9]\n[0.1 0.9]\nEmission matrix:\n[0.9 0.1]\n[0.1 0.9]\nInitial probabilities: [0.5, 0.5]\nEmission symbols: [3/4, 'abc']"
    188297        """
    189         s = "Discrete Hidden Markov Model%s (%s states, %s outputs)"%(
     298        s = "Discrete Hidden Markov Model%s with %s States and %s Emissions"%(
    190299            ' ' + self.m.name if self.m.name else '',
    191300            self.m.N, self.m.M)
    192         s += '\nInitial probabilities: %s'%self.initial_probabilities()
    193301        s += '\nTransition matrix:\n%s'%self.transition_matrix()
    194302        s += '\nEmission matrix:\n%s'%self.emission_matrix()
     303        s += '\nInitial probabilities: %s'%self.initial_probabilities()
    195304        if self._emission_symbols_dict:
    196305            s += '\nEmission symbols: %s'%self._emission_symbols
    197306        return s
    cdef class DiscreteHiddenMarkovModel(Hid 
    251360            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,1],[1.2,0.9]], [[1,0.5],[0.5,1]], [0.1,1.2])
    252361            sage: a.normalize()
    253362            sage: a
    254             Discrete Hidden Markov Model (2 states, 2 outputs)
    255             Initial probabilities: [0.076923076923076927, 0.92307692307692302]
     363            Discrete Hidden Markov Model with 2 States and 2 Emissions
    256364            Transition matrix:
    257365            [0.333333333333 0.666666666667]
    258366            [0.571428571429 0.428571428571]
    259367            Emission matrix:
    260368            [0.666666666667 0.333333333333]
    261369            [0.333333333333 0.666666666667]
     370            Initial probabilities: [0.076923076923076927, 0.92307692307692302]
    262371        """
    263372        ghmm_dmodel_normalize(self.m)
    264373
    cdef class DiscreteHiddenMarkovModel(Hid 
    491600        Now the model's emission matrix changes since it is much
    492601        more likely to emit 1 than 0. 
    493602            sage: a
    494             Discrete Hidden Markov Model (2 states, 2 outputs)
    495             Initial probabilities: [0.5, 0.5]
     603            Discrete Hidden Markov Model with 2 States and 2 Emissions
    496604            Transition matrix:
    497605            [0.5 0.5]
    498606            [0.5 0.5]
    499607            Emission matrix:
    500608            [0.166666666667 0.833333333333]
    501609            [0.166666666667 0.833333333333]
     610            Initial probabilities: [0.5, 0.5]
    502611
    503612        Note that 1/6 = 1.666...:
    504613            sage: 1.0/6
    cdef class DiscreteHiddenMarkovModel(Hid 
    509618            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[0.5,0.5],[0.5,0.5]], [0.5,0.5], ['up','down'])
    510619            sage: a.baum_welch([['up','up'], ['down','up']])
    511620            sage: a
    512             Discrete Hidden Markov Model (2 states, 2 outputs)
    513             Initial probabilities: [0.5, 0.5]
     621            Discrete Hidden Markov Model with 2 States and 2 Emissions
    514622            Transition matrix:
    515623            [0.5 0.5]
    516624            [0.5 0.5]
    517625            Emission matrix:
    518626            [0.75 0.25]
    519627            [0.75 0.25]
    520             Emission symbols: ['up', 'down']       
     628            Initial probabilities: [0.5, 0.5]
     629            Emission symbols: ['up', 'down']
    521630
    522631        NOTE: Training for models including silent states is not yet supported.
    523632
    cdef class DiscreteHiddenMarkovModel(Hid 
    544653##################################################################################
    545654
    546655cdef ghmm_dseq* malloc_ghmm_dseq(seqs) except NULL:
     656    """
     657    Allocate a discrete sequence of samples.
     658
     659    INPUT:
     660        seqs -- a list of sequences
     661
     662    OUTPUT:
     663        C pointer to ghmm_dseq
     664    """
    547665    cdef ghmm_dseq* d = ghmm_dseq_calloc(len(seqs))
    548666    if d == NULL:
    549667        raise MemoryError
  • sage/stats/hmm/misc.pxi

    diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/misc.pxi
    a b  
     1#############################################################################
     2#       Copyright (C) 2008 William Stein <wstein@gmail.com>
     3#  Distributed under the terms of the GNU General Public License (GPL),
     4#  version 2 or any later version at your option.
     5#  The full text of the GPL is available at:
     6#                  http://www.gnu.org/licenses/
     7#############################################################################
     8
     9cdef extern from "string.h":
     10    char *strcpy(char *s1, char *s2)
    111
    212cdef double* to_double_array(v) except NULL:
     13    """
     14    Transform a Python list of floats to a C array of doubles.  The caller is
     15    responsible for deallocating the resulting memory.
     16   
     17    INPUT:
     18        v -- a list of objects coercible to floats
     19    OUTPUT:
     20        a newly allocated C array of doubles
     21    """
    322    cdef double x
    423    cdef double* w = <double*> safe_malloc(sizeof(double)*len(v))
    524    cdef Py_ssize_t i = 0
    cdef double* to_double_array(v) except N 
    928    return w
    1029
    1130cdef int* to_int_array(v) except NULL:
     31    """
     32    Transform a Python list of ints to a C array of ints.  The caller is
     33    responsible for deallocating the resulting memory.
     34   
     35    INPUT:
     36        v -- a list of objects coercible to ints
     37    OUTPUT:
     38        a newly allocated C array of ints
     39    """
    1240    cdef int x
    1341    cdef int* w = <int*> safe_malloc(sizeof(int)*len(v))
    1442    cdef Py_ssize_t i = 0
    cdef void* safe_malloc(int bytes) except 
    2149    """
    2250    malloc the given bytes of memory and check that the malloc
    2351    succeeds -- if not raise a MemoryError.
     52
     53    INPUT:
     54        bytes -- a nonnegatie integer
     55       
     56    OUTPUT:
     57        void pointer or raise a MemoryError.
    2458    """
    2559    cdef void* t = sage_malloc(bytes)
    2660    if not t: