Ticket #3726: sage-3726-part6.patch
File sage-3726-part6.patch, 34.1 KB (added by , 12 years ago) |
---|
-
sage/stats/hmm/all.py
# HG changeset patch # User William Stein <wstein@gmail.com> # Date 1217736309 25200 # Node ID 7bf1791b998d105d792d49376d46b9bef19ab838 # Parent 0ec7e93289ae6ddd0ec6de054b4748ca25d32355 trac #3726 -- got all doctest coverage to 100% and generally improved documentation throughout the code. diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/all.py
a b from hmm import DiscreteHiddenMarkovMod 1 ############################################################################# 2 # Copyright (C) 2008 William Stein <wstein@gmail.com> 3 # Distributed under the terms of the GNU General Public License (GPL), 4 # version 2 or any later version at your option. 5 # The full text of the GPL is available at: 6 # http://www.gnu.org/licenses/ 7 ############################################################################# 8 1 9 from hmm import DiscreteHiddenMarkovModel 2 10 from chmm import GaussianHiddenMarkovModel -
sage/stats/hmm/chmm.pxd
diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/chmm.pxd
a b 1 ############################################################################# 2 # Copyright (C) 2008 William Stein <wstein@gmail.com> 3 # Distributed under the terms of the GNU General Public License (GPL), 4 # version 2 or any later version at your option. 5 # The full text of the GPL is available at: 6 # http://www.gnu.org/licenses/ 7 ############################################################################# 1 8 2 9 from hmm cimport HiddenMarkovModel 3 10 … … cdef extern from "ghmm/smodel.h": 193 200 194 201 195 202 int ghmm_cmodel_free (ghmm_cmodel ** smo) 196 203 204 # Normalizes initial and transition probabilities and mixture weights. 205 # 0 on success / -1 on error 206 int ghmm_cmodel_normalize(ghmm_cmodel *smo) 197 207 198 208 cdef class ContinuousHiddenMarkovModel(HiddenMarkovModel): 199 209 cdef ghmm_cmodel* m -
sage/stats/hmm/chmm.pyx
diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/chmm.pyx
a b 1 """ 2 Continuous Hidden Markov Models 1 3 4 AUTHORS: 5 -- William Stein (2008) 6 -- The authors of GHMM http://ghmm.sourceforge.net/ 7 """ 8 9 ############################################################################# 10 # Copyright (C) 2008 William Stein <wstein@gmail.com> 11 # Distributed under the terms of the GNU General Public License (GPL), 12 # version 2 or any later version at your option. 13 # The full text of the GPL is available at: 14 # http://www.gnu.org/licenses/ 15 ############################################################################# 2 16 3 17 include "../../ext/stdsage.pxi" 4 18 5 19 include "misc.pxi" 6 20 7 21 cdef class ContinuousHiddenMarkovModel(HiddenMarkovModel): 8 def __init__(self, A, B, pi, name=None): 22 """ 23 Abstract base class for continuous hidden Markov models. 24 25 26 INPUT: 27 A -- square matrix (or list of lists) 28 B -- list or matrix with numbers of rows equal to number of rows of A; 29 each row defines the emissions 30 pi -- list 31 name -- (default: None); a string 32 33 EXAMPLES: 34 sage: sage.stats.hmm.chmm.ContinuousHiddenMarkovModel([[1.0]], [(-0.0,10.0)], [1], "model") 35 <sage.stats.hmm.chmm.ContinuousHiddenMarkovModel object at ...> 36 """ 37 def __init__(self, A, B, pi, name): 38 """ 39 Constructor for continuous Hidden markov model abstract base class. 40 41 EXAMPLES: 42 This class is an abstract base class, so shouldn't ever be constructed by users. 43 sage: sage.stats.hmm.chmm.ContinuousHiddenMarkovModel([[1.0]], [(0.0,1.0)], [1], None) 44 <sage.stats.hmm.chmm.ContinuousHiddenMarkovModel object at ...> 45 """ 9 46 self.initialized = False 10 47 HiddenMarkovModel.__init__(self, A, B, pi) 11 48 self.m = <ghmm_cmodel*> safe_malloc(sizeof(ghmm_cmodel)) 12 49 # Set number of states 13 50 self.m.N = self.A.nrows() 51 # Assign model identifier (the name) if specified 52 if name is not None: 53 name = str(name) 54 self.m.name = <char*> safe_malloc(len(name)) 55 strcpy(self.m.name, name) 56 else: 57 self.m.name = NULL 58 59 60 def name(self): 61 """ 62 Return the name of this model. 63 64 OUTPUT: 65 string or None 66 67 EXAMPLES: 68 sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0], 'Test Model') 69 sage: m.name() 70 'Test Model' 71 72 If the model is not explicitly named then this function returns None: 73 sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0]) 74 sage: m.name() 75 sage: m.name() is None 76 True 77 """ 78 if self.m.name: 79 s = str(self.m.name) 80 return s 81 else: 82 return None 83 14 84 15 85 cdef class GaussianHiddenMarkovModel(ContinuousHiddenMarkovModel): 16 """86 r""" 17 87 Create a Gaussian Hidden Markov Model. The probability 18 88 distribution associated with each state is a Gaussian 19 89 distribution. … … cdef class GaussianHiddenMarkovModel(Con 26 96 Gaussian distributions associated to each state 27 97 pi -- list of floats that sums to 1.0; these are 28 98 the initial probabilities of each hidden state 29 name -- (default: None); 99 name -- (default: None); a string 30 100 31 101 EXAMPLES: 32 102 Define the transition matrix: … … cdef class GaussianHiddenMarkovModel(Con 38 108 The initial probabilities per state: 39 109 sage: pi = [1,0,0] 40 110 41 Create the continuous Gaussian hidden Markov model: 42 sage: m = hmm.GaussianHiddenMarkovModel(A, B, pi); m 111 We create the continuous Gaussian hidden Markov model defined by $A,B,\pi$: 112 sage: hmm.GaussianHiddenMarkovModel(A, B, pi) 113 Gaussian Hidden Markov Model with 3 States 114 Transition matrix: 115 [0.0 1.0 0.0] 116 [0.5 0.0 0.5] 117 [0.3 0.3 0.4] 118 Emission parameters: 119 [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)] 120 Initial probabilities: [1.0, 0.0, 0.0] 43 121 """ 44 122 def __init__(self, A, B, pi, name=None): 45 ContinuousHiddenMarkovModel.__init__(self, A, B, pi) 123 """ 124 EXAMPLES: 125 We make a very simple model: 126 sage: hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1], 'simple') 127 Gaussian Hidden Markov Model simple with 1 States 128 Transition matrix: 129 [1.0] 130 Emission parameters: 131 [(0.0, 1.0)] 132 Initial probabilities: [1.0] 133 134 We test a degenerate case: 135 sage: hmm.GaussianHiddenMarkovModel([], [], [], 'simple') 136 Gaussian Hidden Markov Model simple with 0 States 137 Transition matrix: 138 [] 139 Emission parameters: 140 [] 141 Initial probabilities: [] 142 """ 143 ContinuousHiddenMarkovModel.__init__(self, A, B, pi, name=name) 46 144 47 145 # Set number of outputs. This is 1 here because each 48 146 # output is a single Gaussian distribution. … … cdef class GaussianHiddenMarkovModel(Con 51 149 # Set the model type to continuous 52 150 self.m.model_type = GHMM_kContinuousHMM 53 151 54 # Assign model identifier if specified55 if name is not None:56 name = str(name)57 self.m.name = name58 else:59 self.m.name = NULL60 61 152 # 1 transition matrix 62 153 self.m.cos = 1 63 154 # Set that no a prior model probabilities are set. … … cdef class GaussianHiddenMarkovModel(Con 69 160 cdef ghmm_cstate* states = <ghmm_cstate*> safe_malloc(sizeof(ghmm_cstate) * self.m.N) 70 161 cdef ghmm_cstate* state 71 162 cdef ghmm_c_emission* e 163 cdef Py_ssize_t i, j, k 72 164 73 165 for i in range(self.m.N): 74 166 # Parameters of normal distribution … … cdef class GaussianHiddenMarkovModel(Con 78 170 state.M = 1 79 171 state.pi = pi[i] 80 172 state.desc = NULL 81 state.out_states = 082 state.in_states = 083 173 e = <ghmm_c_emission*> safe_malloc(sizeof(ghmm_c_emission)) 84 174 e.type = 0 # normal 85 175 e.dimension = 1 … … cdef class GaussianHiddenMarkovModel(Con 91 181 e.sigmainv = NULL 92 182 state.e = e 93 183 state.c = to_double_array([1.0]) 94 state.in_a = ighmm_cmatrix_alloc(1, self.m.N) 95 state.out_a = ighmm_cmatrix_alloc(1, self.m.N) 184 185 ######################################################### 186 # Initialize state transition data. 187 # NOTE: This code is similar to a block of code in hmm.pyx. 188 189 # Set "out" probabilities, i.e., the probabilities to 190 # transition to another hidden state from this state. 191 v = self.A[i] 192 k = self.m.N 193 state.out_states = k 194 state.out_id = <int*> safe_malloc(sizeof(int)*k) 195 state.out_a = ighmm_cmatrix_alloc(1, k) 196 for j in range(k): 197 state.out_id[j] = j 198 state.out_a[0][j] = v[j] 199 200 # Set "in" probabilities 201 v = self.A.column(i) 202 state.in_states = k 203 state.in_id = <int*> safe_malloc(sizeof(int)*k) 204 state.in_a = ighmm_cmatrix_alloc(1, k) 205 for j in range(k): 206 state.in_id[j] = j 207 state.in_a[0][j] = v[j] 208 209 ######################################################### 210 96 211 97 212 # Set states 98 213 self.m.s = states … … cdef class GaussianHiddenMarkovModel(Con 102 217 self.initialized = True 103 218 104 219 def __dealloc__(self): 220 """ 221 Dealloc the memory used by this Gaussian HMM, but only if the 222 HMM was successfully initialized. 223 224 EXAMPLES: 225 sage: m = hmm.GaussianHiddenMarkovModel([[1.0]], [(0.0,1.0)], [1]) # implicit doctest 226 sage: del m 227 """ 105 228 if self.initialized: 106 229 ghmm_cmodel_free(&self.m) 230 231 def __copy__(self): 232 """ 233 Return a copy of this Gaussian HMM. 234 235 EXAMPLES: 236 sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0], 'NAME') 237 sage: copy(m) 238 Gaussian Hidden Markov Model NAME with 2 States 239 Transition matrix: 240 [0.4 0.6] 241 [0.1 0.9] 242 Emission parameters: 243 [(0.0, 1.0), (1.0, 1.0)] 244 Initial probabilities: [1.0, 0.0] 245 """ 246 247 return GaussianHiddenMarkovModel(self.transition_matrix(), self.emission_parameters(), 248 self.initial_probabilities(), self.name()) 249 250 def __cmp__(self, other): 251 """ 252 Compare two Gaussian HMM's. 253 254 INPUT: 255 self, other -- objects; if other is not a Gaussian HMM compare types. 256 OUTPUT: 257 -1,0,1 258 259 The transition matrices are compared, then the emission 260 parameters, and the initial probabilities. The names are not 261 compared, so two GHMM's with the same parameters but different 262 names compare to be equal. 263 264 EXAMPLES: 265 sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 1") 266 sage: m.__cmp__(m) 267 0 268 269 Note that the name doesn't matter: 270 sage: n = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 2") 271 sage: m.__cmp__(n) 272 0 273 274 Normalizing fixes the initial probabilities, hence m and n are no longer equal. 275 sage: n.normalize() 276 sage: m.__cmp__(n) 277 1 278 """ 279 if not isinstance(other, GaussianHiddenMarkovModel): 280 return cmp(type(self), type(other)) 281 282 if self is other: return 0 # easy special case 283 284 cdef GaussianHiddenMarkovModel o = other 285 if self.m.N < o.m.N: 286 return -1 287 elif self.m.N > o.m.N: 288 return 1 289 cdef Py_ssize_t i, j 290 291 # The code below is somewhat long and tedious because I want it to be 292 # very fast. All it does is explicitly loop through the transition 293 # matrix, emission parameters and initial state probabilities checking 294 # that they agree, and if not returning -1 or 1. 295 # Compare transition matrices 296 for i from 0 <= i < self.m.N: 297 for j from 0 <= j < self.m.s[i].out_states: 298 if self.m.s[i].out_a[0][j] < o.m.s[i].out_a[0][j]: 299 return -1 300 elif self.m.s[i].out_a[0][j] > o.m.s[i].out_a[0][j]: 301 return 1 302 303 # Compare emissions parameters 304 for i from 0 <= i < self.m.N: 305 if self.m.s[i].e.mean.val < o.m.s[i].e.mean.val: 306 return -1 307 elif self.m.s[i].e.mean.val > o.m.s[i].e.mean.val: 308 return 1 309 if self.m.s[i].e.variance.val < o.m.s[i].e.variance.val: 310 return -1 311 elif self.m.s[i].e.variance.val > o.m.s[i].e.variance.val: 312 return 1 313 314 # Compare initial state probabilities 315 for 0 <= i < self.m.N: 316 if self.m.s[i].pi < o.m.s[i].pi: 317 return -1 318 elif self.m.s[i].pi > o.m.s[i].pi: 319 return 1 320 321 return 0 107 322 108 323 def __repr__(self): 109 324 """ … … cdef class GaussianHiddenMarkovModel(Con 114 329 115 330 EXAMPLES: 116 331 sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [1,0,0]) 117 sage: a.__repr__()118 "Discrete Hidden Markov Model (2 states, 2 outputs)\nInitial probabilities: [0.5, 0.5]\nTransition matrix:\n[0.1 0.9]\n[0.1 0.9]\nEmission matrix:\n[0.9 0.1]\n[0.1 0.9]\nEmission symbols: [3/4, 'abc']"332 sage: m.__repr__() 333 'Gaussian Hidden Markov Model with 3 States\nTransition matrix:\n[0.0 1.0 0.0]\n[0.5 0.0 0.5]\n[0.3 0.3 0.4]\nEmission parameters:\n[(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)]\nInitial probabilities: [1.0, 0.0, 0.0]' 119 334 """ 120 s = "Gaussian Hidden Markov Model%s (%s states, %s outputs)"%(335 s = "Gaussian Hidden Markov Model%s with %s States"%( 121 336 ' ' + self.m.name if self.m.name else '', 122 self.m.N, self.m.M) 123 s += '\nInitial probabilities: %s'%self.initial_probabilities() 337 self.m.N) 124 338 s += '\nTransition matrix:\n%s'%self.transition_matrix() 125 339 s += '\nEmission parameters:\n%s'%self.emission_parameters() 340 s += '\nInitial probabilities: %s'%self.initial_probabilities() 126 341 return s 127 342 128 343 def initial_probabilities(self): … … cdef class GaussianHiddenMarkovModel(Con 135 350 EXAMPLES: 136 351 sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [0.4,0.3,0.3]) 137 352 sage: m.initial_probabilities() 138 [0.4 , 0.3, 0.3]353 [0.40000000000000002, 0.29999999999999999, 0.29999999999999999] 139 354 """ 140 355 cdef Py_ssize_t i 141 356 return [self.m.s[i].pi for i in range(self.m.N)] 142 357 143 def transition_matrix(self , list_only=True):358 def transition_matrix(self): 144 359 """ 145 360 Return the hidden state transition matrix. 361 362 OUTPUT: 363 matrix whose rows give the transition probabilities of the 364 Hidden Markov Model states. 146 365 147 366 EXAMPLES: 148 367 sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [1,0,0]) 149 368 sage: m.transition_matrix() 150 [0.9 0.1] 151 [0.9 0.1] 369 [0.0 1.0 0.0] 370 [0.5 0.0 0.5] 371 [0.3 0.3 0.4] 152 372 """ 153 373 cdef Py_ssize_t i, j 374 # Update the state of the "immutable" matrix A, then return a reference to it. 154 375 for i from 0 <= i < self.m.N: 155 376 for j from 0 <= j < self.m.s[i].out_states: 156 377 self.A.set_unsafe_double(i,j,self.m.s[i].out_a[0][j]) … … cdef class GaussianHiddenMarkovModel(Con 158 379 159 380 def emission_parameters(self): 160 381 """ 161 Return the emission probability matrix. 382 Return the emission parameters list. 383 384 OUTPUT: 385 list of tuples (mu, sigma) that define Gaussian distributions associated to each state. 162 386 163 387 EXAMPLES: 164 sage: m = hmm.GaussianHiddenMarkovModel([[0. 0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [0.1,0.4,0.5])388 sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(1.5,2),(-1,3)], [1,0], 'NAME') 165 389 sage: m.emission_parameters() 166 [( 0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000...)]390 [(1.5, 2.0), (-1.0, 3.0)] 167 391 """ 168 cdef Py_ssize_t i, j 169 v = [] 170 for i from 0 <= i < self.m.N: 171 v.append((self.m.s[i].e.mean.val, self.m.s[i].e.variance.val)) 172 return v 392 cdef Py_ssize_t i 393 return [(self.m.s[i].e.mean.val, self.m.s[i].e.variance.val) for i in range(self.m.N)] 394 395 def normalize(self): 396 """ 397 Normalize the transition and emission probabilities, if 398 applicable. This changes self in place. 399 400 EXAMPLES: 401 sage: m = hmm.GaussianHiddenMarkovModel([[1.0,1.2],[0,0.1]], [(0.0,1.0),(1,1)], [1,2]) 402 sage: m.normalize() 403 sage: m 404 Gaussian Hidden Markov Model with 2 States 405 Transition matrix: 406 [0.454545454545 0.545454545455] 407 [ 0.0 1.0] 408 Emission parameters: 409 [(0.0, 1.0), (1.0, 1.0)] 410 Initial probabilities: [0.33333333333333331, 0.66666666666666663] 411 """ 412 if ghmm_cmodel_normalize(self.m): 413 raise RuntimeError, "error normalizing model (note that model may still have been partly changed)" -
sage/stats/hmm/hmm.pxd
diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/hmm.pxd
a b from sage.matrix.matrix_real_double_dens 1 ############################################################################# 2 # Copyright (C) 2008 William Stein <wstein@gmail.com> 3 # Distributed under the terms of the GNU General Public License (GPL), 4 # version 2 or any later version at your option. 5 # The full text of the GPL is available at: 6 # http://www.gnu.org/licenses/ 7 ############################################################################# 8 1 9 from sage.matrix.matrix_real_double_dense cimport Matrix_real_double_dense 2 10 3 11 cdef extern from "ghmm/ghmm.h": -
sage/stats/hmm/hmm.pyx
diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/hmm.pyx
a b TODO: 11 11 * continuous hmm's 12 12 """ 13 13 14 ############################################################################# 15 # Copyright (C) 2008 William Stein <wstein@gmail.com> 16 # Distributed under the terms of the GNU General Public License (GPL), 17 # version 2 or any later version at your option. 18 # The full text of the GPL is available at: 19 # http://www.gnu.org/licenses/ 20 ############################################################################# 21 14 22 import math 15 23 16 24 from sage.matrix.all import is_Matrix, matrix … … include "misc.pxi" 22 30 include "misc.pxi" 23 31 24 32 cdef class HiddenMarkovModel: 33 """ 34 Abstract base class for all Hidden Markov Models. 35 36 INPUT: 37 A -- matrix or list 38 B -- matrix or list 39 pi -- list of floats 40 41 EXAMPLES: 42 One shouldn't directly call this constructor since this class is an abstract 43 base class. 44 sage: sage.stats.hmm.hmm.HiddenMarkovModel([[0.4,0.6],[1,0]], [[1,0],[0.5,0.5]], [0.5,0.5]) 45 <sage.stats.hmm.hmm.HiddenMarkovModel object at ...> 46 """ 25 47 def __init__(self, A, B, pi): 48 """ 49 INPUT: 50 A -- matrix or list 51 B -- matrix or list 52 pi -- list of floats 53 54 EXAMPLES: 55 sage: hmm.DiscreteHiddenMarkovModel([[1]], [[0.0,1.0]], [1]) 56 Discrete Hidden Markov Model with 1 States and 2 Emissions 57 Transition matrix: 58 [1.0] 59 Emission matrix: 60 [0.0 1.0] 61 Initial probabilities: [1.0] 62 """ 63 # Convert A to a matrix 26 64 if not is_Matrix(A): 27 A = matrix(RDF, len(A), len(A[0]), A) 65 n = len(A) 66 A = matrix(RDF, n, len(A[0]) if n > 0 else 0, A) 67 68 # Convert B to a matrix 28 69 if not is_Matrix(B): 29 B = matrix(RDF, len(B), len(B[0]), B) 70 n = len(B) 71 B = matrix(RDF, n, len(B[0]) if n > 0 else 0, B) 72 73 # Some consistency checks 30 74 if not A.is_square(): 75 print A.parent() 31 76 raise ValueError, "A must be square" 32 77 if A.nrows() != B.nrows(): 33 78 raise ValueError, "number of rows of A and B must be the same" 79 80 # Make sure A and B are over RDF. 34 81 if A.base_ring() != RDF: 35 82 A = A.change_ring(RDF) 36 83 if B.base_ring() != RDF: 37 84 B = B.change_ring(RDF) 38 85 86 # Make sure the initial probabilities are all floats. 39 87 self.pi = [float(x) for x in pi] 40 88 if len(self.pi) != A.nrows(): 41 89 raise ValueError, "length of pi must equal number of rows of A" 42 90 91 # Record the now validated matrices A and B as attributes. 92 # They get used later as attributes in the constructors for 93 # derived classes. 43 94 self.A = A 44 95 self.B = B 45 96 … … cdef class DiscreteHiddenMarkovModel(Hid 56 107 name -- (optional) name of the model 57 108 58 109 EXAMPLES: 59 We create a discrete HMM with 2 internal states on an alphabet of 60 size 2. 61 110 We create a discrete HMM with 2 internal states on an alphabet of size 2. 62 111 sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1]) 63 64 112 """ 65 self.initialized = False 113 # memory has not all been setup yet. 114 self.initialized = False 115 116 # This sets self.A, self.B and pi after doing appropriate coercions, etc. 66 117 HiddenMarkovModel.__init__(self, A, B, pi) 67 118 68 cdef Py_ssize_t i, j, k69 119 self.set_emission_symbols(emission_symbols) 70 120 71 121 self.m = <ghmm_dmodel*> safe_malloc(sizeof(ghmm_dmodel)) … … cdef class DiscreteHiddenMarkovModel(Hid 101 151 silent_states = [] 102 152 tmp_order = [] 103 153 104 for i in range(self.m.N): 154 cdef Py_ssize_t i, j, k 155 156 for i from 0 <= i < self.m.N: 105 157 v = self.B[i] 106 158 107 159 # Get a reference to the i-th state for convenience of the notation below. … … cdef class DiscreteHiddenMarkovModel(Hid 127 179 128 180 silent_states.append( 1 if sum(v) == 0 else 0) 129 181 130 # Set out probabilities, i.e., the probabilities that each131 # symbol will be emitted from this state.182 # Set "out" probabilities, i.e., the probabilities to 183 # transition to another hidden state from this state. 132 184 v = self.A[i] 133 nz = v.nonzero_positions() 134 k = len(nz) 185 k = self.m.N 135 186 state.out_states = k 136 187 state.out_id = <int*> safe_malloc(sizeof(int)*k) 137 188 state.out_a = <double*> safe_malloc(sizeof(double)*k) 138 for j in range(k):139 state.out_id[j] = nz[j]140 state.out_a[j] = v[ nz[j]]189 for j from 0 <= j < k: 190 state.out_id[j] = j 191 state.out_a[j] = v[j] 141 192 142 193 # Set "in" probabilities 143 194 v = self.A.column(i) 144 nz = v.nonzero_positions()145 k = len(nz)146 195 state.in_states = k 147 196 state.in_id = <int*> safe_malloc(sizeof(int)*k) 148 197 state.in_a = <double*> safe_malloc(sizeof(double)*k) 149 for j in range(k):150 state.in_id[j] = nz[j]151 state.in_a[j] = v[ nz[j]]198 for j from 0 <= j < k: 199 state.in_id[j] = j 200 state.in_a[j] = v[j] 152 201 153 202 state.fix = 0 154 203 155 204 self.m.s = states 156 205 self.initialized = True 157 206 158 ## if sum(silent_states) > 0: 159 ## self.m.model_type |= GHMM_kSilentStates 160 ## self.m.silent = to_int_array(silent_states) 161 ## self.m.maxorder = max(tmp_order) 162 ## if self.m.maxorder > 0: 163 ## self.m.model_type |= GHMM_kHigherOrderEmissions 164 ## self.m.order = to_int_array(tmp_order) 165 ## # Initialize lookup table for powers of the alphabet size, 166 ## # which speeds up models with higher order states. 167 ## powLookUp = [1] * (self.m.maxorder+2) 168 ## for i in range(1,len(powLookUp)): 169 ## powLookUp[i] = powLookUp[i-1] * self.m.M 170 ## self.m.pow_lookup = to_int_array(powLookUp) 171 ## self.initialized = True 207 def __cmp__(self, other): 208 """ 209 Compare two Discrete HMM's. 210 211 INPUT: 212 self, other -- objects; if other is not a Discrete HMM compare types. 213 OUTPUT: 214 -1,0,1 215 216 The transition matrices are compared, then the emission 217 parameters, and the initial probabilities. The names are not 218 compared, so two GHMM's with the same parameters but different 219 names compare to be equal. 220 221 EXAMPLES: 222 sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2]) 223 sage: m.__cmp__(m) 224 0 225 226 Note that the name doesn't matter: 227 sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2]) 228 sage: m.__cmp__(n) 229 0 230 231 Normalizing fixes the initial probabilities, hence m and n are no longer equal. 232 sage: n.normalize() 233 sage: m.__cmp__(n) 234 1 235 """ 236 if not isinstance(other, DiscreteHiddenMarkovModel): 237 return cmp(type(self), type(other)) 238 239 if self is other: return 0 # easy special case 172 240 241 cdef DiscreteHiddenMarkovModel o = other 242 if self.m.N < o.m.N: 243 return -1 244 elif self.m.N > o.m.N: 245 return 1 246 cdef Py_ssize_t i, j 247 248 # This code is similar to code in chmm.pyx, but with several small differences. 249 250 # Compare transition matrices 251 for i from 0 <= i < self.m.N: 252 for j from 0 <= j < self.m.s[i].out_states: 253 if self.m.s[i].out_a[j] < o.m.s[i].out_a[j]: 254 return -1 255 elif self.m.s[i].out_a[j] > o.m.s[i].out_a[j]: 256 return 1 257 258 # Compare emission matrices 259 for i from 0 <= i < self.m.N: 260 for j from 0 <= j < self.B._ncols: 261 if self.m.s[i].b[j] < o.m.s[i].b[j]: 262 return -1 263 elif self.m.s[i].b[j] > o.m.s[i].b[j]: 264 return 1 265 266 # Compare initial state probabilities 267 for 0 <= i < self.m.N: 268 if self.m.s[i].pi < o.m.s[i].pi: 269 return -1 270 elif self.m.s[i].pi > o.m.s[i].pi: 271 return 1 272 273 return 0 274 173 275 def __dealloc__(self): 276 """ 277 Deallocate memory allocated by the HMM. 278 279 EXAMPLES: 280 sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1]) # indirect doctest 281 sage: del a 282 """ 174 283 if self.initialized: 175 284 ghmm_dmodel_free(&self.m) 176 285 … … cdef class DiscreteHiddenMarkovModel(Hid 184 293 EXAMPLES: 185 294 sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[0.9,0.1],[0.1,0.9]], [0.5,0.5], [3/4, 'abc']) 186 295 sage: a.__repr__() 187 "Discrete Hidden Markov Model (2 states, 2 outputs)\nInitial probabilities: [0.5, 0.5]\nTransition matrix:\n[0.1 0.9]\n[0.1 0.9]\nEmission matrix:\n[0.9 0.1]\n[0.1 0.9]\nEmission symbols: [3/4, 'abc']"296 "Discrete Hidden Markov Model with 2 States and 2 Emissions\nTransition matrix:\n[0.1 0.9]\n[0.1 0.9]\nEmission matrix:\n[0.9 0.1]\n[0.1 0.9]\nInitial probabilities: [0.5, 0.5]\nEmission symbols: [3/4, 'abc']" 188 297 """ 189 s = "Discrete Hidden Markov Model%s (%s states, %s outputs)"%(298 s = "Discrete Hidden Markov Model%s with %s States and %s Emissions"%( 190 299 ' ' + self.m.name if self.m.name else '', 191 300 self.m.N, self.m.M) 192 s += '\nInitial probabilities: %s'%self.initial_probabilities()193 301 s += '\nTransition matrix:\n%s'%self.transition_matrix() 194 302 s += '\nEmission matrix:\n%s'%self.emission_matrix() 303 s += '\nInitial probabilities: %s'%self.initial_probabilities() 195 304 if self._emission_symbols_dict: 196 305 s += '\nEmission symbols: %s'%self._emission_symbols 197 306 return s … … cdef class DiscreteHiddenMarkovModel(Hid 251 360 sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,1],[1.2,0.9]], [[1,0.5],[0.5,1]], [0.1,1.2]) 252 361 sage: a.normalize() 253 362 sage: a 254 Discrete Hidden Markov Model (2 states, 2 outputs) 255 Initial probabilities: [0.076923076923076927, 0.92307692307692302] 363 Discrete Hidden Markov Model with 2 States and 2 Emissions 256 364 Transition matrix: 257 365 [0.333333333333 0.666666666667] 258 366 [0.571428571429 0.428571428571] 259 367 Emission matrix: 260 368 [0.666666666667 0.333333333333] 261 369 [0.333333333333 0.666666666667] 370 Initial probabilities: [0.076923076923076927, 0.92307692307692302] 262 371 """ 263 372 ghmm_dmodel_normalize(self.m) 264 373 … … cdef class DiscreteHiddenMarkovModel(Hid 491 600 Now the model's emission matrix changes since it is much 492 601 more likely to emit 1 than 0. 493 602 sage: a 494 Discrete Hidden Markov Model (2 states, 2 outputs) 495 Initial probabilities: [0.5, 0.5] 603 Discrete Hidden Markov Model with 2 States and 2 Emissions 496 604 Transition matrix: 497 605 [0.5 0.5] 498 606 [0.5 0.5] 499 607 Emission matrix: 500 608 [0.166666666667 0.833333333333] 501 609 [0.166666666667 0.833333333333] 610 Initial probabilities: [0.5, 0.5] 502 611 503 612 Note that 1/6 = 1.666...: 504 613 sage: 1.0/6 … … cdef class DiscreteHiddenMarkovModel(Hid 509 618 sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[0.5,0.5],[0.5,0.5]], [0.5,0.5], ['up','down']) 510 619 sage: a.baum_welch([['up','up'], ['down','up']]) 511 620 sage: a 512 Discrete Hidden Markov Model (2 states, 2 outputs) 513 Initial probabilities: [0.5, 0.5] 621 Discrete Hidden Markov Model with 2 States and 2 Emissions 514 622 Transition matrix: 515 623 [0.5 0.5] 516 624 [0.5 0.5] 517 625 Emission matrix: 518 626 [0.75 0.25] 519 627 [0.75 0.25] 520 Emission symbols: ['up', 'down'] 628 Initial probabilities: [0.5, 0.5] 629 Emission symbols: ['up', 'down'] 521 630 522 631 NOTE: Training for models including silent states is not yet supported. 523 632 … … cdef class DiscreteHiddenMarkovModel(Hid 544 653 ################################################################################## 545 654 546 655 cdef ghmm_dseq* malloc_ghmm_dseq(seqs) except NULL: 656 """ 657 Allocate a discrete sequence of samples. 658 659 INPUT: 660 seqs -- a list of sequences 661 662 OUTPUT: 663 C pointer to ghmm_dseq 664 """ 547 665 cdef ghmm_dseq* d = ghmm_dseq_calloc(len(seqs)) 548 666 if d == NULL: 549 667 raise MemoryError -
sage/stats/hmm/misc.pxi
diff -r 0ec7e93289ae -r 7bf1791b998d sage/stats/hmm/misc.pxi
a b 1 ############################################################################# 2 # Copyright (C) 2008 William Stein <wstein@gmail.com> 3 # Distributed under the terms of the GNU General Public License (GPL), 4 # version 2 or any later version at your option. 5 # The full text of the GPL is available at: 6 # http://www.gnu.org/licenses/ 7 ############################################################################# 8 9 cdef extern from "string.h": 10 char *strcpy(char *s1, char *s2) 1 11 2 12 cdef double* to_double_array(v) except NULL: 13 """ 14 Transform a Python list of floats to a C array of doubles. The caller is 15 responsible for deallocating the resulting memory. 16 17 INPUT: 18 v -- a list of objects coercible to floats 19 OUTPUT: 20 a newly allocated C array of doubles 21 """ 3 22 cdef double x 4 23 cdef double* w = <double*> safe_malloc(sizeof(double)*len(v)) 5 24 cdef Py_ssize_t i = 0 … … cdef double* to_double_array(v) except N 9 28 return w 10 29 11 30 cdef int* to_int_array(v) except NULL: 31 """ 32 Transform a Python list of ints to a C array of ints. The caller is 33 responsible for deallocating the resulting memory. 34 35 INPUT: 36 v -- a list of objects coercible to ints 37 OUTPUT: 38 a newly allocated C array of ints 39 """ 12 40 cdef int x 13 41 cdef int* w = <int*> safe_malloc(sizeof(int)*len(v)) 14 42 cdef Py_ssize_t i = 0 … … cdef void* safe_malloc(int bytes) except 21 49 """ 22 50 malloc the given bytes of memory and check that the malloc 23 51 succeeds -- if not raise a MemoryError. 52 53 INPUT: 54 bytes -- a nonnegatie integer 55 56 OUTPUT: 57 void pointer or raise a MemoryError. 24 58 """ 25 59 cdef void* t = sage_malloc(bytes) 26 60 if not t: