Ticket #3773: sage-3773-part3.patch
File sage-3773-part3.patch, 14.0 KB (added by , 13 years ago) |
---|
-
sage/stats/hmm/chmm.pyx
# HG changeset patch # User William Stein <wstein@gmail.com> # Date 1218120013 25200 # Node ID 61bf3243f0aca6aa5e1f3d9d59d701e59a3cb468 # Parent 83635164ac474bbfc37e21e843bc62b41216d0e1 trac #3773 -- part 3 -- Add fixing of emissions for all models; get doctest coverage back to 100%; fix more bugs by doing so. diff -r 83635164ac47 -r 61bf3243f0ac sage/stats/hmm/chmm.pyx
a b cdef class GaussianHiddenMarkovModel(Con 128 128 [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)] 129 129 Initial probabilities: [1.0, 0.0, 0.0] 130 130 """ 131 def __init__(self, A, B, pi , name=None):131 def __init__(self, A, B, pi=None, name=None): 132 132 """ 133 133 EXAMPLES: 134 134 We make a very simple model: … … cdef class GaussianHiddenMarkovModel(Con 166 166 self.initialized = True 167 167 168 168 def _initialize_state(self, pi): 169 # Allocate and initialize states 169 """ 170 Allocate and initialize states. 171 172 INPUT: 173 pi -- initial probabilities 174 175 All other inputs are set as self.A and self.B. 176 177 EXAMPLES: 178 This function is called implicitly during object creation. It should 179 never be called directly by the user, unless they want to LEAKE MEMORY. 180 181 sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(1,10)], [1]) # indirect test 182 """ 170 183 cdef ghmm_cstate* states = <ghmm_cstate*> safe_malloc(sizeof(ghmm_cstate) * self.m.N) 171 184 cdef ghmm_cstate* state 172 185 cdef ghmm_c_emission* e … … cdef class GaussianHiddenMarkovModel(Con 338 351 339 352 return 0 340 353 341 def fix_emission _state(self, Py_ssize_t i, bint fixed=True):354 def fix_emissions(self, Py_ssize_t i, bint fixed=True): 342 355 """ 343 Sets the i-th emission state to be either fixed or not fixed. 344 If it is fixed, then running the Baum-Welch algorithm will not 345 change it. 356 Sets the Gaussian emission parameters for the i-th state to be 357 either fixed or not fixed. If it is fixed, then running the 358 Baum-Welch algorithm will not change the emission parameters 359 for the i-th state. 346 360 347 361 INPUT: 348 362 i -- nonnegative integer < self.m.N … … cdef class GaussianHiddenMarkovModel(Con 363 377 364 378 Now we run Baum-Welch with the emission states fixed. Notice that they don't change. 365 379 sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0]) 366 sage: m.fix_emission _state(0); m.fix_emission_state(1)380 sage: m.fix_emissions(0); m.fix_emissions(1) 367 381 sage: m.baum_welch([0,1]) 368 382 sage: m 369 383 Gaussian Hidden Markov Model with 2 States … … cdef class GaussianHiddenMarkovModel(Con 377 391 if i < 0 or i >= self.m.N: 378 392 raise IndexError, "index out of range" 379 393 self.m.s[i].e.fixed = fixed 380 381 def fix_hidden_state(self, Py_ssize_t i, bint fixed=True):382 """383 Sets the i-th hidden state to be either fixed or not fixed.384 If it is fixed, then running the Baum-Welch algorithm will not385 change it.386 387 INPUT:388 i -- nonnegative integer < self.m.N389 fixed -- bool390 391 EXAMPLES:392 """393 if i < 0 or i >= self.m.N:394 raise IndexError, "index out of range"395 self.m.s[i].fix = fixed396 394 397 395 def __repr__(self): 398 396 """ … … cdef class GaussianMixtureHiddenMarkovMo 789 787 pi -- list of floats that sums to 1.0; these are 790 788 the initial probabilities of each hidden state 791 789 name -- (default: None); a string 790 791 EXAMPLES: 792 sage: A = [[0.5,0.5],[0.5,0.5]] 793 sage: B = [[(0.5,(0.0,1.0)), (0.1,(1,10000))],[(1,(1,1)), (0,(0,0.1))]] 794 sage: pi = [1,0] 795 sage: hmm.GaussianMixtureHiddenMarkovModel(A, B, pi) 796 Gaussian Hidden Markov Model with 2 States 797 Transition matrix: 798 [0.5 0.5] 799 [0.5 0.5] 800 Emission parameters: 801 [[(0.5, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.10000000000000001))]] 802 Initial probabilities: [1.0, 0.0] 803 804 TESTS: 805 We test that standard deviations must be positive: 806 sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(0,0))]], [1]) 807 Traceback (most recent call last): 808 ... 809 ValueError: sigma must be positive (if weight is nonzero) 810 811 We test that number of mixtures must be positive: 812 sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[]], [1]) 813 Traceback (most recent call last): 814 ... 815 ValueError: number of Gaussian mixtures must be positive 792 816 """ 793 def __init__(self, A, B, pi , name=None):817 def __init__(self, A, B, pi=None, name=None): 794 818 """ 795 819 EXAMPLES: 820 sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(0,1))]], [1], name='simple') 821 Gaussian Hidden Markov Model simple with 1 States 822 Transition matrix: 823 [1.0] 824 Emission parameters: 825 [[(1.0, (0.0, 1.0))]] 826 Initial probabilities: [1.0] 796 827 """ 797 828 # Turn B into a list of lists 798 829 B = [flatten(x) for x in B] … … cdef class GaussianMixtureHiddenMarkovMo 800 831 if m == 0: 801 832 raise ValueError, "number of Gaussian mixtures must be positive" 802 833 B = [x + [0]*(m-len(x)) for x in B] 803 GaussianHiddenMarkovModel.__init__(self, A, B, pi) 804 print m//3 834 GaussianHiddenMarkovModel.__init__(self, A, B, pi, name=name) 805 835 self.m.M = m//3 806 836 # Set number of outputs. 807 837 808 838 def _initialize_state(self, pi): 809 # Allocate and initialize states 839 """ 840 Allocate and initialize states. 841 842 INPUT: 843 pi -- initial probabilities 844 845 All other inputs are set as self.A and self.B. 846 847 EXAMPLES: 848 This function is called implicitly during object creation. It should 849 never be called directly by the user, unless they want to LEAKE MEMORY. 850 851 sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(1,10))]], [1]) # indirect test 852 """ 810 853 cdef ghmm_cstate* states = <ghmm_cstate*> safe_malloc(sizeof(ghmm_cstate) * self.m.N) 811 854 cdef ghmm_cstate* state 812 855 cdef ghmm_c_emission* e … … cdef class GaussianMixtureHiddenMarkovMo 831 874 e[n].dimension = 1 832 875 mu = v[n*3+1] 833 876 sigma = v[n*3+2] 877 if sigma <= 0 and v[n*3]: 878 raise ValueError, "sigma must be positive (if weight is nonzero)" 834 879 weights.append( v[n*3] ) 835 880 e[n].mean.val = mu 836 881 e[n].variance.val = sigma*sigma # variance! not standard deviation … … p 890 935 for j in range(self.m.s[i].M)] for i in range(self.m.N)] 891 936 892 937 938 939 def fix_emissions(self, Py_ssize_t i, Py_ssize_t j, bint fixed=True): 940 """ 941 Sets the j-th Gaussian of the emission parameters for the i-th 942 state to be either fixed or not fixed. If it is fixed, then 943 running the Baum-Welch algorithm will not change the emission 944 parameters for the i-th state. 945 946 INPUT: 947 i -- nonnegative integer < self.m.N 948 j -- nonnegative integer < self.m.M 949 fixed -- bool 950 951 EXAMPLES: 952 We run Baum-Welch once without fixing the emission states: 953 sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0]) 954 sage: m.baum_welch([0,1]) 955 sage: m 956 Gaussian Hidden Markov Model with 2 States 957 Transition matrix: 958 [0.0 1.0] 959 [0.1 0.9] 960 Emission parameters: 961 [(0.0, 0.01), (1.0, 0.01)] 962 Initial probabilities: [1.0, 0.0] 963 964 Now we run Baum-Welch with the emission states fixed. Notice that they don't change. 965 sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0]) 966 sage: m.fix_emissions(0); m.fix_emissions(1) 967 sage: m.baum_welch([0,1]) 968 sage: m 969 Gaussian Hidden Markov Model with 2 States 970 Transition matrix: 971 [0.000368587006957 0.999631412993] 972 [ 0.1 0.9] 973 Emission parameters: 974 [(0.0, 1.0), (1.0, 1.0)] 975 Initial probabilities: [1.0, 0.0] 976 """ 977 if i < 0 or i >= self.m.N: 978 raise IndexError, "index i out of range" 979 if j < 0 or j >= self.m.M: 980 raise IndexError, "index j out of range" 981 self.m.s[i].e[j].fixed = fixed -
sage/stats/hmm/hmm.pyx
diff -r 83635164ac47 -r 61bf3243f0ac sage/stats/hmm/hmm.pyx
a b Hidden Markov Models 2 2 Hidden Markov Models 3 3 4 4 AUTHOR: William Stein 5 6 EXAMPLES:7 8 TODO:9 * make models pickleable (i.e., all parameters should be obtainable10 using functions to make this easy).11 * continuous hmm's12 5 """ 13 6 14 7 ############################################################################# … … cdef class HiddenMarkovModel: 44 37 sage: sage.stats.hmm.hmm.HiddenMarkovModel([[0.4,0.6],[1,0]], [[1,0],[0.5,0.5]], [0.5,0.5]) 45 38 <sage.stats.hmm.hmm.HiddenMarkovModel object at ...> 46 39 """ 47 def __init__(self, A, B, pi ):40 def __init__(self, A, B, pi=None): 48 41 """ 49 42 INPUT: 50 43 A -- matrix or list … … cdef class HiddenMarkovModel: 84 77 B = B.change_ring(RDF) 85 78 86 79 # Make sure the initial probabilities are all floats. 87 self.pi = [float(x) for x in pi] 88 if len(self.pi) != A.nrows(): 89 raise ValueError, "length of pi must equal number of rows of A" 80 if pi is None: 81 if A.nrows() == 0: 82 self.pi = [] 83 else: 84 self.pi = [1.0/A.nrows()]*A.nrows() 85 else: 86 self.pi = [float(x) for x in pi] 87 if len(self.pi) != A.nrows(): 88 raise ValueError, "length of pi must equal number of rows of A" 90 89 91 90 # Record the now validated matrices A and B as attributes. 92 91 # They get used later as attributes in the constructors for … … cdef class HiddenMarkovModel: 97 96 98 97 cdef class DiscreteHiddenMarkovModel(HiddenMarkovModel): 99 98 100 def __init__(self, A, B, pi , emission_symbols=None, name=None):99 def __init__(self, A, B, pi=None, emission_symbols=None, name=None): 101 100 """ 102 101 INPUTS: 103 102 A -- square matrix of doubles; the state change probabilities … … cdef class DiscreteHiddenMarkovModel(Hid 292 291 Deallocate memory allocated by the HMM. 293 292 294 293 EXAMPLES: 295 sage: a= hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1]) # indirect doctest296 sage: del a294 sage: m = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1]) # indirect doctest 295 sage: del m 297 296 """ 298 297 if self.initialized: 299 298 ghmm_dmodel_free(&self.m) 299 300 def fix_emissions(self, Py_ssize_t i, bint fixed=True): 301 """ 302 Sets the i-th emission parameters to be either fixed or not 303 fixed. If it is fixed, then running the Baum-Welch algorithm 304 will not change the emission parameters for the i-th state. 305 306 INPUT: 307 i -- nonnegative integer < self.m.N 308 fixed -- bool 309 310 EXAMPLES: 311 First without calling fix_emissions: 312 sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]],[[.5,.5],[.5,.5]]) 313 sage: m.baum_welch([0,0,0,1,1,1]) 314 sage: m.emission_matrix() 315 [ 1.0 0.0] 316 [3.92881039079e-05 0.999960711896] 317 318 We call fix_emissions on the first state and notice that the first 319 row of the emission matrix does not change: 320 sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]],[[.5,.5],[.5,.5]]) 321 sage: m.fix_emissions(0) 322 sage: m.baum_welch([0,0,0,1,1,1]) 323 sage: m.emission_matrix() 324 [ 0.5 0.5] 325 [0.000542712675606 0.999457287324] 326 327 We call fix_emissions on the second state and notice that the second 328 row of the emission matrix does not change: 329 sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]],[[.5,.5],[.5,.5]]) 330 sage: m.fix_emissions(1) 331 sage: m.baum_welch([0,0,0,1,1,1]) 332 sage: m.emission_matrix() 333 [ 0.999999904763 9.52366620142e-08] 334 [ 0.5 0.5] 335 336 TESTS: 337 Make sure that out of range indices are handled correctly with an IndexError. 338 sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]],[[.5,.5],[.5,.5]]) 339 sage: m.fix_emissions(2) 340 Traceback (most recent call last): 341 ... 342 IndexError: index out of range 343 sage: m.fix_emissions(-1) 344 Traceback (most recent call last): 345 ... 346 IndexError: index out of range 347 """ 348 if i < 0 or i >= self.m.N: 349 raise IndexError, "index out of range" 350 self.m.s[i].fix = fixed 300 351 301 352 def __repr__(self): 302 353 """ … … cdef class DiscreteHiddenMarkovModel(Hid 574 625 Viterbi path. 575 626 float -- log of the probability that the sequence of hidden 576 627 states actually produced the given sequence seq. 577 [[TODO: I do not understand precisely what this means.]]578 628 579 629 EXAMPLES: 580 630 sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[0.9,0.1],[0.1,0.9]], [0.5,0.5])