Ticket #3773: sage-3773-3726-referee_response.patch
File sage-3773-3726-referee_response.patch, 20.6 KB (added by , 13 years ago)
sage/matrix/matrix_real_double_dense.pxd
# HG changeset patch
# User William Stein <wstein@gmail.com>
# Date 1218440054 25200
# Node ID 3dd3cb24895105810bc0a01ad697ac5e64635fe6
# Parent  61bf3243f0aca6aa5e1f3d9d59d701e59a3cb468
trac #3726 and #3773 -- address all the referee comments.

diff -r 61bf3243f0ac -r 3dd3cb248951 sage/matrix/matrix_real_double_dense.pxd
@@ -15,3 +15,4 @@ cdef class Matrix_real_double_dense(matr
     cdef int _LU_valid
     cdef _c_compute_LU(self)
     cdef set_unsafe_double(self, Py_ssize_t i, Py_ssize_t j, double value)
+    cdef double get_unsafe_double(self, Py_ssize_t i, Py_ssize_t j)
sage/matrix/matrix_real_double_dense.pyx
diff -r 61bf3243f0ac -r 3dd3cb248951 sage/matrix/matrix_real_double_dense.pyx
@@ -227,7 +227,9 @@ cdef class Matrix_real_double_dense(matr
 
     cdef set_unsafe_double(self, Py_ssize_t i, Py_ssize_t j, double value):
         gsl_matrix_set(self._matrix,i,j,value)
 
+    cdef double get_unsafe_double(self, Py_ssize_t i, Py_ssize_t j):
+        return gsl_matrix_get(self._matrix,i,j)
 
     ########################################################################
     # LEVEL 2 functionality
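Note (not part of the patch): get_unsafe_double is a C-level accessor and, like set_unsafe_double, is only callable from Cython code -- for example the nonnegativity check this patch adds to sage/stats/hmm/hmm.pyx below. From Python the same entry is read with ordinary indexing; a minimal illustrative session, assuming a Sage build with this patch applied:

    sage: m = matrix(RDF, [[0.2, 0.8], [0.5, 0.5]])
    sage: m[0, 1]    # Python-level counterpart of the Cython call m.get_unsafe_double(0, 1)
    0.8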
sage/stats/hmm/chmm.pyx
diff -r 61bf3243f0ac -r 3dd3cb248951 sage/stats/hmm/chmm.pyx
@@ -106,6 +106,8 @@ cdef class GaussianHiddenMarkovModel(Con
         pi -- list of floats that sums to 1.0; these are
              the initial probabilities of each hidden state
         name -- (default: None); a string
+        normalize -- (optional; default=True) whether or not to
+             normalize the model so the probabilities add to 1
 
     EXAMPLES:
     Define the transition matrix:
@@ -128,7 +130,7 @@ cdef class GaussianHiddenMarkovModel(Con
         [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)]
         Initial probabilities: [1.0, 0.0, 0.0]
     """
-    def __init__(self, A, B, pi=None, name=None):
+    def __init__(self, A, B, pi=None, name=None, normalize=True):
         """
         EXAMPLES:
         We make a very simple model:
@@ -148,5 +150,11 @@ cdef class GaussianHiddenMarkovModel(Con
             Emission parameters:
             []
             Initial probabilities: []
+
+        TESTS:
+        We test normalize=False:
+            sage: hmm.GaussianHiddenMarkovModel([[1,100],[0,1]], [(0,1),(0,2)], [1/2,1/2], normalize=False).transition_matrix()
+            [  1.0 100.0]
+            [  0.0   1.0]
         """
         ContinuousHiddenMarkovModel.__init__(self, A, B, pi, name=name)
@@ -164,6 +172,8 @@ cdef class GaussianHiddenMarkovModel(Con
         self._initialize_state(pi)
         self.m.class_change = NULL
         self.initialized = True
+        if normalize:
+            self.normalize()
 
     def _initialize_state(self, pi):
         """
@@ -293,12 +303,12 @@ cdef class GaussianHiddenMarkovModel(Con
         names compare to be equal.
 
         EXAMPLES:
-            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 1")
+            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 1", normalize=False)
             sage: m.__cmp__(m)
             0
 
         Note that the name doesn't matter:
-            sage: n = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 2")
+            sage: n = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 2", normalize=False)
             sage: m.__cmp__(n)
             0
 
@@ -485,7 +495,7 @@ cdef class GaussianHiddenMarkovModel(Con
         if ghmm_cmodel_normalize(self.m):
             raise RuntimeError, "error normalizing model (note that model may still have been partly changed)"
 
-    def sample(self, long length, long number):
+    def sample(self, long length, number=None):
         """
         Return number samples from this HMM of given length.
 
@@ -493,7 +503,8 @@ cdef class GaussianHiddenMarkovModel(Con
             length -- positive integer
 
         OUTPUT:
-            a list of number TimeSeries each of the given length
+            if number is not given, return a single TimeSeries.
+            if number is given, return a list of TimeSeries.
 
         EXAMPLES:
             sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
@@ -501,7 +512,17 @@ cdef class GaussianHiddenMarkovModel(Con
             sage: m.sample(5,2)
             [[-2.2808, -0.0699, 0.1858, 1.3624, 1.8252],
             [0.0080, 0.1244, 0.5098, 0.9961, 0.4710]]
+
+            sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
+            sage: set_random_seed(0)
+            sage: m.sample(5)
+            [-2.2808, -0.0699, 0.1858, 1.3624, 1.8252]
         """
+        if number is None:
+            single = True
+            number = 1
+        else:
+            single = False
         seed = random()
         cdef ghmm_cseq *d = ghmm_cmodel_generate_sequences(self.m, seed, length, number, length)
         cdef Py_ssize_t i, j
@@ -512,26 +533,11 @@ cdef class GaussianHiddenMarkovModel(Con
             for i from 0 <= i < length:
                 T._values[i] = d.seq[j][i]
             ans.append(T)
+        if single:
+            return ans[0]
         return ans
         # The line below would replace the above by something that returns a list of lists.
         #return [[d.seq[j][i] for i in range(length)] for j in range(number)]
-
-    def sample_single(self, long length):
-        """
-        Return a single sample computed using this Gaussian Hidden Markov Model.
-
-        INPUT:
-            length -- positive integer
-        OUTPUT:
-            a TimeSeries
-
-        EXAMPLES:
-            sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
-            sage: set_random_seed(0)
-            sage: m.sample_single(5)
-            [-2.2808, -0.0699, 0.1858, 1.3624, 1.8252]
-        """
-        return self.sample(length,1)[0]
 
 
 
@@ -657,7 +663,8 @@ cdef class GaussianHiddenMarkovModel(Con
         Baum-Welch algorithm to increase the probability of observing O.
 
         INPUT:
-            training_seqs -- a list of lists of emission symbols; all sequences of length 0 are ignored.
+            training_seqs -- a list of lists of emission symbols; all sequences of
+                             length 0 are ignored.
             max_iter -- integer or None (default: 10000) maximum number
                         of Baum-Welch steps to take
             log_likehood_cutoff -- positive float or None (default: 0.00001);
@@ -684,6 +691,31 @@ cdef class GaussianHiddenMarkovModel(Con
             [(1.0, 0.01)]
             Initial probabilities: [1.0]
 
+        We train using a list of lists:
+            sage: m = hmm.GaussianHiddenMarkovModel([[1,0],[0,1]], [(0,1),(0,2)], [1/2,1/2])
+            sage: m.baum_welch([[1,2,], [3,2]])
+            sage: m
+            Gaussian Hidden Markov Model with 2 States
+            Transition matrix:
+            [1.0 0.0]
+            [0.0 1.0]
+            Emission parameters:
+            [(1.946539535984342, 0.70508296299241024), (2.0208156913293394, 0.70680033099099593)]
+            Initial probabilities: [0.28024729110782109, 0.71975270889217891]
+
+        We train the same model, but weighting one of the lists more than the other.
+            sage: m = hmm.GaussianHiddenMarkovModel([[1,0],[0,1]], [(0,1),(0,2)], [1/2,1/2])
+            sage: m.baum_welch([([1,2,],10), ([3,2],1)])
+            sage: m
+            Gaussian Hidden Markov Model with 2 States
+            Transition matrix:
+            [1.0 0.0]
+            [0.0 1.0]
+            Emission parameters:
+            [(1.5851786151779879, 0.57264580740105153), (1.5945035666064733, 0.57928632238916189)]
+            Initial probabilities: [0.38546857052811945, 0.61453142947188055]
+
+
         Training sequences of length 0 are gracefully ignored:
             sage: m.baum_welch([])
             sage: m.baum_welch([([],1)])
@@ -713,7 +745,7 @@ cdef ghmm_cseq* to_cseq(seq) except NULL
     sequences a ValueError is raised, since GHMM doesn't treat
     this degenerate case well.
     """
-    if isinstance(seq, list) and len(seq) > 0 and not isinstance(seq[0], tuple):
+    if isinstance(seq, list) and len(seq) > 0 and not isinstance(seq[0], (list, tuple)):
         seq = TimeSeries(seq)
     if isinstance(seq, TimeSeries):
         seq = [(seq,float(1))]
@@ -787,6 +819,8 @@ cdef class GaussianMixtureHiddenMarkovMo
         pi -- list of floats that sums to 1.0; these are
              the initial probabilities of each hidden state
         name -- (default: None); a string
+        normalize -- (optional; default=True) whether or not to normalize
+             the model so the probabilities add to 1
 
     EXAMPLES:
         sage: A = [[0.5,0.5],[0.5,0.5]]
@@ -798,8 +832,9 @@ cdef class GaussianMixtureHiddenMarkovMo
         [0.5 0.5]
         [0.5 0.5]
         Emission parameters:
-        [[(0.5, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.10000000000000001))]]
+        [[(1.0, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.10000000000000001))]]
         Initial probabilities: [1.0, 0.0]
+
 
     TESTS:
     We test that standard deviations must be positive:
@@ -814,7 +849,7 @@ cdef class GaussianMixtureHiddenMarkovMo
         ...
         ValueError: number of Gaussian mixtures must be positive
     """
-    def __init__(self, A, B, pi=None, name=None):
+    def __init__(self, A, B, pi=None, name=None, normalize=True):
         """
         EXAMPLES:
             sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(0,1))]], [1], name='simple')
@@ -922,12 +957,11 @@ cdef class GaussianMixtureHiddenMarkovMo
 
         OUTPUT:
             list of lists of tuples (weight, (mu, sigma))
-        p
+
         EXAMPLES:
            sage: m = hmm.GaussianMixtureHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[(0.5,(0.0,1.0)), (0.1,(1,10000))],[(1,(1,1))]], [1,0])
            sage: m.emission_parameters()
-            [[(0.5, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))],
-            [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.0))]]
+            [[(1.0, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.0))]]
         """
         cdef Py_ssize_t i,j
 
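For reference (not part of the patch): after these chmm.pyx changes, sample() covers both the old behaviour and the removed sample_single(). A short illustrative session, with outputs copied from the doctests above and assuming a Sage build with this patch applied (the exact floating-point values depend on the GHMM random number generator):

    sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
    sage: set_random_seed(0)
    sage: m.sample(5)       # number omitted: a single TimeSeries
    [-2.2808, -0.0699, 0.1858, 1.3624, 1.8252]
    sage: set_random_seed(0)
    sage: m.sample(5, 2)    # number given: a list of TimeSeries
    [[-2.2808, -0.0699, 0.1858, 1.3624, 1.8252],
    [0.0080, 0.1244, 0.5098, 0.9961, 0.4710]]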
sage/stats/hmm/hmm.pyx
diff -r 61bf3243f0ac -r 3dd3cb248951 sage/stats/hmm/hmm.pyx
@@ -53,6 +53,8 @@ cdef class HiddenMarkovModel:
         [0.0 1.0]
         Initial probabilities: [1.0]
         """
+        cdef Py_ssize_t n
+
         # Convert A to a matrix
         if not is_Matrix(A):
             n = len(A)
@@ -93,21 +95,70 @@ cdef class HiddenMarkovModel:
         self.A = A
         self.B = B
 
+        # Check that all entries of A are nonnegative and raise a
+        # ValueError otherwise. Note that we don't check B since it
+        # has entries that are potentially negative in the continuous
+        # case. But GHMM prints clear warnings when the emission
+        # probabilities are negative, i.e., it does not silently give
+        # wrong results like it does for negative transition
+        # probabilities.
+        cdef Py_ssize_t i, j
+        for i from 0 <= i < self.A._nrows:
+            for j from 0 <= j < self.A._ncols:
+                if self.A.get_unsafe_double(i,j) < 0:
+                    raise ValueError, "each transition probability must be nonnegative"
+
+
 
 cdef class DiscreteHiddenMarkovModel(HiddenMarkovModel):
-
-    def __init__(self, A, B, pi=None, emission_symbols=None, name=None):
+    """
+    Create a discrete hidden Markov model.
+
+    hmm.DiscreteHiddenMarkovModel(A, B, pi=None, emission_symbols=None, name=None, normalize=True)
+
+    INPUTS:
+        A -- square matrix of doubles; the state change probabilities
+        B -- matrix of doubles; emission probabilities
+        pi -- list of floats; probabilities for each initial state
+        emission_state -- list of B.ncols() symbols (just used for printing)
+        name -- (optional) name of the model
+        normalize -- (optional; default=True) whether or not to normalize
+             the model so the probabilities add to 1
+
+    EXAMPLES:
+    We create a discrete HMM with 2 internal states on an alphabet of size 2.
+        sage: hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])
+        Discrete Hidden Markov Model with 2 States and 2 Emissions
+        Transition matrix:
+        [0.2 0.8]
+        [0.5 0.5]
+        Emission matrix:
+        [1.0 0.0]
+        [0.0 1.0]
+        Initial probabilities: [0.0, 1.0]
+
+    The transition probabilities must be nonnegative:
+        sage: hmm.DiscreteHiddenMarkovModel([[-0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])
+        Traceback (most recent call last):
+        ...
+        ValueError: each transition probability must be nonnegative
+
+    The transition probabilities are by default automatically normalized:
+        sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.3],[0.5,0.5]], [[1,0],[0,1]], [0,1])
+        sage: a.transition_matrix()
+        [0.4 0.6]
+        [0.5 0.5]
+    """
+    def __init__(self, A, B, pi=None, emission_symbols=None, name=None, normalize=True):
         """
-        INPUTS:
-            A -- square matrix of doubles; the state change probabilities
-            B -- matrix of doubles; emission probabilities
-            pi -- list of floats; probabilities for each initial state
-            emission_state -- list of B.ncols() symbols (just used for printing)
-            name -- (optional) name of the model
-
         EXAMPLES:
-        We create a discrete HMM with 2 internal states on an alphabet of size 2.
-            sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])
+            sage: hmm.DiscreteHiddenMarkovModel([[1]], [[0.0,1.0]], [1])
+            Discrete Hidden Markov Model with 1 States and 2 Emissions
+            Transition matrix:
+            [1.0]
+            Emission matrix:
+            [0.0 1.0]
+            Initial probabilities: [1.0]
         """
         # memory has not all been setup yet.
         self.initialized = False
@@ -203,6 +254,8 @@ cdef class DiscreteHiddenMarkovModel(Hid
 
         self.m.s = states
         self.initialized = True
+        if normalize:
+            self.normalize()
 
     def __cmp__(self, other):
         """
@@ -219,12 +272,12 @@ cdef class DiscreteHiddenMarkovModel(Hid
         names compare to be equal.
 
         EXAMPLES:
-            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2])
+            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2], normalize=False)
            sage: m.__cmp__(m)
            0
 
         Note that the name doesn't matter:
-            sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2])
+            sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2], normalize=False)
            sage: m.__cmp__(n)
            0
 
@@ -460,55 +513,49 @@ cdef class DiscreteHiddenMarkovModel(Hid
         """
         ghmm_dmodel_normalize(self.m)
 
-    def sample_single(self, long length):
-        """
-        Return a single sample computed using this Hidden Markov Model.
-
-        EXAMPLE:
-            sage: set_random_seed(0)
-            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1])
-            sage: a.sample_single(20)
-            [1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-            sage: a.sample_single(1000).count(0)
-            113
-
-        If the emission symbols are set
-            sage: set_random_seed(0)
-            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down'])
-            sage: print a.sample_single(10)
-            ['down', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'down', 'up']
-
-        """
-        seed = random()
-        cdef ghmm_dseq *d = ghmm_dmodel_generate_sequences(self.m, seed, length, 1, length)
-        cdef Py_ssize_t i
-        v = [d.seq[0][i] for i in range(length)]
-        ghmm_dseq_free(&d)
-        if self._emission_symbols_dict:
-            w = self._emission_symbols
-            return [w[i] for i in v]
-        else:
-            return v
-
-    def sample(self, long length, long number):
+    def sample(self, long length, number=None):
         """
         Return number samples from this HMM of given length.
+
+        INPUT:
+            length -- positive integer
+            number -- (default: None) if given, compute list of this many sample sequences
+
+        OUTPUT:
+            if number is not given, return a single TimeSeries.
+            if number is given, return a list of TimeSeries.
 
         EXAMPLES:
            sage: set_random_seed(0)
            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1])
            sage: print a.sample(10, 3)
            [[1, 0, 1, 1, 0, 1, 1, 0, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
+            sage: a.sample(15)
+            [1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1]
+            sage: list(a.sample(1000)).count(0)
+            95
+
+        If the emission symbols are set
+            sage: set_random_seed(0)
+            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down'])
+            sage: a.sample(10)
+            ['down', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'down', 'up']
         """
         seed = random()
+        if number is None:
+            number = 1
+            single = True
+        else:
+            single = False
         cdef ghmm_dseq *d = ghmm_dmodel_generate_sequences(self.m, seed, length, number, length)
         cdef Py_ssize_t i, j
         v = [[d.seq[j][i] for i in range(length)] for j in range(number)]
         if self._emission_symbols_dict:
             w = self._emission_symbols
-            return [[w[i] for i in z] for z in v]
-        else:
-            return v
+            v = [[w[i] for i in z] for z in v]
+        if single:
+            return v[0]
+        return v
 
     def emission_symbols(self):
         """
@@ -533,10 +580,10 @@ cdef class DiscreteHiddenMarkovModel(Hid
            sage: a.set_emission_symbols([3,5])
            sage: a.emission_symbols()
            [3, 5]
-            sage: a.sample_single(10)
+            sage: a.sample(10)
            [5, 3, 5, 5, 3, 5, 5, 3, 5, 3]
            sage: a.set_emission_symbols([pi,5/9+e])
-            sage: a.sample_single(10)
+            sage: a.sample(10)
            [e + 5/9, e + 5/9, e + 5/9, e + 5/9, e + 5/9, e + 5/9, pi, pi, e + 5/9, pi]
         """
         if emission_symbols is None:
@@ -707,6 +754,19 @@ cdef class DiscreteHiddenMarkovModel(Hid
            sage: 1.0/6
            0.166666666666667
 
+        We run Baum-Welch on a single sequence:
+            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[0.5,0.5],[0.5,0.5]], [0.5,0.5])
+            sage: a.baum_welch([1,0,1]*10)
+            sage: a
+            Discrete Hidden Markov Model with 2 States and 2 Emissions
+            Transition matrix:
+            [0.5 0.5]
+            [0.5 0.5]
+            Emission matrix:
+            [0.333333333333 0.666666666667]
+            [0.333333333333 0.666666666667]
+            Initial probabilities: [0.5, 0.5]
+
         TESTS:
         We test training with non-default string symbols:
            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[0.5,0.5],[0.5,0.5]], [0.5,0.5], ['up','down'])
@@ -783,7 +843,7 @@ def unpickle_discrete_hmm_v0(A, B, pi, e
 def unpickle_discrete_hmm_v0(A, B, pi, emission_symbols,name):
     """
     TESTS:
-        sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,0], name='test model')
+        sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [1,0], name='test model')
        sage: loads(dumps(m)) == m
        True
        sage: loads(dumps(m)).name()
@@ -795,7 +855,7 @@ def unpickle_discrete_hmm_v0(A, B, pi, e
        [0.1 0.9]
        Emission matrix:
        [0.0 1.0]
-        [1.0 1.0]
+        [0.5 0.5]
        Initial probabilities: [1.0, 0.0]
        Emission symbols: ['a', 'b']
    """
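For reference (not part of the patch): the Gaussian and discrete sample() methods now share the same dispatch convention on the optional number argument. A plain-Python sketch of that convention follows; the _generate_one helper is hypothetical and stands in for the single call to ghmm_cmodel_generate_sequences / ghmm_dmodel_generate_sequences made by the real code:

    def sample(self, length, number=None):
        # If number is omitted, return one sequence directly; otherwise
        # return a list of `number` sequences (sketch of the patched logic).
        if number is None:
            single, number = True, 1
        else:
            single = False
        samples = [self._generate_one(length) for _ in range(number)]
        return samples[0] if single else samples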