Ticket #3773: sage-3773-3726-referee_response.patch

File sage-3773-3726-referee_response.patch, 20.6 KB (added by was, 13 years ago)

this addresses all josh's comments on #3773 and also #3726

  • sage/matrix/matrix_real_double_dense.pxd

    # HG changeset patch
    # User William Stein <wstein@gmail.com>
    # Date 1218440054 25200
    # Node ID 3dd3cb24895105810bc0a01ad697ac5e64635fe6
    # Parent  61bf3243f0aca6aa5e1f3d9d59d701e59a3cb468
    trac #3726 and #3773 -- address all the referee comments.
    
    diff -r 61bf3243f0ac -r 3dd3cb248951 sage/matrix/matrix_real_double_dense.pxd
    a b cdef class Matrix_real_double_dense(matr 
    1515    cdef int _LU_valid
    1616    cdef _c_compute_LU(self)
    1717    cdef set_unsafe_double(self, Py_ssize_t i, Py_ssize_t j, double value)
     18    cdef double get_unsafe_double(self, Py_ssize_t i, Py_ssize_t j)
  • sage/matrix/matrix_real_double_dense.pyx

    diff -r 61bf3243f0ac -r 3dd3cb248951 sage/matrix/matrix_real_double_dense.pyx
    a b cdef class Matrix_real_double_dense(matr 
    227227
    228228    cdef set_unsafe_double(self, Py_ssize_t i, Py_ssize_t j, double value):
    229229        gsl_matrix_set(self._matrix,i,j,value)
    230    
     230
     231    cdef double get_unsafe_double(self, Py_ssize_t i, Py_ssize_t j):
     232        return gsl_matrix_get(self._matrix,i,j)
    231233
    232234    ########################################################################
    233235    # LEVEL 2 functionality
  • sage/stats/hmm/chmm.pyx

    diff -r 61bf3243f0ac -r 3dd3cb248951 sage/stats/hmm/chmm.pyx
    a b cdef class GaussianHiddenMarkovModel(Con 
    106106        pi -- list of floats that sums to 1.0; these are
    107107              the initial probabilities of each hidden state
    108108        name -- (default: None); a string
     109        normalize -- (optional; default=True) whether or not to
     110                     normalize the model so the probabilities add to 1
    109111
    110112    EXAMPLES:
    111113    Define the transition matrix:
    cdef class GaussianHiddenMarkovModel(Con 
    128130        [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)]
    129131        Initial probabilities: [1.0, 0.0, 0.0]
    130132    """
    131     def __init__(self, A, B, pi=None, name=None):
     133    def __init__(self, A, B, pi=None, name=None, normalize=True):
    132134        """
    133135        EXAMPLES:
    134136        We make a very simple model:
    cdef class GaussianHiddenMarkovModel(Con 
    148150            Emission parameters:
    149151            []
    150152            Initial probabilities: []
     153
     154        TESTS:
     155        We test normalize=False:
     156            sage: hmm.GaussianHiddenMarkovModel([[1,100],[0,1]], [(0,1),(0,2)], [1/2,1/2], normalize=False).transition_matrix()
     157            [  1.0 100.0]
     158            [  0.0   1.0]           
    151159        """
    152160        ContinuousHiddenMarkovModel.__init__(self, A, B, pi, name=name)
    153161
    cdef class GaussianHiddenMarkovModel(Con 
    164172        self._initialize_state(pi)
    165173        self.m.class_change = NULL
    166174        self.initialized = True
     175        if normalize:
     176            self.normalize()
    167177
    168178    def _initialize_state(self, pi):
    169179        """
    cdef class GaussianHiddenMarkovModel(Con 
    293303        names compare to be equal.
    294304
    295305        EXAMPLES:
    296             sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 1")
     306            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 1", normalize=False)
    297307            sage: m.__cmp__(m)
    298308            0
    299309
    300310        Note that the name doesn't matter:
    301             sage: n = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 2")
     311            sage: n = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 2", normalize=False)
    302312            sage: m.__cmp__(n)
    303313            0
    304314
    cdef class GaussianHiddenMarkovModel(Con 
    485495        if ghmm_cmodel_normalize(self.m):
    486496            raise RuntimeError, "error normalizing model (note that model may still have been partly changed)"
    487497
    488     def sample(self, long length, long number):
     498    def sample(self, long length, number=None):
    489499        """
    490500        Return number samples from this HMM of given length.
    491501
    cdef class GaussianHiddenMarkovModel(Con 
    493503            length -- positive integer
    494504           
    495505        OUTPUT:
    496             a list of number TimeSeries each of the given length
     506            if number is not given, return a single TimeSeries.
     507            if number is given, return a list of TimeSeries.
    497508
    498509        EXAMPLES:
    499510            sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
    cdef class GaussianHiddenMarkovModel(Con 
    501512            sage: m.sample(5,2)
    502513            [[-2.2808, -0.0699, 0.1858, 1.3624, 1.8252],
    503514             [0.0080, 0.1244, 0.5098, 0.9961, 0.4710]]
     515
     516            sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
     517            sage: set_random_seed(0)
     518            sage: m.sample(5)
     519            [-2.2808, -0.0699, 0.1858, 1.3624, 1.8252]
    504520        """
     521        if number is None:
     522            single = True
     523            number = 1
     524        else:
     525            single = False
    505526        seed = random()
    506527        cdef ghmm_cseq *d = ghmm_cmodel_generate_sequences(self.m, seed, length, number, length)
    507528        cdef Py_ssize_t i, j
    cdef class GaussianHiddenMarkovModel(Con 
    512533            for i from 0 <= i < length:
    513534                T._values[i] = d.seq[j][i]
    514535            ans.append(T)
     536        if single:
     537            return ans[0]
    515538        return ans
    516539        # The line below would replace the above by something that returns a list of lists.
    517540        #return [[d.seq[j][i] for i in range(length)] for j in range(number)]
    518  
    519     def sample_single(self, long length):
    520         """
    521         Return a single sample computed using this Gaussian Hidden Markov Model.
    522 
    523         INPUT:
    524             length -- positive integer
    525         OUTPUT:
    526             a TimeSeries
    527 
    528         EXAMPLES:
    529             sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
    530             sage: set_random_seed(0)
    531             sage: m.sample_single(5)
    532             [-2.2808, -0.0699, 0.1858, 1.3624, 1.8252]
    533         """
    534         return self.sample(length,1)[0]
    535541
    536542
    537543           
    cdef class GaussianHiddenMarkovModel(Con 
    657663        Baum-Welch algorithm to increase the probability of observing O.
    658664
    659665        INPUT:
    660             training_seqs -- a list of lists of emission symbols; all sequences of length 0 are ignored.
     666            training_seqs -- a list of lists of emission symbols; all sequences of
     667                      length 0 are ignored.
    661668            max_iter -- integer or None (default: 10000) maximum number
    662669                      of Baum-Welch steps to take
    663670            log_likehood_cutoff -- positive float or None (default: 0.00001);
    cdef class GaussianHiddenMarkovModel(Con 
    684691            [(1.0, 0.01)]
    685692            Initial probabilities: [1.0]
    686693
     694        We train using a list of lists:
     695            sage: m = hmm.GaussianHiddenMarkovModel([[1,0],[0,1]], [(0,1),(0,2)], [1/2,1/2])
     696            sage: m.baum_welch([[1,2,], [3,2]])
     697            sage: m
     698            Gaussian Hidden Markov Model with 2 States
     699            Transition matrix:
     700            [1.0 0.0]
     701            [0.0 1.0]
     702            Emission parameters:
     703            [(1.946539535984342, 0.70508296299241024), (2.0208156913293394, 0.70680033099099593)]
     704            Initial probabilities: [0.28024729110782109, 0.71975270889217891]
     705
     706        We train the same model, but waiting one of the lists more than the other.
     707            sage: m = hmm.GaussianHiddenMarkovModel([[1,0],[0,1]], [(0,1),(0,2)], [1/2,1/2])
     708            sage: m.baum_welch([([1,2,],10), ([3,2],1)])
     709            sage: m
     710            Gaussian Hidden Markov Model with 2 States
     711            Transition matrix:
     712            [1.0 0.0]
     713            [0.0 1.0]
     714            Emission parameters:
     715            [(1.5851786151779879, 0.57264580740105153), (1.5945035666064733, 0.57928632238916189)]
     716            Initial probabilities: [0.38546857052811945, 0.61453142947188055]
     717       
     718
    687719        Training sequences of length 0 are gracefully ignored:
    688720            sage: m.baum_welch([])
    689721            sage: m.baum_welch([([],1)])
    cdef ghmm_cseq* to_cseq(seq) except NULL 
    713745    sequences a ValueError is raised, since GHMM doesn't treat
    714746    this degenerate case well.
    715747    """
    716     if isinstance(seq, list) and len(seq) > 0 and not isinstance(seq[0], tuple):
     748    if isinstance(seq, list) and len(seq) > 0 and not isinstance(seq[0], (list, tuple)):
    717749        seq = TimeSeries(seq)
    718750    if isinstance(seq, TimeSeries):
    719751        seq = [(seq,float(1))]
    cdef class GaussianMixtureHiddenMarkovMo 
    787819        pi -- list of floats that sums to 1.0; these are
    788820              the initial probabilities of each hidden state
    789821        name -- (default: None); a string
     822        normalize -- (optional; default=True) whether or not to normalize
     823                     the model so the probabilities add to 1
    790824
    791825    EXAMPLES:
    792826        sage: A  = [[0.5,0.5],[0.5,0.5]]
    cdef class GaussianMixtureHiddenMarkovMo 
    798832        [0.5 0.5]
    799833        [0.5 0.5]
    800834        Emission parameters:
    801         [[(0.5, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.10000000000000001))]]
     835        [[(1.0, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.10000000000000001))]]
    802836        Initial probabilities: [1.0, 0.0]
     837       
    803838
    804839    TESTS:
    805840    We test that standard deviations must be positive:
    cdef class GaussianMixtureHiddenMarkovMo 
    814849        ...
    815850        ValueError: number of Gaussian mixtures must be positive
    816851    """
    817     def __init__(self, A, B, pi=None, name=None):
     852    def __init__(self, A, B, pi=None, name=None, normalize=True):
    818853        """
    819854        EXAMPLES:
    820855            sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(0,1))]], [1], name='simple')
    cdef class GaussianMixtureHiddenMarkovMo 
    922957
    923958        OUTPUT:
    924959           list of lists of tuples (weight, (mu, sigma))
    925 p
     960
    926961        EXAMPLES:
    927962            sage: m = hmm.GaussianMixtureHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[(0.5,(0.0,1.0)), (0.1,(1,10000))],[(1,(1,1))]], [1,0])
    928963            sage: m.emission_parameters()
    929             [[(0.5, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))],
    930              [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.0))]]
     964            [[(1.0, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.0))]]
    931965        """
    932966        cdef Py_ssize_t i,j
    933967       
  • sage/stats/hmm/hmm.pyx

    diff -r 61bf3243f0ac -r 3dd3cb248951 sage/stats/hmm/hmm.pyx
    a b cdef class HiddenMarkovModel: 
    5353            [0.0 1.0]
    5454            Initial probabilities: [1.0]
    5555        """
     56        cdef Py_ssize_t n
     57       
    5658        # Convert A to a matrix
    5759        if not is_Matrix(A):
    5860            n = len(A)
    cdef class HiddenMarkovModel: 
    9395        self.A = A
    9496        self.B = B
    9597
     98        # Check that all entries of A are nonnegative and raise a
     99        # ValueError otherwise. Note that we don't check B since it
     100        # has entries that are potentially negative in the continuous
     101        # case.  But GHMM prints clear warnings when the emission
     102        # probabilities are negative, i.e., it does not silently give
     103        # wrong results like it does for negative transition
     104        # probabilities.
     105        cdef Py_ssize_t i, j
     106        for i from 0 <= i < self.A._nrows:
     107            for j from 0 <= j < self.A._ncols:
     108                if self.A.get_unsafe_double(i,j) < 0:
     109                    raise ValueError, "each transition probability must be nonnegative"
     110               
     111
    96112
    97113cdef class DiscreteHiddenMarkovModel(HiddenMarkovModel):
    98    
    99     def __init__(self, A, B, pi=None, emission_symbols=None, name=None):
     114    """
     115    Create a discrete hidden Markov model.
     116
     117    hmm.DiscreteHiddenMarkovModel(A, B, pi=None, emission_symbols=None, name=None, normalize=True)
     118n   
     119    INPUTS:
     120        A  -- square matrix of doubles; the state change probabilities
     121        B  -- matrix of doubles; emission probabilities
     122        pi -- list of floats; probabilities for each initial state
     123        emission_state -- list of B.ncols() symbols (just used for printing)
     124        name -- (optional) name of the model
     125        normalize -- (optional; default=True) whether or not to normalize
     126                     the model so the probabilities add to 1
     127
     128    EXAMPLES:
     129    We create a discrete HMM with 2 internal states on an alphabet of size 2.
     130        sage: hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])
     131        Discrete Hidden Markov Model with 2 States and 2 Emissions
     132        Transition matrix:
     133        [0.2 0.8]
     134        [0.5 0.5]
     135        Emission matrix:
     136        [1.0 0.0]
     137        [0.0 1.0]
     138        Initial probabilities: [0.0, 1.0]
     139
     140    The transition probabilities must be nonnegative:
     141        sage: hmm.DiscreteHiddenMarkovModel([[-0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])
     142        Traceback (most recent call last):
     143        ...
     144        ValueError: each transition probability must be nonnegative
     145
     146    The transition probabilities are by default automatically normalized:
     147        sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.3],[0.5,0.5]], [[1,0],[0,1]], [0,1])
     148        sage: a.transition_matrix()
     149        [0.4 0.6]
     150        [0.5 0.5]
     151    """
     152    def __init__(self, A, B, pi=None, emission_symbols=None, name=None, normalize=True):
    100153        """
    101         INPUTS:
    102             A  -- square matrix of doubles; the state change probabilities
    103             B  -- matrix of doubles; emission probabilities
    104             pi -- list of floats; probabilities for each initial state
    105             emission_state -- list of B.ncols() symbols (just used for printing)
    106             name -- (optional) name of the model
    107 
    108154        EXAMPLES:
    109         We create a discrete HMM with 2 internal states on an alphabet of size 2.
    110             sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])
     155            sage: hmm.DiscreteHiddenMarkovModel([[1]], [[0.0,1.0]], [1])
     156            Discrete Hidden Markov Model with 1 States and 2 Emissions
     157            Transition matrix:
     158            [1.0]
     159            Emission matrix:
     160            [0.0 1.0]
     161            Initial probabilities: [1.0]
    111162        """
    112163        # memory has not all been setup yet.
    113164        self.initialized = False 
    cdef class DiscreteHiddenMarkovModel(Hid 
    203254               
    204255        self.m.s = states
    205256        self.initialized = True
     257        if normalize:
     258            self.normalize()
    206259
    207260    def __cmp__(self, other):
    208261        """
    cdef class DiscreteHiddenMarkovModel(Hid 
    219272        names compare to be equal.
    220273
    221274        EXAMPLES:
    222             sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2])
     275            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2], normalize=False)
    223276            sage: m.__cmp__(m)
    224277            0
    225278
    226279        Note that the name doesn't matter:
    227             sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2])
     280            sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2], normalize=False)
    228281            sage: m.__cmp__(n)
    229282            0
    230283
    cdef class DiscreteHiddenMarkovModel(Hid 
    460513        """
    461514        ghmm_dmodel_normalize(self.m)
    462515
    463     def sample_single(self, long length):
    464         """
    465         Return a single sample computed using this Hidden Markov Model.
    466        
    467         EXAMPLE:
    468             sage: set_random_seed(0)
    469             sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1])
    470             sage: a.sample_single(20)
    471             [1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    472             sage: a.sample_single(1000).count(0)
    473             113
    474 
    475         If the emission symbols are set
    476             sage: set_random_seed(0)
    477             sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down'])
    478             sage: print a.sample_single(10)
    479             ['down', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'down', 'up']
    480            
    481         """
    482         seed = random()
    483         cdef ghmm_dseq *d = ghmm_dmodel_generate_sequences(self.m, seed, length, 1, length)
    484         cdef Py_ssize_t i
    485         v = [d.seq[0][i] for i in range(length)]
    486         ghmm_dseq_free(&d)
    487         if self._emission_symbols_dict:
    488             w = self._emission_symbols
    489             return [w[i] for i in v]
    490         else:
    491             return v
    492 
    493     def sample(self, long length, long number):
     516    def sample(self, long length, number=None):
    494517        """
    495518        Return number samples from this HMM of given length.
     519
     520        INPUT:
     521            length -- positive integer
     522            number -- (default: None) if given, compute list of this many sample sequences
     523
     524        OUTPUT:
     525            if number is not given, return a single TimeSeries.
     526            if number is given, return a list of TimeSeries.
    496527
    497528        EXAMPLES:
    498529            sage: set_random_seed(0)
    499530            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1])
    500531            sage: print a.sample(10, 3)
    501532            [[1, 0, 1, 1, 0, 1, 1, 0, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
     533            sage: a.sample(15)
     534            [1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1]
     535            sage: list(a.sample(1000)).count(0)
     536            95
     537
     538        If the emission symbols are set
     539            sage: set_random_seed(0)
     540            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down'])
     541            sage: a.sample(10)
     542            ['down', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'down', 'up']
    502543        """
    503544        seed = random()
     545        if number is None:
     546            number = 1
     547            single = True
     548        else:
     549            single = False
    504550        cdef ghmm_dseq *d = ghmm_dmodel_generate_sequences(self.m, seed, length, number, length)
    505551        cdef Py_ssize_t i, j
    506552        v = [[d.seq[j][i] for i in range(length)] for j in range(number)]
    507553        if self._emission_symbols_dict:
    508554            w = self._emission_symbols
    509             return [[w[i] for i in z] for z in v]
    510         else:
    511             return v
     555            v = [[w[i] for i in z] for z in v]
     556        if single:
     557            return v[0]
     558        return v
    512559
    513560    def emission_symbols(self):
    514561        """
    cdef class DiscreteHiddenMarkovModel(Hid 
    533580            sage: a.set_emission_symbols([3,5])
    534581            sage: a.emission_symbols()
    535582            [3, 5]
    536             sage: a.sample_single(10)
     583            sage: a.sample(10)
    537584            [5, 3, 5, 5, 3, 5, 5, 3, 5, 3]
    538585            sage: a.set_emission_symbols([pi,5/9+e])
    539             sage: a.sample_single(10)
     586            sage: a.sample(10)
    540587            [e + 5/9, e + 5/9, e + 5/9, e + 5/9, e + 5/9, e + 5/9, pi, pi, e + 5/9, pi]
    541588        """
    542589        if emission_symbols is None:
    cdef class DiscreteHiddenMarkovModel(Hid 
    707754            sage: 1.0/6
    708755            0.166666666666667
    709756
     757        We run Baum-Welch on a single sequence:
     758            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[0.5,0.5],[0.5,0.5]], [0.5,0.5])
     759            sage: a.baum_welch([1,0,1]*10)
     760            sage: a
     761            Discrete Hidden Markov Model with 2 States and 2 Emissions
     762            Transition matrix:
     763            [0.5 0.5]
     764            [0.5 0.5]
     765            Emission matrix:
     766            [0.333333333333 0.666666666667]
     767            [0.333333333333 0.666666666667]
     768            Initial probabilities: [0.5, 0.5]
     769
    710770        TESTS:
    711771        We test training with non-default string symbols:
    712772            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[0.5,0.5],[0.5,0.5]], [0.5,0.5], ['up','down'])
    def unpickle_discrete_hmm_v0(A, B, pi, e 
    783843def unpickle_discrete_hmm_v0(A, B, pi, emission_symbols,name):
    784844    """
    785845    TESTS:
    786         sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,0], name='test model')
     846        sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [1,0], name='test model')
    787847        sage: loads(dumps(m)) == m
    788848        True
    789849        sage: loads(dumps(m)).name()
    def unpickle_discrete_hmm_v0(A, B, pi, e 
    795855        [0.1 0.9]
    796856        Emission matrix:
    797857        [0.0 1.0]
    798         [1.0 1.0]
     858        [0.5 0.5]
    799859        Initial probabilities: [1.0, 0.0]
    800860        Emission symbols: ['a', 'b']
    801861    """