Ticket #13346: trac13346_kolakoski_cython-sl.patch

File trac13346_kolakoski_cython-sl.patch, 15.4 KB (added by slabbe, 10 years ago)
  • sage/combinat/words/word.py

    # HG changeset patch
    # User Sebastien Labbe <slabqc at gmail.com>
    # Date 1344542950 14400
    # Node ID fc5abecf9124c9af15c96e0af7d5d021a863743a
    # Parent  90cdc6a8ad4e4bed83142b4ca0e6c551acd73204
    Add cython implementation for Kolakoski Word
    
    diff --git a/sage/combinat/words/word.py b/sage/combinat/words/word.py
    a b from sage.combinat.words.finite_word imp 
    2727from sage.combinat.words.infinite_word import InfiniteWord_class
    2828from word_datatypes import (WordDatatype_str,
    2929                            WordDatatype_list,
    30                             WordDatatype_tuple)
     30                            WordDatatype_tuple,
     31                            WordDatatype_Kolakoski)
    3132from word_infinite_datatypes import (
    3233                            WordDatatype_iter_with_caching,
    3334                            WordDatatype_iter,
    def Word(data=None, alphabet=None, lengt 
    170171
    171172    return parent(data=data, length=length, datatype=datatype, caching=caching)
    172173
     174##### Specific words #####
     175class KolakoskiWord(WordDatatype_Kolakoski, InfiniteWord_class):
     176    r"""
     177
     178    EXAMPLES::
     179
     180        sage: K = words.KolakoskiWord(algorithm='Bernardi')
     181        sage: K
     182        word: 1221121221221121122121121221121121221221...
     183
     184    TESTS:
     185
     186    Pickle is supported ::
     187
     188        sage: K = words.KolakoskiWord(algorithm='Bernardi')
     189        sage: loads(dumps(K))
     190        word: 1221121221221121122121121221121121221221...
     191    """
     192    pass
     193
    173194#######################################################################
    174195#                                                                     #
    175196#                    Concrete word classes                            #
  • sage/combinat/words/word_datatypes.pyx

    diff --git a/sage/combinat/words/word_datatypes.pyx b/sage/combinat/words/word_datatypes.pyx
    a b  
    11r"""
    22Datatypes for finite words
     3
     4Each datatype class must define at least two methods:
     5
     6    * ``__iter__(self)``
     7    * ``__getitem__(self, key)``
     8
     9The first allows iteration over a word and the second allows to get the
     10i-th letter of a word.
    311"""
    412#*****************************************************************************
    513#       Copyright (C) 2009 Franco Saliola <saliola@gmail.com>
    Datatypes for finite words 
    1422#*****************************************************************************
    1523
    1624from sage.structure.sage_object cimport SageObject
     25import itertools
    1726
     27###############################
     28# Base class for Word datatypes
     29###############################
    1830cdef class WordDatatype(SageObject):
    1931    r"""
    2032    The generic WordDatatype class.
    cdef class WordDatatype(SageObject): 
    5062        """
    5163        return self.__class__, (self._parent, self._data)
    5264
     65################################################
     66# Basic Python Word datatypes (list, str, tuple)
     67################################################
    5368cdef class WordDatatype_list(WordDatatype):
    5469    r"""
    5570    Datatype class for words defined by lists.
    cdef class WordDatatype_tuple(WordDataty 
    11141129
    11151130    __add__ = __mul__
    11161131
     1132##########################################################################
     1133# Specific Word datatypes (Kolakoski word, Fixed point of morphisms, etc.)
     1134##########################################################################
     1135cdef class WordDatatype_Kolakoski(WordDatatype):
     1136    r"""
     1137    Word datatype class for the Kolakoski word.
     1138
     1139    This is a cython implementation inspired from the 10 lines of C code
     1140    written by Dominique Bernardi and shared during Sage Days 28 in Orsay,
     1141    France, in January 2011.
     1142
     1143    INPUT:
     1144
     1145    - ``parent`` - the parent
     1146
     1147    EXAMPLES::
     1148
     1149        sage: from sage.combinat.words.word_datatypes import WordDatatype_Kolakoski
     1150        sage: parent = Words([1,2])
     1151        sage: K = WordDatatype_Kolakoski(parent)
     1152        sage: K
     1153        <type 'sage.combinat.words.word_datatypes.WordDatatype_Kolakoski'>
     1154        sage: K[0]
     1155        1
     1156        sage: K[1]
     1157        2
     1158    """
     1159    cdef public _limit
     1160    def __init__(self, parent):
     1161        r"""
     1162        Constructor. See documentation of WordDatatype_Kolakoski for more
     1163        details.
     1164
     1165        EXAMPLES::
     1166
     1167            sage: from sage.combinat.words.word_datatypes import WordDatatype_Kolakoski
     1168            sage: parent = Words([1,2])
     1169            sage: WordDatatype_Kolakoski(parent)
     1170            <type 'sage.combinat.words.word_datatypes.WordDatatype_Kolakoski'>
     1171        """
     1172        self._parent = parent
     1173        self._limit = 365583569409
     1174
     1175    def __getitem__(self, n):
     1176        r"""
     1177        Return the n-th letter of the Kolakoski word.
     1178
     1179        This is a cython implementation inspired from the 10 lines of C
     1180        code written by Dominique Bernardi and shared during Sage Days 28
     1181        in Orsay, France, in January 2011.
     1182
     1183        INPUT:
     1184
     1185        - ``n`` - integer such that `n < 365583569409` or slice
     1186
     1187        OUPUT:
     1188
     1189        n-th letter of the Kolakoski Word or a substring
     1190
     1191        .. NOTE::
     1192
     1193            Assuming ``sizeof(unsigned long long)`` is `8`, i.e. 64 bit,
     1194            when the variable ``i`` is equal to ``365583569408`` then ``f``
     1195            is equal to ``2^64 - 1`` so that the line ``g = f + 1`` in the
     1196            loop will bust the capacity.  Hence ``__getitem__`` returns bad
     1197            values beyond ``365583569409``. Maybe the limit is smaller
     1198            because of other operations in the loop. This still has to be
     1199            verified.
     1200
     1201        EXAMPLES::
     1202
     1203            sage: from sage.combinat.words.word_datatypes import WordDatatype_Kolakoski
     1204            sage: parent = Words([1,2])
     1205            sage: K = WordDatatype_Kolakoski(parent)
     1206            sage: K
     1207            <type 'sage.combinat.words.word_datatypes.WordDatatype_Kolakoski'>
     1208            sage: K[0]
     1209            1
     1210            sage: K[1]
     1211            2
     1212            sage: [K[i] for i in range(20)]
     1213            [1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
     1214
     1215        Slices works too::
     1216
     1217            sage: K[:]
     1218            word: 1221121221221121122121121221121121221221...
     1219            sage: K[:20]
     1220            word: 12211212212211211221
     1221            sage: K[2:20:2]
     1222            word: 211221212
     1223            sage: K[2:20:-2]
     1224            word:
     1225            sage: K[20:2:-2]
     1226            word: 221212211
     1227
     1228        TESTS:
     1229
     1230        Not yet implemented for too large integers ::
     1231
     1232            sage: K[365583569410]
     1233            Traceback (most recent call last):
     1234            ...
     1235            NotImplementedError: when n is larger then 365583569409
     1236        """
     1237        cdef unsigned long long e = 0, f = 0, g, m, i
     1238        if isinstance(n, slice):
     1239            key = n
     1240            from sage.rings.all import Infinity
     1241            from math import ceil
     1242            if not(key.start is None) and key.start < 0 or \
     1243                    not(key.stop is None) and key.stop < 0:
     1244                raise ValueError, "for infinite words, start and stop values cannot be negative"
     1245            step = 1 if key.step is None else key.step
     1246            if step >= 0:
     1247                start = 0 if key.start is None else key.start
     1248                if key.stop is None:
     1249                    length = Infinity
     1250                else: # key.stop > 0
     1251                    length = int(max(0,ceil((key.stop-start)/float(step))))
     1252                data = itertools.islice(self, start, key.stop, step)
     1253                return self._parent(data, length=length)
     1254            else:
     1255                if key.start is None or key.start < 0:
     1256                    raise ValueError, "start value must be nonnegative for negative step values"
     1257                start = key.start
     1258                stop = 0 if key.stop is None else key.stop
     1259                length = int(max(0,ceil((key.stop-start)/float(step))))
     1260                data = list(itertools.islice(self, start+1))[key]
     1261                return self._parent(data, length=length)
     1262        elif n > self._limit:
     1263            raise NotImplementedError("when n is larger then %s" % self._limit)
     1264        else:
     1265            if n == 0:
     1266                e = 1
     1267            elif n == 1:
     1268                e = 0
     1269            else:
     1270                for i from 0 <= i < n-1:
     1271                    g = f + 1
     1272                    m = f ^ g
     1273                    e ^= m / 2
     1274                    f = g + (e & m) / 2
     1275            return int(2 - (e & 1))
     1276
     1277    def __iter__(self):
     1278        r"""
     1279        Iterator over the Kolakoski sequence.
     1280
     1281        This is a cython implementation inspired from the 10 lines of C
     1282        code written by Dominique Bernardi and shared during Sage Days 28
     1283        in Orsay, France, in January 2011.
     1284
     1285        .. NOTE::
     1286
     1287            This iterator is defined not further than `365583569409` terms
     1288            if ``sizeof(unsigned long long) == 8`` and less in fact (around
     1289            `10^11`).
     1290
     1291        TODO: Define this iterator further then the limit.
     1292
     1293        EXAMPLES::
     1294
     1295            sage: from sage.combinat.words.word_datatypes import WordDatatype_Kolakoski
     1296            sage: parent = Words([1,2])
     1297            sage: K = WordDatatype_Kolakoski(parent)
     1298            sage: it = iter(K)
     1299            sage: [it.next() for _ in range(20)]
     1300            [1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
     1301        """
     1302        yield 1
     1303        yield 2
     1304        cdef unsigned long long e = 0, f = 0, g, m
     1305        while True:
     1306            g = f + 1
     1307            m = f ^ g
     1308            e ^= m / 2
     1309            f = g + (e & m) / 2
     1310            yield int(2 - (e & 1))
     1311
     1312    def __reduce__(self):
     1313        r"""
     1314        Pickle support
     1315
     1316        TESTS::
     1317
     1318            sage: from sage.combinat.words.word_datatypes import WordDatatype_Kolakoski
     1319            sage: parent = Words([1,2])
     1320            sage: K = WordDatatype_Kolakoski(parent)
     1321            sage: K.__reduce__()
     1322            (<type 'sage.combinat.words.word_datatypes.WordDatatype_Kolakoski'>, (Words over Ordered Alphabet [1, 2],))
     1323        """
     1324        return self.__class__, (self._parent, )
  • sage/combinat/words/word_generators.py

    diff --git a/sage/combinat/words/word_generators.py b/sage/combinat/words/word_generators.py
    a b class WordGenerator(object): 
    947947            else:
    948948                s1, s0 = s1*cf.next() + s0, s1
    949949
    950     def KolakoskiWord(self, alphabet=(1,2)):
     950    def KolakoskiWord(self, alphabet=(1,2), algorithm='default'):
    951951        r"""
    952952        Returns the Kolakoski word over the given alphabet and
    953953        starting with the first letter of the alphabet.
    954954
     955        The classical Kolakoski sequence [1,3] was first studied by
     956        Oldenburger [2], where it appears as the unique solution to the
     957        problem of a trajectory on the alphabet `\{1,2\}` which is
     958        identical to its exponent trajectory.
     959
    955960        Let `A = \{a,b\}` be an alphabet, where `a` and `b` are two
    956961        distinct positive integers. The Kolakoski word `K_{a,b}`
    957962        over `A` and starting with `a` is the unique infinite word `w`
    958963        such that `w = \Delta(w)`, where `\Delta(w)` is the word
    959964        encoding the runs of `w` (see ``delta()`` method on words for
    960965        more details).
    961        
     966
    962967        Note that `K_{a,b} \neq K_{b,a}`. On the other hand, the
    963968        words `K_{a,b}` and `K_{b,a}` are the unique two words over `A`
    964969        that are fixed by `\Delta`.
    class WordGenerator(object): 
    967972
    968973        -  ``alphabet`` - (default: (1,2)) an iterable of two positive
    969974           integers
     975        - ``algorithm`` - string (optional, default: ``'default'``) Can be
     976          one of the following:
     977
     978            - ``'default``` - use ``'Bernardi'`` if ``alphabet`` is
     979              ``(1,2)``; otherwise, use ``'python'``
     980            - ``'python'`` - use python implementation
     981            - ``'Bernardi'`` - use cython implementation inspired from the
     982              10 lines of C code written by Dominique Bernardi and shared
     983              during Sage Days 28 in Orsay, France, in January 2011. In
     984              this case, the alphabet must be ``(1,2)``.
    970985
    971986        OUTPUT:
    972987
    class WordGenerator(object): 
    982997            sage: w.delta()
    983998            word: 1221121221221121122121121221121121221221...
    984999
     1000        The cython implementation is much faster than the python one::
     1001
     1002            sage: K = words.KolakoskiWord(algorithm='Bernardi')
     1003            sage: K[10^6]      # takes 0.02 seconds
     1004            2
     1005            sage: K = words.KolakoskiWord(algorithm='python')
     1006            sage: K[10^6]      # not tested : takes too long
     1007            2
     1008
    9851009        The other Kolakoski word on the same alphabet::
    9861010
    9871011            sage: w = words.KolakoskiWord(alphabet = (2,1))
    class WordGenerator(object): 
    9981022            sage: w.delta()
    9991023            word: 2255222225555522552255225555522222555552...
    10001024
    1001         TESTS::
     1025        TESTS:
     1026
     1027        We make sure both implementation correspond for the prefix of
     1028        length 100::
     1029
     1030            sage: Kb = words.KolakoskiWord(algorithm='Bernardi')
     1031            sage: Kp = words.KolakoskiWord(algorithm='python')
     1032            sage: Kp[:100] == Kb[:100]
     1033            True
     1034
     1035        We make sure the definition is satisfied::
    10021036
    10031037            sage: for i in range(1,10):
    10041038            ...       for j in range(1,10):
    class WordGenerator(object): 
    10181052        - [1] William Kolakoski, proposal 5304, American Mathematical Monthly
    10191053          72 (1965), 674; for a partial solution, see "Self Generating Runs,"
    10201054          by Necdet Üçoluk, Amer. Math. Mon. 73 (1966), 681-2.
     1055
     1056        - [2] R. Oldenburger, Exponent trajectories in dynamical systems,
     1057          Trans. Amer. Math. Soc. 46 (1939), 453–466.
     1058
     1059        - [3] http://en.wikipedia.org/wiki/Kolakoski_sequence
     1060
     1061        AUTHORS:
     1062
     1063        - Alexandre Blondin-Massé (January 2011) - Python implementation
     1064        - Sébastien Labbé (February 2011) - Added a Cython implementation
     1065          which was easily translated from the 10 lines of C code written
     1066          by Dominique Bernardi and shared during Sage Days 28 in Orsay.
    10211067        """
    1022         a, b = alphabet
    1023         if a not in ZZ or a <= 0 or b not in ZZ or b <= 0 or a == b:
    1024             msg = 'The alphabet (=%s) must consist of two distinct positive integers'%(alphabet,)
    1025             raise ValueError, msg
    1026         return Words(alphabet)(self._KolakoskiWord_iterator(a, b), datatype = 'iter')
     1068        # Change default into something
     1069        if algorithm == 'default':
     1070            if alphabet == (1,2):
     1071                algorithm = 'Bernardi'
     1072            else:
     1073                algorithm = 'python'
     1074
     1075        # Return proper Kolakoski word
     1076        if algorithm == 'Bernardi':
     1077            if not alphabet == (1,2):
     1078                raise ValueError, "With Bernardi's algorithm, the alphabet (=%s) must be (1,2)" % alphabet
     1079            from sage.combinat.words.word import KolakoskiWord
     1080            parent = Words(alphabet=alphabet)
     1081            return KolakoskiWord(parent)
     1082        elif algorithm == 'python':
     1083            a, b = alphabet
     1084            if a not in ZZ or a <= 0 or b not in ZZ or b <= 0 or a == b:
     1085                msg = 'The alphabet (=%s) must consist of two distinct positive integers'%(alphabet,)
     1086                raise ValueError, msg
     1087            return Words(alphabet)(self._KolakoskiWord_iterator(a, b), datatype = 'iter')
     1088        else:
     1089            raise ValueError, "Unkonwn value for algorithm (=%s)" % algorithm
     1090
    10271091
    10281092    def _KolakoskiWord_iterator(self, a=1, b=2):
    10291093        r"""