Ticket #14159: trac_14159_weak_value_triple_dict.patch

File trac_14159_weak_value_triple_dict.patch, 18.0 KB (added by SimonKing, 7 years ago)

Optional weak values for mono- and tripledict

  • sage/categories/homset.py

    # HG changeset patch
    # User Simon King <simon.king@uni-jena.de>
    # Date 1363025892 -3600
    # Node ID d10aa3f464c3f9f878e7e8f7e7a7cacfa9ab944b
    # Parent  02fbdf47d9b8c6d6bad9256009a28ce1965e99c2
    #14159: Optional support for weak values in Triple- and MonoDict. Safer callback.
    
    diff --git a/sage/categories/homset.py b/sage/categories/homset.py
    a b  
    7474###################################
    7575# Use the weak "triple" dictionary
    7676# introduced in trac ticket #715
     77# with weak values, as introduced in
     78# trac ticket #14159
    7779
    78 from weakref import KeyedRef
    79 from sage.structure.coerce_dict import signed_id, TripleDict
    80 _cache = TripleDict(53)
     80from sage.structure.coerce_dict import TripleDict
     81_cache = TripleDict(53, weak_values=True)
    8182
    8283def Hom(X, Y, category=None):
    8384    """
     
    216217    global _cache
    217218    key = (X,Y,category)
    218219    try:
    219         H = _cache[key]()
     220        H = _cache[key]
    220221    except KeyError:
    221222        H = None
    222223    if H is not None:
     
    244245    # Now, as the category may have changed, we try to find the hom set in the cache, again:
    245246    key = (X,Y,category)
    246247    try:
    247         H = _cache[key]()
     248        H = _cache[key]
    248249    except KeyError:
    249250        H = None
    250251    if H is not None:
     
    263264    H = category.hom_category().parent_class(X, Y, category = category)
    264265           
    265266    ##_cache[key] = weakref.ref(H)
    266     _cache[key] = KeyedRef(H, _cache.eraser, (signed_id(X),signed_id(Y),signed_id(category)))
     267    _cache[key] = H
    267268    return H
    268269
    269270def hom(X, Y, f):
  • sage/structure/coerce_dict.pxd

    diff --git a/sage/structure/coerce_dict.pxd b/sage/structure/coerce_dict.pxd
    a b  
    22    cdef __weakref__
    33    cdef Py_ssize_t _size
    44    cdef buckets
     5    cdef bint weak_values
    56    cdef double threshold
    67    cdef public MonoDictEraser eraser
    78    cdef get(self, object k)
     
    1415    cdef __weakref__
    1516    cdef Py_ssize_t _size
    1617    cdef buckets
     18    cdef bint weak_values
    1719    cdef double threshold
    1820    cdef public TripleDictEraser eraser
    1921    cdef get(self, object k1, object k2, object k3)
  • sage/structure/coerce_dict.pyx

    diff --git a/sage/structure/coerce_dict.pyx b/sage/structure/coerce_dict.pyx
    a b  
    1010Containers for storing coercion data
    1111
    1212This module provides :class:`TripleDict` and :class:`MonoDict`. These are
    13 structures similar to ``WeakKeyDictionary`` in Python's weakref module,
    14 and are optimized for lookup speed. The keys for :class:`TripleDict` consist
    15 of triples (k1,k2,k3) and are looked up by identity rather than equality. The
    16 keys are stored by weakrefs if possible. If any one of the components k1, k2,
    17 k3 gets garbage collected, then the entry is removed from the :class:`TripleDict`.
     13structures similar to :class:`~weakref.WeakKeyDictionary` in Python's weakref
     14module, and are optimized for lookup speed. The keys for :class:`TripleDict`
     15consist of triples (k1,k2,k3) and are looked up by identity rather than
     16equality. The keys are stored by weakrefs if possible. If any one of the
     17components k1, k2, k3 gets garbage collected, then the entry is removed from
     18the :class:`TripleDict`.
    1819
    1920Key components that do not allow for weakrefs are stored via a normal
    2021refcounted reference. That means that any entry stored using a triple
     
    2223as an entry in a normal dictionary: Its existence in :class:`TripleDict`
    2324prevents it from being garbage collected.
    2425
    25 That container currently is used to store coercion and conversion maps
    26 between two parents (:trac:`715`) and to store homsets of pairs of objects
    27 of a category (:trac:`11521`). In both cases, it is essential that the parent
    28 structures remain garbage collectable, it is essential that the data access
    29 is faster than with a usual ``WeakKeyDictionary``, and we enforce the "unique
    30 parent condition" in Sage (parent structures should be identical if they are
    31 equal).
     26That container currently is used to store coercion and conversion maps between
     27two parents (:trac:`715`) and to store homsets of pairs of objects of a
     28category (:trac:`11521`). In both cases, it is essential that the parent
     29structures remain garbage collectable, it is essential that the data access is
     30faster than with a usual :class:`~weakref.WeakKeyDictionary`, and we enforce
     31the "unique parent condition" in Sage (parent structures should be identical
     32if they are equal).
    3233
    3334:class:`MonoDict` behaves similarly, but it takes a single item as a key. It
    3435is used for caching the parents which allow a coercion map into a fixed other
    3536parent (:trac:`12313`).
    3637
     38By :trac:`14159`, :class:`MonoDict` and :class:`TripleDict` can be optionally
     39used with weak references on the values.
     40
    3741"""
    3842include "../ext/python_list.pxi"
    3943
     
    178182        cdef list buckets = D.buckets
    179183        if buckets is None:
    180184            return
    181         cdef Py_ssize_t h = r.key
     185        cdef Py_ssize_t h
     186        cdef int offset
     187        h,offset = r.key
    182188        cdef list bucket = <object>PyList_GET_ITEM(buckets, (<size_t>h) % PyList_GET_SIZE(buckets))
    183189        cdef Py_ssize_t i
    184190        for i from 0 <= i < PyList_GET_SIZE(bucket) by 3:
    185191            if PyInt_AsSsize_t(PyList_GET_ITEM(bucket,i))==h:
    186                 del bucket[i:i+3]
    187                 D._size -= 1
    188                 break
     192                if PyList_GET_ITEM(bucket,i+offset)==<void *>r:
     193                    del bucket[i:i+3]
     194                    D._size -= 1
     195                    break
     196                else:
     197                    break
    189198
    190199cdef class TripleDictEraser:
    191200    """
     
    278287        # stored key of the unique triple r() had been part of.
    279288        # We remove that unique triple from self.D
    280289        cdef Py_ssize_t k1,k2,k3
    281         k1,k2,k3 = r.key
     290        cdef int offset
     291        k1,k2,k3,offset = r.key
    282292        cdef Py_ssize_t h = (k1 + 13*k2 ^ 503*k3)
    283293        cdef list bucket = <object>PyList_GET_ITEM(buckets, (<size_t>h) % PyList_GET_SIZE(buckets))
    284294        cdef Py_ssize_t i
     
    286296            if PyInt_AsSsize_t(PyList_GET_ITEM(bucket, i))==k1 and \
    287297               PyInt_AsSsize_t(PyList_GET_ITEM(bucket, i+1))==k2 and \
    288298               PyInt_AsSsize_t(PyList_GET_ITEM(bucket, i+2))==k3:
    289                 del bucket[i:i+7]
    290                 D._size -= 1
    291                 break
     299                if PyList_GET_ITEM(bucket, i+offset)==<void *>r:
     300                    del bucket[i:i+7]
     301                    D._size -= 1
     302                    break
     303                else:
     304                    break
    292305
    293306cdef class MonoDict:
    294307    """
     
    306319    It is bare-bones in the sense that not all dictionary methods are
    307320    implemented.
    308321
     322    IMPLEMENTATION:
     323
    309324    It is implemented as a list of lists (hereafter called buckets). The bucket
    310325    is chosen according to a very simple hash based on the object pointer,
    311326    and each bucket is of the form [id(k1), r1, value1, id(k2), r2, value2, ...],
     
    317332    In the latter case the presence of the key in the dictionary prevents it from
    318333    being garbage collected.
    319334
    320     To spread objects evenly, the size should ideally be a prime, and certainly
    321     not divisible by 2.
     335    INPUT:
     336
     337    - ``size`` -- an integer, the initial number of buckets. To spread objects
     338      evenly, the size should ideally be a prime, and certainly not divisible
     339      by 2.
     340    - ``data`` -- optional dict-like object (must provide ``iteritems()``) defining initial data.
     341    - ``threshold`` -- optional number, default `0.7`. It determines how frequently
     342      the dictionary will be resized (large threshold implies rare resizing).
     343    - ``weak_values`` -- optional bool (default False). If it is true, weak references
     344      to the values in this dictionary will be used, when possible.
    322345
    323346    EXAMPLES::
    324347
     
    346369    Not all features of Python dictionaries are available, but iteration over
    347370    the dictionary items is possible::
    348371
    349         sage: # for some reason the following fails in "make ptest"
     372        sage: # for some reason the following failed in "make ptest"
    350373        sage: # on some installations, see #12313 for details
    351374        sage: sorted(L.iteritems()) # random layout
    352375        [(-15, 3), ('a', 1), ('ab', 2)]
     
    410433        sage: len(LE)    # indirect doctest
    411434        1
    412435
    413     AUTHOR:
     436    TESTS:
     437
     438    Here, we demonstrate the use of weak values.
     439    ::
     440
     441        sage: M = MonoDict(13)
     442        sage: MW = MonoDict(13, weak_values=True)
     443        sage: class Foo: pass
     444        sage: a = Foo()
     445        sage: b = Foo()
     446        sage: k = 1
     447        sage: M[k] = a
     448        sage: MW[k] = b
     449        sage: M[k] is a
     450        True
     451        sage: MW[k] is b
     452        True
     453        sage: k in M
     454        True
     455        sage: k in MW
     456        True
     457
     458    While ``M`` uses a strong reference to ``a``, ``MW`` uses a *weak*
     459    reference to ``b``, and after deleting ``b``, the corresponding item of
     460    ``MW`` will be removed during the next garbage collection::
     461
     462        sage: import gc
     463        sage: del a,b
     464        sage: _ = gc.collect()
     465        sage: k in M
     466        True
     467        sage: k in MW
     468        False
     469        sage: len(MW)
     470        0
     471        sage: len(M)
     472        1
     473
     474    Note that ``MW`` also accepts values that do not allow for weak references::
     475
     476        sage: MW[k] = int(5)
     477        sage: MW[k]
     478        5
     479
     480    AUTHORS:
    414481
    415482    - Simon King (2012-01)
    416483    - Nils Bruin (2012-08)
     484    - Simon King (2013-02)
    417485    """
    418     def __init__(self, size, data=None, threshold=0.7):
     486    def __init__(self, size, data=None, threshold=0.7, weak_values=False):
    419487        """
    420488        Create a special dict using singletons for keys.
    421489
     
    432500        self.threshold = threshold
    433501        self.buckets = [[] for i from 0 <= i < size]
    434502        self._size = 0
     503        self.weak_values = weak_values
    435504        self.eraser = MonoDictEraser(self)
    436505        if data is not None:
    437506            for k, v in data.iteritems():
     
    563632                if isinstance(r, KeyedRef) and PyWeakref_GetObject(r) == Py_None:
    564633                    return False
    565634                else:
    566                     return True
     635                    return (not self.weak_values) or PyWeakref_GetObject(<object>PyList_GET_ITEM(bucket, i+2)) != Py_None
    567636        return False
    568637
    569638    def __getitem__(self, k):
     
    599668        cdef Py_ssize_t i
    600669        cdef list all_buckets = self.buckets
    601670        cdef list bucket = <object>PyList_GET_ITEM(all_buckets, (<size_t>h) % PyList_GET_SIZE(all_buckets))
    602         cdef object r
     671        cdef object r, val
     672        cdef PyObject * out
    603673        for i from 0 <= i < PyList_GET_SIZE(bucket) by 3:
    604674            if PyInt_AsSsize_t(PyList_GET_ITEM(bucket, i)) == h:
    605675                r = <object>PyList_GET_ITEM(bucket, i+1)
    606676                if isinstance(r, KeyedRef) and PyWeakref_GetObject(r) == Py_None:
    607677                    raise KeyError, k
    608678                else:
    609                     return <object>PyList_GET_ITEM(bucket, i+2)
     679                    val = <object>PyList_GET_ITEM(bucket, i+2)
     680                    if self.weak_values:
     681                        if not isinstance(val, KeyedRef):
     682                            return val
     683                        out = PyWeakref_GetObject(val)
     684                        if out == Py_None:
     685                            raise KeyError, k
     686                        return <object>out
     687                    else:
     688                        return val
    610689        raise KeyError, k
    611690
    612691    def __setitem__(self, k, value):
     
    634713            self.resize()
    635714        cdef Py_ssize_t h = signed_id(k)
    636715        cdef Py_ssize_t i
     716        if self.weak_values:
     717            try:
     718                value = KeyedRef(value,self.eraser,(h,2))
     719            except TypeError:
     720                pass
    637721        cdef list bucket = <object>PyList_GET_ITEM(self.buckets,(<size_t> h) % PyList_GET_SIZE(self.buckets))
    638722        cdef object r
    639723        for i from 0 <= i < PyList_GET_SIZE(bucket) by 3:
     
    669753        #investigate our partial entry.
    670754        PyList_Append(bucket, h)
    671755        try:
    672             PyList_Append(bucket, KeyedRef(k,self.eraser,h))
     756            PyList_Append(bucket, KeyedRef(k,self.eraser,(h,1)))
    673757        except TypeError:
    674758            PyList_Append(bucket, k)
    675759        PyList_Append(bucket, value)
     
    836920    If a key component ki supports weak references then ri is a weak reference to
    837921    ki; otherwise ri is identical to ki.
    838922   
    839     If any of the key components k1,k2,k3 (this can happen for a key component that
    840     supports weak references) gets garbage collected then the entire entry
    841     disappears. In that sense this structure behaves like a nested WeakKeyDictionary.
     923    INPUT:
    842924
    843     To spread objects evenly, the size should ideally be a prime, and certainly
    844     not divisible by 2.
     925    - ``size`` -- an integer, the initial number of buckets. To spread objects
     926      evenly, the size should ideally be a prime, and certainly not divisible
     927      by 2.
     928    - ``data`` -- optional dict-like object (must provide ``iteritems()``) with key triples, defining initial data.
     929    - ``threshold`` -- optional number, default `0.7`. It determines how frequently
     930      the dictionary will be resized (large threshold implies rare resizing).
     931    - ``weak_values`` -- optional bool (default False). If it is true, weak references
     932      to the values in this dictionary will be used, when possible.
     933
     934    If any of the key components k1,k2,k3 (this can happen for a key component
     935    that supports weak references) gets garbage collected then the entire
     936    entry disappears. In that sense this structure behaves like a nested
     937    :class:`~weakref.WeakKeyDictionary`.
    845938
    846939    EXAMPLES::
    847940
     
    9201013        sage: len(LE)    # indirect doctest
    9211014        1
    9221015
     1016    TESTS:
     1017
     1018    Here, we demonstrate the use of weak values.
     1019    ::
     1020
     1021        sage: class Foo: pass
     1022        sage: T = TripleDict(13)
     1023        sage: TW = TripleDict(13, weak_values=True)
     1024        sage: a = Foo()
     1025        sage: b = Foo()
     1026        sage: k = 1
     1027        sage: T[a,k,k]=1
     1028        sage: T[k,a,k]=2
     1029        sage: T[k,k,a]=3
     1030        sage: T[k,k,k]=a
     1031        sage: TW[b,k,k]=1
     1032        sage: TW[k,b,k]=2
     1033        sage: TW[k,k,b]=3
     1034        sage: TW[k,k,k]=b
     1035        sage: len(T)
     1036        4
     1037        sage: len(TW)
     1038        4
     1039        sage: (k,k,k) in T
     1040        True
     1041        sage: (k,k,k) in TW
     1042        True
     1043        sage: T[k,k,k] is a
     1044        True
     1045        sage: TW[k,k,k] is b
     1046        True
     1047
     1048    Now, ``T`` holds a strong reference to ``a``, namely in ``T[k,k,k]``. Hence,
     1049    when we delete ``a``, *all* items of ``T`` survive::
     1050
     1051        sage: del a
     1052        sage: _ = gc.collect()
     1053        sage: len(T)
     1054        4
     1055
     1056    Only when we remove the strong reference do the items become collectable::
     1057
     1058        sage: del T[k,k,k]
     1059        sage: _ = gc.collect()
     1060        sage: len(T)
     1061        0
     1062
     1063    The situation is different for ``TW``, since it only holds *weak*
     1064    references to ``b``. Therefore, all items become collectable after
     1065    deleting ``b``::
     1066
     1067        sage: del b
     1068        sage: _ = gc.collect()
     1069        sage: len(TW)
     1070        0
     1071
    9231072    .. NOTE::
    9241073
    9251074        The index `h` corresponding to the key [k1, k2, k3] is computed as a
     
    9491098    - Simon King, 2012-01
    9501099
    9511100    - Nils Bruin, 2012-08
     1101
     1102    - Simon King, 2013-02
    9521103    """
    9531104
    954     def __init__(self, size, data=None, threshold=0.7):
     1105    def __init__(self, size, data=None, threshold=0.7, weak_values=False):
    9551106        """
    9561107        Create a special dict using triples for keys.
    9571108
     
    9681119        self.threshold = threshold
    9691120        self.buckets = [[] for i from 0 <= i <  size]
    9701121        self._size = 0
     1122        self.weak_values = weak_values
    9711123        self.eraser = TripleDictEraser(self)
    9721124        if data is not None:
    9731125            for (k1,k2,k3), v in data.iteritems():
     
    11301282        cdef Py_ssize_t h3 = signed_id(k3)
    11311283        cdef Py_ssize_t h = (h1 + 13*h2 ^ 503*h3)
    11321284
    1133         cdef object r1,r2,r3
     1285        cdef object r1,r2,r3, val
     1286        cdef PyObject* ref_val
    11341287        cdef Py_ssize_t i
    11351288        cdef list all_buckets = self.buckets
    11361289        cdef list bucket = <object>PyList_GET_ITEM(all_buckets, (<size_t>h )% PyList_GET_SIZE(all_buckets))
     
    11471300                        (isinstance(r3,KeyedRef) and PyWeakref_GetObject(r3) == Py_None):
    11481301                    raise KeyError, (k1,k2,k3)
    11491302                else:
    1150                     return <object>PyList_GET_ITEM(bucket, i+6)
     1303                    val = <object>PyList_GET_ITEM(bucket, i+6)
     1304                    if self.weak_values:
     1305                        if not isinstance(val, KeyedRef):
     1306                            return val
     1307                        ref_val = PyWeakref_GetObject(val)
     1308                        if ref_val == Py_None:
     1309                            raise KeyError, (k1,k2,k3)
     1310                        return <object>ref_val
     1311                    else:
     1312                        return val
    11511313        raise KeyError, (k1, k2, k3)
    11521314
    11531315    def __setitem__(self, k, value):
     
    11771339        cdef Py_ssize_t h2 = signed_id(k2)
    11781340        cdef Py_ssize_t h3 = signed_id(k3)
    11791341        cdef Py_ssize_t h = (h1 + 13*h2 ^ 503*h3)
     1342        if self.weak_values:
     1343            try:
     1344                value = KeyedRef(value,self.eraser,(h1, h2, h3, 6))
     1345            except TypeError:
     1346                pass
    11801347
    11811348        cdef object r1,r2,r3
    11821349        cdef Py_ssize_t i
     
    12081375        #at this point the key triple isn't present so we append a new entry.
    12091376        #we first form the appropriate weakrefs to receive callbacks on.
    12101377        try:
    1211             r1 = KeyedRef(k1,self.eraser,(h1, h2, h3))
     1378            r1 = KeyedRef(k1,self.eraser,(h1, h2, h3, 3))
    12121379        except TypeError:
    12131380            r1 = k1
    12141381        if k2 is not k1:
    12151382            try:
    1216                 r2 = KeyedRef(k2,self.eraser,(h1, h2, h3))
     1383                r2 = KeyedRef(k2,self.eraser,(h1, h2, h3, 4))
    12171384            except TypeError:
    12181385                r2 = k2
    12191386        else:
    12201387            r2 = None
    12211388        if k3 is not k2 or k3 is not k1:
    12221389            try:
    1223                 r3 = KeyedRef(k3,self.eraser,(h1, h2, h3))
     1390                r3 = KeyedRef(k3,self.eraser,(h1, h2, h3, 5))
    12241391            except TypeError:
    12251392                r3 = k3
    12261393        else: