"""
Fast and safe weak value dictionary
AUTHORS:
- Simon King (2013-10)
- Nils Bruin (2013-10)
Python's :mod:`weakref` module provides
:class:`~weakref.WeakValueDictionary`. This behaves similar to a dictionary,
but it does not prevent its values from garbage collection. Hence, it stores
the values by weak references with callback functions: The callback function
deletes a key-value pair from the dictionary, as soon as the value becomes
subject to garbage collection.
However, a problem arises if hash and comparison of the key depend on the
value that is being garbage collected::
sage: import weakref
sage: class Vals(object): pass
sage: class Keys:
....: def __init__(self, val):
....: self.val = weakref.ref(val)
....: def __hash__(self):
....: return hash(self.val())
....: def __eq__(self, other):
....: return self.val() == other.val()
sage: ValList = [Vals() for _ in range(10)]
sage: D = weakref.WeakValueDictionary()
sage: for v in ValList:
....: D[Keys(v)] = v
sage: len(D)
10
sage: del ValList, v
Exception KeyError: (<__main__.Keys instance at ...>,) in ignored
Exception KeyError: (<__main__.Keys instance at ...>,) in ignored
Exception KeyError: (<__main__.Keys instance at ...>,) in ignored
Exception KeyError: (<__main__.Keys instance at ...>,) in ignored
...
sage: len(D) > 1
True
Hence, there are scary error messages, and moreover the defunct items have not
been removed from the dictionary.
Therefore, Sage provides an alternative implementation
:class:`sage.misc.weak_dict.WeakValueDictionary`, using a callback that
removes the defunct item not based on hash and equality check of the key (this
is what fails in the example above), but based on comparison by identity. This
is possible, since references with callback function are distinct even if they
point to the same object. Hence, even if the same object ``O`` occurs as value
for several keys, each reference to ``O`` corresponds to a unique key. We see
no error messages, and the items get correctly removed::
sage: ValList = [Vals() for _ in range(10)]
sage: import sage.misc.weak_dict
sage: D = sage.misc.weak_dict.WeakValueDictionary()
sage: for v in ValList:
....: D[Keys(v)] = v
sage: len(D)
10
sage: del ValList
sage: len(D)
1
sage: del v
sage: len(D)
0
Another problem arises when iterating over the items of a dictionary: If
garbage collection occurs during iteration, then the content of the dictionary
changes, and the iteration breaks for :class:`weakref.WeakValueDictionary`::
sage: class Cycle:
....: def __init__(self):
....: self.selfref = self
sage: C = [Cycle() for n in range(10)]
sage: D = weakref.WeakValueDictionary(enumerate(C))
sage: import gc
sage: gc.disable()
sage: del C[:5]
sage: len(D)
10
sage: for k in D.iterkeys():
....: gc.enable()
....: _ = gc.collect()
Traceback (most recent call last):
...
RuntimeError: dictionary changed size during iteration
With :class:`~sage.misc.weak_dict.WeakValueDictionary`, the behaviour is
safer. Note that iteration over a WeakValueDictionary is non-deterministic,
since the lifetime of values (and hence the presence of keys) in the dictionary
may depend on when garbage collection occurs. The method implemented here
will at least postpone dictionary mutations due to garbage collection callbacks.
This means that as long as there is at least one iterator active on a dictionary,
none of its keys will be deallocated (which could have side-effects).
Which entries are returned is of course still dependent on when garbage
collection occurs. Note that when a key gets returned as "present" in the
dictionary, there is no guarantee one can actually retrieve its value: it may
have been garbage collected in the mean time.
Note that Sage's weak value dictionary is actually an instance of
:class:`dict`, in contrast to :mod:`weakref`'s weak value dictionary::
sage: issubclass(weakref.WeakValueDictionary, dict)
False
sage: issubclass(sage.misc.weak_dict.WeakValueDictionary, dict)
True
See :trac:`13394` for a discussion of some of the design considerations.
"""
########################################################################
# Copyright (C) 2013 Simon King
# Nils Bruin
#
# Distributed under the terms of the GNU General Public License (GPL)
#
# http://www.gnu.org/licenses/
########################################################################
import weakref
from weakref import KeyedRef
from copy import deepcopy
from cpython.dict cimport *
from cpython.weakref cimport *
from cpython.list cimport *
from cpython cimport Py_XINCREF, Py_XDECREF
cdef extern from "Python.h":
ctypedef struct PyDictEntry:
Py_ssize_t me_hash
PyObject* me_key
PyObject* me_value
ctypedef struct PyDictObject:
Py_ssize_t ma_fill
Py_ssize_t ma_used
Py_ssize_t ma_mask
PyDictEntry* ma_table
PyObject* Py_None
#we need this redefinition because we want to be able to call
#PyWeakref_GetObject with borrowed references. This is the recommended
#strategy according to Cython/Includes/cpython/__init__.pxd
PyObject* PyWeakref_GetObject(PyObject * wr)
#this one's just missing.
long PyObject_Hash(object obj)
#this routine extracts the "dummy" sentinel value that is used in dicts to mark
#"freed" slots. We need that to delete things ourselves.
cdef PyObject* init_dummy() except NULL:
cdef dict D = dict()
cdef PyDictObject* mp = D
cdef size_t mask
cdef PyDictEntry* ep0 = mp.ma_table
cdef PyDictEntry* ep
cdef size_t i
global dummy
D[0]=0; del D[0] #ensure that there is a "deleted" entry in the dict
mask = mp.ma_mask
#since our entry had hash 0, we should really succeed on the first iteration
for i in range(mask+1):
ep = &(ep0[i])
if ep.me_key != NULL:
return ep.me_key
raise RuntimeError("Problem initializing dummy")
#note that dummy here is a borrowed reference. That's not a problem because
#we're never giving it up and dictobject.c is also holding a permanent reference
#to this object
cdef PyObject* dummy = init_dummy()
#this routine looks for the first entry in dict D with given hash of the
#key and given (identical!) value and deletes that entry.
cdef del_dictitem_by_exact_value(PyDictObject *mp, PyObject *value, long hash):
"""
This is used in callbacks for the weak values of :class:`WeakValueDictionary`.
INPUT:
- ``PyDictObject *mp`` -- pointer to a dict
- ``PyObject *value`` -- pointer to a value of the dictionary
- ``long hash`` -- hash of the key by which the value is stored in the dict
The hash bucket determined by the given hash is searched for the item
containing the given value. If this item can't be found, the function is
silently returning. Otherwise, the item is removed from the dict.
TESTS:
The following is an indirect doctest, as discussed on :trac:`13394`.
::
sage: from sage.misc.weak_dict import WeakValueDictionary
sage: V = [set(range(n)) for n in range(5)]
sage: D = WeakValueDictionary(enumerate(V))
The line ``V[k] = None`` triggers execution of the callback functions of
the dict values. However, the actual deletion is postponed till after the
iteration over the dictionary has finished. Hence, when the callbacks are
executed, the values which the callback belongs to has already been
overridded by a new value. Therefore, the callback does not delete the
item::
sage: for k in D.iterkeys(): # indirect doctest
....: V[k] = None
....: D[k] = ZZ
sage: len(D)
5
sage: D[1]
Integer Ring
"""
cdef size_t i
cdef size_t perturb
cdef size_t mask = mp.ma_mask
cdef PyDictEntry* ep0 = mp.ma_table
cdef PyDictEntry* ep
i = hash & mask
ep = &(ep0[i])
if ep.me_key == NULL:
# key not found
return
perturb = hash
while ((ep.me_value) != value or ep.me_hash != hash):
i = (i << 2) + i + perturb +1
ep = &ep0[i & mask]
if ep.me_key == NULL:
# key not found
return
perturb = perturb >> 5 #this is the value of PERTURB_SHIFT
old_key = ep.me_key
if dummy == NULL:
raise RuntimeError("dummy needs to be initialized")
Py_XINCREF(dummy)
ep.me_key = dummy
old_value = ep.me_value
ep.me_value = NULL
mp.ma_used -= 1
#in our case, the value is always a dead weakref, so decreffing that is
#fairly safe
Py_XDECREF(old_value)
#this could have any effect.
Py_XDECREF(old_key)
def test_del_dictitem_by_exact_value(D, value, h):
"""
This function helps testing some cdef function used to delete dictionary items.
INPUT:
- ``D`` -- a Python ````.
- ``value`` -- an object that is value ``D``.
- ``h`` -- the hash of the key under which to find ``value`` in ``D``.
The underlying cdef function deletes an item from ``D`` that is in the
hash bucket determined by ``h`` and whose value is identic with
``value``. Of course, this only makes sense if the pairs ``(h, value)``
corresponding to items in ``D`` are pair-wise distinct.
If a matching item can not be found, the function does nothing and
silently returns.
TESTS:
See :trac:`13394` for a discussion.
::
sage: from sage.misc.weak_dict import test_del_dictitem_by_exact_value
sage: B=1000
sage: L=list(range(B))
sage: D1=dict()
sage: D2=dict()
sage: for i in range(100000): # long time
....: ki=L[floor(random()*B)]
....: vi=L[floor(random()*B)]
....: D1[ki]=vi
....: D2[ki]=vi
....: ko=L[floor(random()*B)]
....: if ko in D1:
....: vo=D1[ko]
....: del D1[ko]
....: test_del_dictitem_by_exact_value(D2,vo,hash(ko))
....: assert D1 == D2
No action is taken if the item prescribed by key hash and value does not
exist in the dictionary::
sage: D = {1: ZZ}
sage: test_del_dictitem_by_exact_value(D, ZZ, 2)
sage: D
{1: Integer Ring}
sage: test_del_dictitem_by_exact_value(D, QQ, 1)
sage: D
{1: Integer Ring}
"""
return del_dictitem_by_exact_value(D, value, h)
cdef class WeakValueDictionary(dict):
"""
IMPLEMENTATION:
The :class:`WeakValueDictionary` inherits from :class:`dict`. In its
implementation, it stores weakrefs to the actual values under the keys.
All access routines are wrapped to transparently place and remove these
weakrefs.
NOTE:
In contrast to :class:`weakref.WeakValueDictionary` in Python's
:mod:`weakref` module, the callback does not need to assume that the
dictionary key is a valid Python object when it is called. There is no
need to compute the hash or compare the dictionary keys. This is why
the example below would not work with
:class:`weakref.WeakValueDictionary`, but does work with
:class:`sage.misc.weak_dict.WeakValueDictionary`.
EXAMPLES::
sage: import weakref
sage: class Vals(object): pass
sage: class Keys:
....: def __init__(self, val):
....: self.val = weakref.ref(val)
....: def __hash__(self):
....: return hash(self.val())
....: def __eq__(self, other):
....: return self.val() == other.val()
sage: ValList = [Vals() for _ in range(10)]
sage: import sage.misc.weak_dict
sage: D = sage.misc.weak_dict.WeakValueDictionary()
sage: for v in ValList:
....: D[Keys(v)] = v
sage: len(D)
10
sage: del ValList
sage: len(D)
1
sage: del v
sage: len(D)
0
TESTS::
The following reflects the behaviour of the callback on weak dict values,
as discussed on :trac:`13394`. ::
sage: from sage.misc.weak_dict import WeakValueDictionary
sage: V = [set(range(n)) for n in range(5)]
sage: D = WeakValueDictionary(enumerate(V))
The line ``V[k] = None`` triggers execution of the callback functions of
the dictionary values. However, the actual deletion is postponed till
after the iteration over the dictionary has finished. Hence, when the
callbacks are executed, the values which the callback belongs to has
already been overridded by a new value. Therefore, the callback does not
delete the item::
sage: for k in D.iterkeys(): # indirect doctest
....: V[k] = None
....: D[k] = ZZ
sage: len(D)
5
sage: D[1]
Integer Ring
The following is a stress test for weak value dictionaries::
sage: class C(object):
....: def __init__(self, n):
....: self.n = n
....: def __cmp__(self, other):
....: return cmp(type(self),type(other)) or cmp(self.n, other.n)
sage: B = 100
sage: L = [None]*B
sage: D1 = WeakValueDictionary()
sage: D2 = WeakValueDictionary()
sage: for i in range(10000):
....: ki = floor(random()*B)
....: vi = C(floor(random()*B))
....: D1[ki] = vi
....: D2[ki] = vi
....: L[ki] = vi
....: del vi
....: ko = floor(random()*B)
....: if ko in D1:
....: del D1[ko]
....: L[ko] = None
....: assert D1 == D2
"""
cdef callback
cdef int _guard_level
cdef list _pending_removals
cdef _iteration_context
def __init__(self, data=()):
"""
Create a :class:`WeakValueDictionary` with given initial data.
INPUT:
Optional iterable of key-value pairs
EXAMPLES::
sage: L = [(p,GF(p)) for p in prime_range(10)]
sage: import sage.misc.weak_dict
sage: D = sage.misc.weak_dict.WeakValueDictionary()
sage: len(D)
0
sage: D = sage.misc.weak_dict.WeakValueDictionary(L)
sage: len(D) == len(L)
True
"""
dict.__init__(self)
# Define a callback function. In contrast to what is done in Python's
# weakref.WeakValueDictionary, we use a closure and not a weak
# reference to refer to self. Reason: We trust that we will not create
# sub-classes of keyed references or weak value dictionaries providing
# a __del__ method.
def callback(r):
#The situation is the following:
#in the underlying dictionary, we have stored a KeyedRef r
#under a key k. The attribute r.key is the hash of k.
cdef WeakValueDictionary cself = self
if cself._guard_level:
cself._pending_removals.append(r)
else:
del_dictitem_by_exact_value(cself, r, r.key)
self.callback = callback
self._guard_level = 0
self._pending_removals = []
self._iteration_context = _IterationContext(self)
for k,v in data:
self[k] = v
def __copy__(self):
"""
Return a copy of this weak dictionary.
EXAMPLES::
sage: import sage.misc.weak_dict
sage: D = sage.misc.weak_dict.WeakValueDictionary()
sage: D[1] = QQ
sage: D[2] = ZZ
sage: D[None] = CC
sage: E = copy(D) # indirect doctest
sage: set(E.items()) == set(D.items())
True
"""
return WeakValueDictionary(self.iteritems())
def __deepcopy__(self, memo):
"""
Return a copy of this dictionary using copies of the keys.
NOTE:
The values of the dictionary are not copied, since we can not copy the
external strong references to the values, which are decisive for
garbage collection.
EXAMPLES::
sage: class C(object): pass
sage: V = [C(),C()]
sage: D = sage.misc.weak_dict.WeakValueDictionary()
sage: D[C()] = V[0]
sage: D[C()] = V[1]
sage: E = deepcopy(D) # indirect doctest
The keys are copied (in this silly example, the copies of the keys are
actually not equal to the original keys)::
sage: set(E.keys()) == set(D.keys())
False
However, the values are not copied::
sage: set(E.values()) == set(D.values()) == set(V)
True
"""
out = WeakValueDictionary()
for k,v in self.iteritems():
out[deepcopy(k, memo)] = v
return out
def __repr__(self):
"""
EXAMPLES::
sage: import sage.misc.weak_dict
sage: repr(sage.misc.weak_dict.WeakValueDictionary([(1,ZZ),(2,QQ)])) # indirect doctest
''
"""
return "" % id(self)
def __str__(self):
"""
EXAMPLES::
sage: import sage.misc.weak_dict
sage: str(sage.misc.weak_dict.WeakValueDictionary([(1,ZZ),(2,QQ)])) # indirect doctest
''
"""
return "" % id(self)
def setdefault(self, k, default=None):
"""
Return the stored value for a given key; return and store a default
value if no previous value is stored.
EXAMPLES::
sage: import sage.misc.weak_dict
sage: L = [(p,GF(p)) for p in prime_range(10)]
sage: D = sage.misc.weak_dict.WeakValueDictionary(L)
sage: len(D)
4
The value for an existing key is returned and not overridden::
sage: D.setdefault(5, ZZ)
Finite Field of size 5
sage: D[5]
Finite Field of size 5
For a non-existing key, the default value is stored and returned::
sage: D.has_key(4)
False
sage: D.setdefault(4, ZZ)
Integer Ring
sage: D.has_key(4)
True
sage: D[4]
Integer Ring
sage: len(D)
5
"""
cdef PyObject* wr = PyDict_GetItem(self, k)
if wr != NULL:
out = PyWeakref_GetObject(wr)
if out != Py_None:
return