#*****************************************************************************
# Copyright (C) 2007 Robert Bradshaw
# 2012 Simon King
# 2013 Nils Bruin
#
# Distributed under the terms of the GNU General Public License (GPL)
#
# http://www.gnu.org/licenses/
#*****************************************************************************
"""
Containers for storing coercion data
This module provides :class:`TripleDict` and :class:`MonoDict`. These are
structures similar to :class:`~weakref.WeakKeyDictionary` in Python's weakref
module, and are optimized for lookup speed. The keys for :class:`TripleDict`
consist of triples (k1,k2,k3) and are looked up by identity rather than
equality. The keys are stored by weakrefs if possible. If any one of the
components k1, k2, k3 gets garbage collected, then the entry is removed from
the :class:`TripleDict`.
Key components that do not allow for weakrefs are stored via a normal
refcounted reference. That means that any entry stored using a triple
(k1,k2,k3) so that none of the k1,k2,k3 allows a weak reference behaves
as an entry in a normal dictionary: Its existence in :class:`TripleDict`
prevents it from being garbage collected.
That container currently is used to store coercion and conversion maps between
two parents (:trac:`715`) and to store homsets of pairs of objects of a
category (:trac:`11521`). In both cases, it is essential that the parent
structures remain garbage collectable, it is essential that the data access is
faster than with a usual :class:`~weakref.WeakKeyDictionary`, and we enforce
the "unique parent condition" in Sage (parent structures should be identical
if they are equal).
:class:`MonoDict` behaves similarly, but it takes a single item as a key. It
is used for caching the parents which allow a coercion map into a fixed other
parent (:trac:`12313`).
By :trac:`14159`, :class:`MonoDict` and :class:`TripleDict` can be optionally
used with weak references on the values.
"""
from cpython.list cimport *
from cpython.mem cimport *
from cpython.string cimport PyString_FromString
from libc.string cimport memset
from weakref import KeyedRef
from cpython cimport Py_XINCREF, Py_XDECREF
cdef extern from "Python.h":
Py_ssize_t PyInt_AsSsize_t(PyObject* io)
PyObject* PyWeakref_GetObject(object ref)
PyObject* Py_None
int PyList_SetItem(object list, Py_ssize_t index,PyObject * item) except -1
ctypedef int (*visitproc)(PyObject* ob, void* arg)
ctypedef struct PyTypeObject:
void * tp_traverse
void * tp_clear
cpdef inline Py_ssize_t signed_id(x):
"""
A function like Python's :func:`id` returning *signed* integers,
which are guaranteed to fit in a ``Py_ssize_t``.
Theoretically, there is no guarantee that two different Python
objects have different ``signed_id()`` values. However, under the
mild assumption that a C pointer fits in a ``Py_ssize_t``, this
is guaranteed.
TESTS::
sage: a = 1.23e45 # some object
sage: from sage.structure.coerce_dict import signed_id
sage: s = signed_id(a)
sage: id(a) == s or id(a) == s + 2**32 or id(a) == s + 2**64
True
sage: signed_id(a) <= sys.maxsize
True
"""
return (x)
#it's important that this is not an interned string: this object
#must be a unique sentinel. We could reuse the "dummy" sentinel
#that is defined in python's dictobject.c
cdef object dummy_object = PyString_FromString("dummy")
cdef PyObject* dummy = dummy_object
############################################
# A note about how to store "id" keys in python structures:
#
# We use the type Py_ssize_t to store "id"s generated by the signed_id
# function defined above. Assuming that Py_ssize_t is the same as a C
# long (which is true on most Unix-like systems), this also has the
# advantage that these Py_ssize_t values are stored as a Python "int"
# (as opposed to "long"), which allow for fast conversion to/from C
# types.
#
# There is one place where we have to be careful about signs:
# Our hash values most naturally live in Py_ssize_t. We convert those into
# an index into our bucket list by taking the hash modulo the number of buckets.
# However, the modulo operator in C preserves the sign of the number we take the
# modulus of, which is not what we want.
# The solution is to always do
# ( h) % modulus
# to ensure we're doing an unsigned modulus.
cdef struct mono_cell:
void* key_id
PyObject* key_weakref
PyObject* value
cdef object extract_mono_cell(mono_cell *cell):
#takes the refcounted components from a mono_cell
#and puts them in a list and returns it.
#The mono_cell itself is marked as "freed".
#The refcounts originally accounting for the
#presence in the mono_cell now account for the presence
#in the returned list.
#
#the returned result is only used to throw away:
#an advantage is that the containing list participates
#in CPython's trashcan, which prevents stack overflow
#on large dereffing cascades.
#
#a slight disadvantage is that this routine needs to
#allocate a list (mainly just to be thrown away)
if cell.key_id != NULL and cell.key_id != dummy :
L=PyList_New(2)
PyList_SetItem(L,0,cell.key_weakref)
PyList_SetItem(L,1,cell.value)
cell.key_id=dummy
cell.key_weakref=NULL
cell.value=NULL
return L
else:
raise RuntimeError("unused mono_cell")
cdef struct triple_cell:
void* key_id1
void* key_id2
void* key_id3
PyObject* key_weakref1
PyObject* key_weakref2
PyObject* key_weakref3
PyObject* value
cdef object extract_triple_cell(triple_cell *cell):
#see extract_mono_cell for the rationale
#behind this routine.
if cell.key_id1 != NULL and cell.key_id1 != dummy :
L=PyList_New(4)
PyList_SetItem(L,0,cell.key_weakref1)
PyList_SetItem(L,1,cell.key_weakref2)
PyList_SetItem(L,2,cell.key_weakref3)
PyList_SetItem(L,3,cell.value)
cell.key_id1=dummy
cell.key_id2=NULL
cell.key_id3=NULL
cell.key_weakref1=NULL
cell.key_weakref2=NULL
cell.key_weakref3=NULL
cell.value=NULL
return L
else:
raise RuntimeError("unused triple_cell")
cdef class MonoDict:
"""
This is a hashtable specifically designed for (read) speed in
the coercion model.
It differs from a python WeakKeyDictionary in the following important ways:
- Comparison is done using the 'is' rather than '==' operator.
- Only weak references to the keys are stored if at all possible.
Keys that do not allow for weak references are stored with a normal
refcounted reference.
- The callback of the weak references is safe against recursion, see below.
There are special cdef set/get methods for faster access.
It is bare-bones in the sense that not all dictionary methods are
implemented.
IMPLEMENTATION:
It is implemented as a hash table with open addressing, similar to python's
dict.
If ki supports weak references then ri is a weak reference to ki with a
callback to remove the entry from the dictionary if ki gets garbage
collected. If ki is does not support weak references then ri is identical to ki.
In the latter case the presence of the key in the dictionary prevents it from
being garbage collected.
INPUT:
- ``size`` -- unused parameter, present for backward compatibility.
- ``data`` -- optional iterable defining initial data.
- ``threshold`` -- unused parameter, present for backward compatibility.
- ``weak_values`` -- optional bool (default False). If it is true, weak references
to the values in this dictionary will be used, when possible.
EXAMPLES::
sage: from sage.structure.coerce_dict import MonoDict
sage: L = MonoDict(31)
sage: a = 'a'; b = 'ab'; c = -15
sage: L[a] = 1
sage: L[b] = 2
sage: L[c] = 3
The key is expected to be a unique object. Hence, the item stored for ``c``
can not be obtained by providing another equal number::
sage: L[a]
1
sage: L[b]
2
sage: L[c]
3
sage: L[-15]
Traceback (most recent call last):
...
KeyError: -15
Not all features of Python dictionaries are available, but iteration over
the dictionary items is possible::
sage: # for some reason the following failed in "make ptest"
sage: # on some installations, see #12313 for details
sage: sorted(L.iteritems()) # random layout
[(-15, 3), ('a', 1), ('ab', 2)]
sage: # the following seems to be more consistent
sage: set(L.iteritems())
set([('a', 1), ('ab', 2), (-15, 3)])
sage: del L[c]
sage: sorted(L.iteritems())
[('a', 1), ('ab', 2)]
sage: len(L)
2
sage: for i in range(1000):
... L[i] = i
sage: len(L)
1002
sage: L['a']
1
sage: L['c']
Traceback (most recent call last):
...
KeyError: 'c'
Note that this kind of dictionary is also used for caching actions
and coerce maps. In previous versions of Sage, the cache was by
strong references and resulted in a memory leak in the following
example. However, this leak was fixed by :trac:`715`, using
weak references::
sage: K = GF(1<<55,'t')
sage: for i in range(50):
... a = K.random_element()
... E = EllipticCurve(j=a)
... P = E.random_point()
... Q = 2*P
sage: import gc
sage: n = gc.collect()
sage: from sage.schemes.elliptic_curves.ell_finite_field import EllipticCurve_finite_field
sage: LE = [x for x in gc.get_objects() if isinstance(x, EllipticCurve_finite_field)]
sage: len(LE) # indirect doctest
1
TESTS:
Here, we demonstrate the use of weak values.
::
sage: M = MonoDict(13)
sage: MW = MonoDict(13, weak_values=True)
sage: class Foo: pass
sage: a = Foo()
sage: b = Foo()
sage: k = 1
sage: M[k] = a
sage: MW[k] = b
sage: M[k] is a
True
sage: MW[k] is b
True
sage: k in M
True
sage: k in MW
True
While ``M`` uses a strong reference to ``a``, ``MW`` uses a *weak*
reference to ``b``, and after deleting ``b``, the corresponding item of
``MW`` will be removed during the next garbage collection::
sage: import gc
sage: del a,b
sage: _ = gc.collect()
sage: k in M
True
sage: k in MW
False
sage: len(MW)
0
sage: len(M)
1
Note that ``MW`` also accepts values that do not allow for weak references::
sage: MW[k] = int(5)
sage: MW[k]
5
The following demonstrates that :class:`MonoDict` is safer than
:class:`~weakref.WeakKeyDictionary` against recursions created by nested
callbacks; compare :trac:`15069` (the mechanism used now is different, though)::
sage: M = MonoDict(11)
sage: class A: pass
sage: a = A()
sage: prev = a
sage: for i in range(1000):
....: newA = A()
....: M[prev] = newA
....: prev = newA
sage: len(M)
1000
sage: del a
sage: len(M)
0
The corresponding example with a Python :class:`weakref.WeakKeyDictionary`
would result in a too deep recursion during deletion of the dictionary
items::
sage: import weakref
sage: M = weakref.WeakKeyDictionary()
sage: a = A()
sage: prev = a
sage: for i in range(1000):
....: newA = A()
....: M[prev] = newA
....: prev = newA
sage: len(M)
1000
sage: del a
Exception RuntimeError: 'maximum recursion depth exceeded while calling a Python object' in ignored
sage: len(M)>0
True
Check that also in the presence of circular references, :class:`MonoDict`
gets properly collected::
sage: import gc
sage: def count_type(T):
....: return len([c for c in gc.get_objects() if isinstance(c,T)])
sage: _=gc.collect()
sage: N=count_type(MonoDict)
sage: for i in range(100):
....: V = [ MonoDict(11,{"id":j+100*i}) for j in range(100)]
....: n= len(V)
....: for i in range(n): V[i][V[(i+1)%n]]=(i+1)%n
....: del V
....: _=gc.collect()
....: assert count_type(MonoDict) == N
sage: count_type(MonoDict) == N
True
AUTHORS:
- Simon King (2012-01)
- Nils Bruin (2012-08)
- Simon King (2013-02)
- Nils Bruin (2013-11)
"""
cdef mono_cell* lookup(self, PyObject* key):
"""
This routine is used for all cases where a (potential) spot for
a key is looked up. The returned value is a pointer into the dictionary
store that either contains an entry with the requested key or a free spot
where an entry for that key should go.
"""
cdef size_t perturb
cdef size_t mask = self.mask
cdef mono_cell* table = self.table
cdef mono_cell* cursor
cdef mono_cell* first_freed = NULL
#We seed our starting probe using the higher bits of the key as well.
#Our hash is a memory location, so the bottom bits are likely 0.
cdef size_t i = ((key)>>8+(key))
if key == NULL or key == dummy:
print("Request to look up invalid key")
cursor = &(table[i & mask])
# if the cell was never used, the entry wasn't there
perturb = (key)>>3
#the probing algorithm is heavily inspired by python's own dict.
#there is always at least one NULL entry in the store, and the probe
#sequence eventually covers the entire store (see python's dictobject.c),
#so the loop below does terminate. Since this loop executes only
#straightforward C, we know the table will not change.
while (cursor.key_id != key):
if cursor.key_id == NULL:
return first_freed if (first_freed != NULL) else cursor
if first_freed == NULL and cursor.key_id == dummy:
first_freed = cursor
i = 5*i + perturb +1
cursor = &(table[i & mask])
perturb = perturb >> 5
return cursor
cdef resize(self):
"""
Resize dictionary. That can also mean shrink! Size is always a power of 2.
"""
cdef mono_cell* old_table=self.table
cdef size_t old_mask = self.mask
cdef size_t newsize = 8
cdef size_t minsize = 2*self.used
cdef size_t i
cdef mono_cell* cursor
cdef mono_cell* entry
while newsize < minsize:
newsize = newsize<<1
cdef mono_cell* table = PyMem_Malloc((newsize)*sizeof(mono_cell))
if table == NULL:
raise MemoryError()
memset(table,0,(newsize)*sizeof(mono_cell))
#we're done with memory activity. We can move the new (empty) table into place:
self.table = table
self.mask = newsize-1
self.used = 0
self.fill = 0
#and move all entries over. We're not changing any refcounts here, so this is a very
#tight loop that doesn't need to worry about tables changing.
for i in range(old_mask+1):
entry = &(old_table[i])
if entry.key_id != NULL and entry.key_id != dummy:
cursor=self.lookup((entry.key_id))
assert cursor.key_id == NULL
cursor[0]=entry[0]
self.used +=1
self.fill +=1
PyMem_Free(old_table)
def __init__(self, size=11, data=None, threshold=0.7, weak_values=False):
"""
Create a special dict using singletons for keys.
EXAMPLES::
sage: from sage.structure.coerce_dict import MonoDict
sage: L = MonoDict(31)
sage: a = 'a'
sage: L[a] = 1
sage: L[a]
1
sage: L = MonoDict({a: 1})
sage: L[a]
1
"""
#if only one argument is supplied and it's iterable, use it for data rather than
#for size. This way we're compatible with the old calling sequence (an integer is
#just ignored) and we can also use the more usual construction.
if data is None:
try:
data=size.iteritems()
except AttributeError:
try:
data=iter(size)
except TypeError:
pass
else:
try:
data=data.iteritems()
except AttributeError:
pass
if self.table != NULL:
raise RuntimeError("table already initialized. Called __init__ second time?")
cdef minsize = 8
cdef size_t newsize = 1<<3
while newsize < minsize:
newsize = newsize <<1
self.mask = newsize - 1
cdef mono_cell* table = PyMem_Malloc(newsize*sizeof(mono_cell))
if table == NULL:
raise MemoryError()
memset(table,0,newsize*sizeof(mono_cell))
self.table = table
self.used = 0
self.fill = 0
#our eraser is defined using a closure
def eraser(r):
cdef MonoDict cself = self
#upon table teardown the table could be set to NULL, in which case
#erasing is not required
if cself.table == NULL:
return
cdef mono_cell* cursor = cself.lookup(r.key)
if (cursor.key_id != NULL and cursor.key_id != dummy):
if (cursor.key_weakref == r or cursor.value == r):
L=extract_mono_cell(cursor)
self.used -= 1
else:
#this should probably never happen
raise RuntimeError("eraser: key match but no weakref match")
else:
#this is conceivable, but I haven't seen it happen.
#raise RuntimeError("eraser: no key match")
pass
self.eraser = eraser
self.weak_values = weak_values
if data:
for k,v in data:
self.set(k,v)
def __dealloc__(self):
"""
Ensure that the memory allocated by a MonoDict is properly freed.
"""
#is this required or does this get called anyway if we set tp_clear properly?
#at least it's safe, since MonoDict_clear check if it has already run.
MonoDict_clear(self)
def __len__(self):
"""
The number of items in self.
EXAMPLES::
sage: from sage.structure.coerce_dict import MonoDict
sage: L = MonoDict(37)
sage: a = 'a'; b = 'b'; c = 'c'
sage: L[a] = 1
sage: L[a] = -1 # re-assign
sage: L[b] = 1
sage: L[c] = None
sage: len(L)
3
"""
return self.used
def __contains__(self, k):
"""
Test if the dictionary contains a given key.
EXAMPLES::
sage: from sage.structure.coerce_dict import MonoDict
sage: L = MonoDict(31)
sage: a = 'a'; b = 'ab'; c = 15
sage: L[a] = 1
sage: L[b] = 2
sage: L[c] = 3
sage: c in L # indirect doctest
True
The keys are compared by identity, not by equality. Hence, we have::
sage: c == 15
True
sage: 15 in L
False
"""
cdef mono_cell* cursor = self.lookup(k)
if cursor.key_id == NULL or cursor.key_id == dummy:
return False
r =