|
@@ -0,0 +1,79 @@
|
|
|
+from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
+from libc.string cimport memcpy
|
|
|
+
|
|
|
+from lakesuperior.util.hash cimport HLEN_32, Hash32, hash32
|
|
|
+
|
|
|
+# Untyped pointer to raw item bytes; ``VarSet.add`` takes its data blob as
+# a ``SetItem``.
+ctypedef void *SetItem
|
|
|
+# Boundaries of the items packed back to back in a data blob.
+ctypedef struct Index:
|
|
|
+ # Array of per-item offsets; ``VarSet.add`` reads them as end boundaries.
+ size_t *addr
|
|
|
+ # Number of entries in ``addr``.
+ size_t ct
|
|
|
+
|
|
|
+cdef class VarSet:
+    """
+    Variable-size set of variable-size values.
+
+    The bytes of all items are stored back to back in a single blob.
+    ``_index`` records the end offset of each stored item within the blob,
+    and ``_hashes`` holds one ``Hash32`` per stored item, which is used to
+    detect duplicates.
+    """
+    # NOTE(review): ``Buffer`` and ``Keyset`` are not cimported anywhere in
+    # this file; they are presumably project types declared elsewhere --
+    # confirm and add the matching cimports.
+    cdef:
+        # Data blob. Stored contiguously in memory, and found by index.
+        void *_data
+        # Total size of data, in bytes.
+        size_t _data_sz
+        # End offset of each stored item; item ``i`` starts where item
+        # ``i - 1`` ends (or at 0 for the first item).
+        Index _index
+        # Keyset of hashes of the set items.
+        Keyset _hashes
+
+
+    def __cinit__(self):
+        self._data_sz = 0
+        self._index.addr = NULL
+        self._index.ct = 0
+        self._hashes = Keyset(0, sizeof(Hash32))
+        self._data = PyMem_Malloc(0)
+        if self._data == NULL:
+            raise MemoryError('Unable to allocate memory for set data.')
+
+
+    def __dealloc__(self):
+        # ``PyMem_Free(NULL)`` is a no-op, so this is safe even if
+        # ``__cinit__`` failed half way or nothing was ever added.
+        PyMem_Free(self._index.addr)
+        PyMem_Free(self._data)
+
+
+    cdef int add(self, const SetItem data, Index *idx) except -1:
+        """
+        Add a number of items.
+
+        The items' content must be given as a single blob and their end
+        boundaries as an array of ``size_t``: item ``i`` spans the bytes
+        from ``idx.addr[i - 1]`` (0 for the first item) up to, but not
+        including, ``idx.addr[i]``. Items whose hash is already present in
+        the set are skipped.
+
+        :param SetItem data: Blob holding all the items back to back.
+        :param Index* idx: End boundaries of the items in ``data``.
+
+        :rtype: int
+        :return: 0 on success.
+
+        :raise ValueError: If the boundaries are not in ascending order.
+        :raise MemoryError: If one of the buffers cannot be resized.
+        """
+        cdef:
+            size_t i, cur = 0, incoming_sz, hash_ct
+            void *_tmp_data
+            size_t *_tmp_addr
+            Hash32 item_hash
+            Buffer msg
+
+        # Nothing to add; also avoids reading ``idx.addr[idx.ct - 1]`` with
+        # an unsigned zero count.
+        if idx == NULL or idx.ct == 0:
+            return 0
+
+        # Validate the boundaries before touching any state, so that bad
+        # input cannot leave the set half-updated or make an item size
+        # underflow.
+        for i in range(idx.ct):
+            if idx.addr[i] < cur:
+                raise ValueError(
+                    'Item boundaries must be in ascending order.')
+            cur = idx.addr[i]
+        # The last boundary is the total size of the incoming blob.
+        incoming_sz = cur
+        cur = 0
+
+        # Grow all buffers to the maximum possible size for this call. The
+        # new data is appended, hence the existing size must be included.
+        _tmp_data = PyMem_Realloc(self._data, self._data_sz + incoming_sz)
+        if _tmp_data == NULL:
+            raise MemoryError('Unable to allocate memory for set data.')
+        # After a successful realloc the old pointer is no longer valid.
+        self._data = _tmp_data
+
+        _tmp_addr = <size_t *>PyMem_Realloc(
+            self._index.addr, (self._index.ct + idx.ct) * sizeof(size_t))
+        if _tmp_addr == NULL:
+            raise MemoryError('Unable to allocate memory for set index.')
+        self._index.addr = _tmp_addr
+
+        # NOTE(review): the Keyset calls below assume that ``resize()`` takes
+        # a number of slots, that ``contains()`` accepts a pointer to a hash
+        # and that the raw slot buffer is exposed as ``data`` -- confirm
+        # against the Keyset declaration. If ``contains()`` scans every
+        # pre-grown slot it will also read slots not yet written.
+        hash_ct = self._hashes.ct
+        self._hashes.resize(hash_ct + idx.ct)
+
+        for i in range(idx.ct):
+            # Hash each incoming item and store it only if it is not a
+            # duplicate. ``void *`` does not support arithmetic, hence the
+            # casts to a byte pointer.
+            msg.addr = <unsigned char *>data + cur
+            msg.sz = idx.addr[i] - cur
+            hash32(&msg, &item_hash)
+
+            if not self._hashes.contains(item_hash):
+                # Append the item bytes to the data blob.
+                memcpy(
+                    <unsigned char *>self._data + self._data_sz,
+                    msg.addr, msg.sz)
+                # Append the hash to the next free slot of the keyset.
+                memcpy(
+                    <unsigned char *>self._hashes.data + hash_ct * HLEN_32,
+                    item_hash, HLEN_32)
+                hash_ct += 1
+                # Record the memory expansion and the item's end boundary.
+                self._data_sz += msg.sz
+                self._index.addr[self._index.ct] = self._data_sz
+                self._index.ct += 1
+
+            cur = idx.addr[i]
+
+        # Shrink everything back to the actual size. Realloc (not malloc)
+        # keeps the stored contents and does not leak the old buffer.
+        self._hashes.resize(hash_ct)
+
+        _tmp_addr = <size_t *>PyMem_Realloc(
+            self._index.addr, self._index.ct * sizeof(size_t))
+        if _tmp_addr == NULL:
+            raise MemoryError('Unable to allocate memory for set index.')
+        self._index.addr = _tmp_addr
+
+        _tmp_data = PyMem_Realloc(self._data, self._data_sz)
+        if _tmp_data == NULL:
+            raise MemoryError('Unable to allocate memory for set data.')
+        self._data = _tmp_data
+
+        return 0
|
|
|
+
|