12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879 |
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
from libc.string cimport memcpy

from lakesuperior.util.hash cimport HLEN_32, Hash32, hash32
# Untyped pointer to the raw bytes of one or more set items.
ctypedef void *SetItem

# Array of ``size_t`` positions paired with its length.
ctypedef struct Index:
    # Pointer to the first position in the array.
    size_t *addr
    # Number of positions available in ``addr``.
    size_t ct
cdef class VarSet:
    """
    Variable-size set of variable-size values.

    Items are stored back to back in a single memory blob. ``_index`` holds
    the end offset of every stored item, so item ``i`` spans the bytes from
    ``_index.addr[i - 1]`` (or 0 for the first item) up to, but not
    including, ``_index.addr[i]``. A hash of each stored item is kept in a
    keyset and used to reject duplicates.
    """
    cdef:
        # Data blob. Stored contiguously in memory, and found by index.
        void *_data
        # Total size of data, in bytes.
        size_t _data_sz
        # Index used to find start and end of each item.
        Index _index
        # Keyset of hashes of the set items.
        Keyset _hashes

    def __cinit__(self):
        # NULL buffers are valid inputs for both PyMem_Realloc (which then
        # behaves like PyMem_Malloc) and PyMem_Free (which is a no-op).
        self._data = NULL
        self._data_sz = 0
        self._index.addr = NULL
        self._index.ct = 0
        self._hashes = Keyset(0, sizeof(Hash32))

    def __dealloc__(self):
        PyMem_Free(self._data)
        PyMem_Free(self._index.addr)

    cdef int add(self, const SetItem data, Index *idx) except -1:
        """
        Add a number of items.

        The items' content is given as one contiguous blob, and their end
        boundaries as an array of ``size_t`` offsets into that blob. Items
        whose hash is already found in the set are skipped.

        :param SetItem data: Blob holding all the candidate items back to
            back.
        :param Index* idx: End offset of each item within ``data``. Offsets
            are expected in ascending order, the last one being the total
            size of ``data``.

        :rtype: int
        :return: 0 on success.

        :raise MemoryError: If the data or index buffers cannot be resized.
        """
        cdef:
            size_t i, cur = 0, stored
            void *tmp
            # Byte-typed view of the input: arithmetic on ``void *`` is not
            # valid C.
            const unsigned char *src = <const unsigned char *>data
            Hash32 digest
            Buffer msg

        # Nothing to add. This also prevents ``idx.ct - 1`` from wrapping
        # around below.
        if idx.ct == 0:
            return 0

        stored = self._index.ct

        # Grow the buffers to the maximum possible size for this function
        # call, i.e. the current contents plus every incoming item. The last
        # offset of the incoming index is the total size of the new data.
        tmp = PyMem_Realloc(self._data, self._data_sz + idx.addr[idx.ct - 1])
        if not tmp:
            raise MemoryError('Unable to allocate memory for set data.')
        # Realloc may have moved the block: store the new address right away
        # so that a later failure cannot leave a dangling pointer behind.
        self._data = tmp

        tmp = PyMem_Realloc(
                self._index.addr, (stored + idx.ct) * sizeof(size_t))
        if not tmp:
            raise MemoryError('Unable to allocate memory for set index.')
        self._index.addr = <size_t *>tmp

        self._hashes.resize(stored + idx.ct)

        for i in range(idx.ct):
            # Isolate the i-th incoming item and hash it.
            msg.addr = <void *>(src + cur)
            msg.sz = idx.addr[i] - cur
            hash32(&msg, &digest)

            # NOTE(review): duplicates *within* the same call are only
            # caught if ``contains`` sees the hashes written below and
            # ignores the not yet filled slots — confirm against Keyset.
            if not self._hashes.contains(digest):
                # Append the item right after the existing data.
                memcpy(
                        <unsigned char *>self._data + self._data_sz,
                        msg.addr, msg.sz)
                # Add to the hashes keyset.
                # NOTE(review): assumes Keyset exposes its item buffer as
                # ``data`` with items of HLEN_32 bytes — confirm, or replace
                # with the Keyset's own insertion method.
                memcpy(
                        <unsigned char *>self._hashes.data + stored * HLEN_32,
                        &digest, HLEN_32)
                # Record the memory expansion and the new item's end offset.
                self._data_sz += msg.sz
                self._index.addr[stored] = self._data_sz
                stored += 1

            cur = idx.addr[i]

        self._index.ct = stored

        # Shrink everything back to the actual size. The hash keyset is
        # sized by number of items, not by bytes.
        self._hashes.resize(stored)

        tmp = PyMem_Realloc(self._data, self._data_sz)
        if not tmp:
            raise MemoryError('Unable to allocate memory for set data.')
        self._data = tmp

        tmp = PyMem_Realloc(self._index.addr, stored * sizeof(size_t))
        if not tmp:
            raise MemoryError('Unable to allocate memory for set index.')
        self._index.addr = <size_t *>tmp

        return 0
|