graph.pyx 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
from libc.string cimport memcpy

from lakesuperior.util.hash cimport HLEN_32, Hash32, hash32
# Untyped pointer used for set item data; ``VarSet.add`` receives its input
# blob as a ``const SetItem``.
ctypedef void *SetItem
# Array of ``size_t`` values together with its length. ``VarSet.add`` takes
# one of these to describe the item boundaries inside a data blob.
ctypedef struct Index:
    # Pointer to the first element of the ``size_t`` array.
    size_t *addr
    # Number of elements in the array pointed to by ``addr``.
    size_t ct
  7. cdef class VarSet:
  8. """
  9. Variable-size set of variable-size values.
  10. """
  11. cdef:
  12. # Data blob. Stored contibuously in memory, and found by index.
  13. void *_data
  14. # Total size of data.
  15. size_t _data_sz
  16. # Index used to find start and end of each item.
  17. Index _index
  18. # KeySet of hashes of the set items.
  19. Keyset _hashes
  20. def __cinit__(self):
  21. self._data = PyMem_Malloc(0)
  22. self._hashes = Keyset(0, sizeof(Hash32))
  23. self._data_sz = 0
  24. def __dealloc__(self):
  25. PyMem_Free(self._data)
  26. cdef int add(self, const SetItem data, Index *idx) except -1:
  27. """
  28. Add a number of items.
  29. The items' content as a blob and their end boundaries must be given
  30. as an array of ``size_t``.
  31. """"
  32. #cdef size_t grow_sz = idx.addr[idx.ct - 1]
  33. # Last index indicates the position of the last byte
  34. cdef:
  35. size_t i, cur = 0, data_exp_sz, hash_exp_sz
  36. void *_tmp_data
  37. Hash32 hash
  38. Buffer msg
  39. SetItem *item
  40. # Resize data sets to maximium possible size for this function call.
  41. _tmp_data = PyMem_Realloc(self._data, idx.addr[idx.ct - 1])
  42. if not _tmp_data:
  43. raise MemoryError('Unable to allocate memory for set data.')
  44. self._hashes.resize(self._hashes.ct + idx.ct)
  45. for i in idx.ct:
  46. # Iterate over the items in the index and verify if they can be
  47. # added if they are not duplicates.
  48. msg.addr = data + cur
  49. msg.sz = idx[i] - cur
  50. hash32(&msg, &hash)
  51. if not self.hashes.contains(hash):
  52. # Add to the data.
  53. memcpy(_tmp_data + i * HLEN_32, msg.addr, msg.sz)
  54. # Add to the hashes keyset.
  55. memcpy(self._hashes + self._data_sz, hash, HLEN32)
  56. # Record the memory expansion.
  57. self._data_sz += msg.sz
  58. cur = idx[i]
  59. # Shrink data back to their actual size.
  60. self.hashes.resize(cur)
  61. _tmp_data = PyMem_Malloc(self._data_sz)
  62. if not _tmp_data :
  63. raise MemoryError('Unable to allocate memory for set data.')
  64. self._data = _tmp_data