keyset.pyx 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. import logging
  2. from libc.string cimport memcmp, memcpy
  3. from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
  4. cimport lakesuperior.model.structures.callbacks as cb
  5. from lakesuperior.model.base cimport TripleKey, TRP_KLEN
  6. logger = logging.getLogger(__name__)
  7. cdef class Keyset:
  8. """
  9. Pre-allocated array (not set, as the name may suggest) of ``TripleKey``s.
  10. """
  11. def __cinit__(self, size_t ct=0, expand_ratio=.5, *args, **kwargs):
  12. """
  13. Initialize and allocate memory for the data set.
  14. :param size_t ct: Number of elements to be accounted for.
  15. """
  16. self.ct = ct
  17. self.expand_ratio = expand_ratio
  18. self.data = <TripleKey*>PyMem_Malloc(self.ct * TRP_KLEN)
  19. if ct and not self.data:
  20. raise MemoryError('Error allocating Keyset data.')
  21. self._cur = 0
  22. self._free_i = 0
  23. def __dealloc__(self):
  24. """
  25. Free the memory.
  26. This is called when the Python instance is garbage collected, which
  27. makes it handy to safely pass a Keyset instance across functions.
  28. """
  29. PyMem_Free(self.data)
  30. # Access methods.
  31. cdef void seek(self, size_t idx=0):
  32. """
  33. Place the cursor at a certain index, 0 by default.
  34. """
  35. self._cur = idx
  36. cdef size_t tell(self):
  37. """
  38. Tell the position of the cursor in the keyset.
  39. """
  40. return self._cur
  41. cdef bint get_at(self, size_t i, TripleKey* item):
  42. """
  43. Get an item at a given index position. Cython-level method.
  44. :rtype: TripleKey
  45. """
  46. if i >= self._free_i:
  47. return False
  48. self._cur = i
  49. item[0] = self.data[i]
  50. return True
  51. cdef bint get_next(self, TripleKey* item):
  52. """
  53. Populate the current value and advance the cursor by 1.
  54. :param void *val: Addres of value returned. It is NULL if
  55. the end of the buffer was reached.
  56. :rtype: bint
  57. :return: True if a value was found, False if the end of the buffer
  58. has been reached.
  59. """
  60. if self._cur >= self._free_i:
  61. return False
  62. item[0] = self.data[self._cur]
  63. self._cur += 1
  64. return True
  65. cdef void add(self, const TripleKey* val, check_dup=False) except *:
  66. """
  67. Add a triple key to the array.
  68. """
  69. # Optionally check for duplicates.
  70. if check_dup and self.contains(val):
  71. return
  72. if self._free_i >= self.threshod:
  73. if self.expand_ratio > 0:
  74. # In some edge casees, a very small ratio may round down to a
  75. # zero increase, so the baseline increase is 1 element.
  76. self.resize(1 + self.ct * (1 + self.expand_ratio))
  77. else:
  78. raise MemoryError('No space left in key set.')
  79. self.data[self._free_i] = val[0]
  80. self._free_i += 1
  81. cdef bint contains(self, const TripleKey* val):
  82. """
  83. Whether a value exists in the set.
  84. """
  85. cdef TripleKey stored_val
  86. self.seek()
  87. while self.get_next(&stored_val):
  88. if memcmp(val, stored_val, TRP_KLEN) == 0:
  89. return True
  90. return False
  91. cdef Keyset copy(self):
  92. """
  93. Copy a Keyset.
  94. """
  95. cdef Keyset new_ks = Keyset(self.ct)
  96. memcpy(new_ks.data, self.data, self.ct * TRP_KLEN)
  97. new_ks.seek()
  98. return new_ks
  99. cdef void resize(self, size_t size=0) except *:
  100. """
  101. Change the array capacity.
  102. :param size_t size: The new capacity size. If not specified or 0, the
  103. array is shrunk to the last used item. The resulting size
  104. therefore will always be greater than 0. The only exception
  105. to this is if the specified size is 0 and no items have been added
  106. to the array, in which case the array will be effectively shrunk
  107. to 0.
  108. """
  109. if not size:
  110. size = self._free_i
  111. tmp = <TripleKey*>PyMem_Realloc(self.data, size * TRP_KLEN)
  112. if not tmp:
  113. raise MemoryError('Could not reallocate Keyset data.')
  114. self.data = tmp
  115. self.ct = size
  116. self.seek()
  117. cdef Keyset lookup(
  118. self, const Key* sk, const Key* pk, const Key* ok
  119. ):
  120. """
  121. Look up triple keys.
  122. This works in a similar way that the ``SimpleGraph`` and ``LmdbStore``
  123. methods work.
  124. Any and all the terms may be NULL. A NULL term is treated as unbound.
  125. :param const Key* sk: s key pointer.
  126. :param const Key* pk: p key pointer.
  127. :param const Key* ok: o key pointer.
  128. """
  129. cdef:
  130. TripleKey spok
  131. Keyset ret = Keyset(self.ct)
  132. Key* k1 = NULL
  133. Key* k2 = NULL
  134. key_cmp_fn_t cmp_fn
  135. if sk and pk and ok: # s p o
  136. pass # TODO
  137. elif sk:
  138. k1 = sk
  139. if pk: # s p ?
  140. k2 = pk
  141. cmp_fn = cb.lookup_skpk_cmp_fn
  142. elif ok: # s ? o
  143. k2 = ok
  144. cmp_fn = cb.lookup_skok_cmp_fn
  145. else: # s ? ?
  146. cmp_fn = cb.lookup_sk_cmp_fn
  147. elif pk:
  148. k1 = pk
  149. if ok: # ? p o
  150. k2 = ok
  151. cmp_fn = cb.lookup_pkok_cmp_fn
  152. else: # ? p ?
  153. cmp_fn = cb.lookup_pk_cmp_fn
  154. elif ok: # ? ? o
  155. k1 = ok
  156. cmp_fn = cb.lookup_ok_cmp_fn
  157. else: # ? ? ?
  158. return self.copy()
  159. self.seek()
  160. while self.get_next(&spok):
  161. if cmp_fn(<TripleKey*>spok, k1, k2):
  162. ret.add(&spok)
  163. ret.resize()
  164. return ret