keyset.pyx 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. import logging
  2. from libc.string cimport memcmp, memcpy
  3. from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
  4. cimport lakesuperior.model.structures.callbacks as cb
  5. from lakesuperior.model.base cimport TripleKey, TRP_KLEN
  6. logger = logging.getLogger(__name__)
  7. cdef class Keyset:
  8. """
  9. Pre-allocated array (not set, as the name may suggest) of ``TripleKey``s.
  10. """
  11. def __cinit__(self, size_t ct=0):
  12. """
  13. Initialize and allocate memory for the data set.
  14. :param size_t ct: Number of elements to be accounted for.
  15. """
  16. self.ct = ct
  17. self.data = <TripleKey*>PyMem_Malloc(self.ct * TRP_KLEN)
  18. if ct and not self.data:
  19. raise MemoryError('Error allocating Keyset data.')
  20. self._cur = 0
  21. self._free_i = 0
  22. def __dealloc__(self):
  23. """
  24. Free the memory.
  25. This is called when the Python instance is garbage collected, which
  26. makes it handy to safely pass a Keyset instance across functions.
  27. """
  28. #logger.debug(
  29. # 'Releasing {0} ({1}x{2}) bytes of Keyset @ {3:x}...'.format(
  30. # self.size, self.conf.capacity, self.itemsize,
  31. # <unsigned long>self.data))
  32. PyMem_Free(self.data)
  33. #logger.debug('...done releasing.')
  34. # Access methods.
  35. cdef void seek(self, size_t idx=0):
  36. """
  37. Place the cursor at a certain index, 0 by default.
  38. """
  39. self._cur = idx
  40. cdef size_t tell(self):
  41. """
  42. Tell the position of the cursor in the keyset.
  43. """
  44. return self._cur
  45. cdef bint get_at(self, size_t i, TripleKey* item):
  46. """
  47. Get an item at a given index position. Cython-level method.
  48. :rtype: TripleKey
  49. """
  50. if i >= self._free_i:
  51. return False
  52. self._cur = i
  53. item[0] = self.data[i]
  54. return True
  55. cdef bint get_next(self, TripleKey* item):
  56. """
  57. Populate the current value and advance the cursor by 1.
  58. :param void *val: Addres of value returned. It is NULL if
  59. the end of the buffer was reached.
  60. :rtype: bint
  61. :return: True if a value was found, False if the end of the buffer
  62. has been reached.
  63. """
  64. if self._cur >= self._free_i:
  65. return False
  66. item[0] = self.data[self._cur]
  67. self._cur += 1
  68. return True
  69. cdef void add(self, const TripleKey* val) except *:
  70. """
  71. Add a triple key to the array.
  72. """
  73. if self._free_i >= self.ct:
  74. raise MemoryError('No slots left in key set.')
  75. self.data[self._free_i] = val[0]
  76. self._free_i += 1
  77. cdef bint contains(self, const TripleKey* val):
  78. """
  79. Whether a value exists in the set.
  80. """
  81. cdef TripleKey stored_val
  82. self.seek()
  83. while self.get_next(&stored_val):
  84. if memcmp(val, stored_val, TRP_KLEN) == 0:
  85. return True
  86. return False
  87. cdef Keyset copy(self):
  88. """
  89. Copy a Keyset.
  90. """
  91. cdef Keyset new_ks = Keyset(self.ct)
  92. memcpy(new_ks.data, self.data, self.ct * TRP_KLEN)
  93. new_ks.seek()
  94. return new_ks
  95. cdef void resize(self, size_t size=0) except *:
  96. """
  97. Change the array capacity.
  98. :param size_t size: The new capacity size. If not specified or 0, the
  99. array is shrunk to the last used item. The resulting size
  100. therefore will always be greater than 0. The only exception
  101. to this is if the specified size is 0 and no items have been added
  102. to the array, in which case the array will be effectively shrunk
  103. to 0.
  104. """
  105. if not size:
  106. size = self._free_i
  107. tmp = <TripleKey*>PyMem_Realloc(self.data, size * TRP_KLEN)
  108. if not tmp:
  109. raise MemoryError('Could not reallocate Keyset data.')
  110. self.data = tmp
  111. self.ct = size
  112. self.seek()
  113. cdef Keyset lookup(
  114. self, const Key* sk, const Key* pk, const Key* ok
  115. ):
  116. """
  117. Look up triple keys.
  118. This works in a similar way that the ``SimpleGraph`` and ``LmdbStore``
  119. methods work.
  120. Any and all the terms may be NULL. A NULL term is treated as unbound.
  121. :param const Key* sk: s key pointer.
  122. :param const Key* pk: p key pointer.
  123. :param const Key* ok: o key pointer.
  124. """
  125. cdef:
  126. TripleKey spok
  127. Keyset ret = Keyset(self.ct)
  128. Key* k1 = NULL
  129. Key* k2 = NULL
  130. key_cmp_fn_t cmp_fn
  131. if sk and pk and ok: # s p o
  132. pass # TODO
  133. elif sk:
  134. k1 = sk
  135. if pk: # s p ?
  136. k2 = pk
  137. cmp_fn = cb.lookup_skpk_cmp_fn
  138. elif ok: # s ? o
  139. k2 = ok
  140. cmp_fn = cb.lookup_skok_cmp_fn
  141. else: # s ? ?
  142. cmp_fn = cb.lookup_sk_cmp_fn
  143. elif pk:
  144. k1 = pk
  145. if ok: # ? p o
  146. k2 = ok
  147. cmp_fn = cb.lookup_pkok_cmp_fn
  148. else: # ? p ?
  149. cmp_fn = cb.lookup_pk_cmp_fn
  150. elif ok: # ? ? o
  151. k1 = ok
  152. cmp_fn = cb.lookup_ok_cmp_fn
  153. else: # ? ? ?
  154. return self.copy()
  155. self.seek()
  156. while self.get_next(&spok):
  157. if cmp_fn(<TripleKey*>spok, k1, k2):
  158. ret.add(&spok)
  159. ret.resize()
  160. return ret