# keyset.pyx 4.3 KB
  1. from libc.string cimport memcmp
  2. from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
  3. from lakesuperior.cy_includes cimport collections as cc
  4. from lakesuperior.model.base cimport (
  5. KeyIdx, Key, DoubleKey, TripleKey, Buffer
  6. )
  7. cdef class BaseKeyset:
  8. """
  9. Pre-allocated result set.
  10. """
  11. def __cinit__(self, size_t ct):
  12. """
  13. Initialize and allocate memory for the data set.
  14. :param size_t ct: Number of elements to be accounted for.
  15. """
  16. self.itemsize = self.get_itemsize() # Set this in concrete classes
  17. cc.array_conf_init(&self.conf)
  18. self.conf.capacity = ct
  19. self.conf.exp_factor = .5
  20. cc.array_init_conf(&self.conf, &self.data)
  21. if not self.data:
  22. raise MemoryError()
  23. def __dealloc__(self):
  24. """
  25. Free the memory.
  26. This is called when the Python instance is garbage collected, which
  27. makes it handy to safely pass a Keyset instance across functions.
  28. """
  29. PyMem_Free(self.data)
  30. # Access methods.
  31. cdef size_t get_itemsize(self):
  32. raise NotImplementedError()
  33. cdef unsigned char *get_item(self, i):
  34. """
  35. Get an item at a given index position. Cython-level method.
  36. The item size is known by the ``itemsize`` property of the object.
  37. :rtype: unsigned char*
  38. """
  39. self._cur = i
  40. return self.data + self.itemsize * i
  41. cdef bint iter_next(self, unsigned char** val):
  42. """
  43. Populate the current value and advance the cursor by 1.
  44. :param void *val: Addres of value returned. It is NULL if
  45. the end of the buffer was reached.
  46. :rtype: bint
  47. :return: True if a value was found, False if the end of the buffer
  48. has been reached.
  49. """
  50. if self._cur >= self.conf.capacity:
  51. val = NULL
  52. return False
  53. val[0] = self.data + self.itemsize * self._cur
  54. self._cur += 1
  55. return True
  56. cdef bint contains(self, const void *val):
  57. """
  58. Whether a value exists in the set.
  59. """
  60. cdef unsigned char* stored_val
  61. self.iter_init()
  62. while self.iter_next(&stored_val):
  63. if memcmp(val, stored_val, self.itemsize) == 0:
  64. return True
  65. return False
  66. class Keyset(BaseKeyset):
  67. cdef size_t get_itemsize():
  68. return KLEN
  69. class DoubleKeyset(BaseKeyset):
  70. cdef size_t get_itemsize():
  71. return DBL_KLEN
  72. class TripleKeyset(BaseKeyset):
  73. cdef size_t get_itemsize():
  74. return TRP_KLEN
  75. cdef TripleKeyset lookup(
  76. self, const KeyIdx* sk, const KeyIdx* pk, const KeyIdx* ok
  77. ):
  78. """
  79. Look up triple keys in a similar way that the ``SimpleGraph`` and
  80. ``LmdbStore`` methods work.
  81. Any and all the terms may be NULL. A NULL term is treated as unbound.
  82. :param const KeyIdx* sk: s key pointer.
  83. :param const KeyIdx* pk: p key pointer.
  84. :param const KeyIdx* ok: o key pointer.
  85. """
  86. cdef:
  87. void* cur
  88. cc.ArrayIter it
  89. TripleKey spok
  90. TripleKeyset ret
  91. KeyIdx bk1 = NULL, bk2 = NULL
  92. cc.array_iter_init(&it, self.data)
  93. if sk and pk and ok: # s p o
  94. pass # TODO
  95. elif sk:
  96. bt1 = sk[0]
  97. if pk: # s p ?
  98. bt2 = pk[0]
  99. cmp_fn = cb.lookup_skpk_cmp_fn
  100. elif ok: # s ? o
  101. bt2 = ok[0]
  102. cmp_fn = cb.lookup_skok_cmp_fn
  103. else: # s ? ?
  104. cmp_fn = cb.lookup_sk_cmp_fn
  105. elif pk:
  106. bt1 = pk[0]
  107. if ok: # ? p o
  108. bt2 = ok[0]
  109. cmp_fn = cb.lookup_pkok_cmp_fn
  110. else: # ? p ?
  111. cmp_fn = cb.lookup_pk_cmp_fn
  112. elif ok: # ? ? o
  113. bt1 = ok[0]
  114. cmp_fn = cb.lookup_ok_cmp_fn
  115. else: # ? ? ?
  116. return self # TODO Placeholder. This should actually return a copy.
  117. ret = TripleKeyset(256) # TODO Totally arbitrary.
  118. while cc.array_iter_next(&it, &cur) != cc.CC_ITER_END:
  119. if cmp_fn(<TripleKey*>spok, t1, t2):
  120. if cc.array_add(ret.data, spok) != cc.CC_OK:
  121. raise RuntimeError('Error adding triple key.')
  122. return ret