# keyset.pyx

from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
from libc.stdint cimport SIZE_MAX
from libc.string cimport memcmp
  3. cdef class Keyset:
  4. """
  5. Pre-allocated result set.
  6. Data in the set are stored as a 1D contiguous array of characters.
  7. Access to elements at an arbitrary index position is achieved by using the
  8. ``itemsize`` property multiplied by the index number.
  9. Key properties:
  10. ``ct``: number of elements in the set.
  11. ``itemsize``: size of each element, in bytes. All elements have the same
  12. size.
  13. ``size``: Total size, in bytes, of the data set. This is the product of
  14. ``itemsize`` and ``ct``.
  15. """
  16. def __cinit__(self, size_t ct, unsigned char itemsize):
  17. """
  18. Initialize and allocate memory for the data set.
  19. :param size_t ct: Number of elements to be accounted for.
  20. :param unsigned char itemsize: Size of an individual item.
  21. Note that the ``itemsize`` is an unsigned char,
  22. i.e. an item can be at most 255 bytes. This is for economy reasons,
  23. since many multiplications are done between ``itemsize`` and other
  24. char variables.
  25. """
  26. self.ct = ct
  27. self.itemsize = itemsize
  28. self.size = self.itemsize * self.ct
  29. self._cur = 0
  30. #logger.debug('Got malloc sizes: {}, {}'.format(ct, itemsize))
  31. #logger.debug(
  32. # 'Allocating {0} ({1}x{2}) bytes of Keyset data...'.format(
  33. # self.size, self.ct, self.itemsize))
  34. self.data = <unsigned char *>PyMem_Malloc(ct * itemsize)
  35. if not self.data:
  36. raise MemoryError()
  37. #logger.debug('...done allocating @ {0:x}.'.format(
  38. # <unsigned long>self.data))
  39. def __dealloc__(self):
  40. """
  41. Free the memory.
  42. This is called when the Python instance is garbage collected, which
  43. makes it handy to safely pass a Keyset instance across functions.
  44. """
  45. #logger.debug(
  46. # 'Releasing {0} ({1}x{2}) bytes of Keyset @ {3:x}...'.format(
  47. # self.size, self.ct, self.itemsize,
  48. # <unsigned long>self.data))
  49. PyMem_Free(self.data)
  50. #logger.debug('...done releasing.')
  51. cdef void resize(self, size_t ct) except *:
  52. """
  53. Resize the result set. Uses ``PyMem_Realloc``.
  54. Note that resizing to a smaller size does not copy or reallocate the
  55. data, resizing to a larger size does.
  56. Also, note that only the number of items can be changed, the item size
  57. cannot.
  58. :param size_t ct: Number of items in the result set.
  59. """
  60. cdef unsigned char *tmp
  61. self.ct = ct
  62. self.size = self.itemsize * self.ct
  63. #logger.debug(
  64. # 'Resizing Keyset to {0} ({1}x{2}) bytes @ {3:x}...'.format(
  65. # self.itemsize * ct, ct, self.itemsize,
  66. # <unsigned long>self.data))
  67. tmp = <unsigned char *>PyMem_Realloc(self.data, ct * self.itemsize)
  68. if not tmp:
  69. raise MemoryError()
  70. #logger.debug('...done resizing.')
  71. self.data = tmp
  72. # Access methods.
  73. def to_tuple(self):
  74. """
  75. Return the data set as a Python tuple.
  76. :rtype: tuple
  77. """
  78. return tuple(
  79. self.data[i: i + self.itemsize]
  80. for i in range(0, self.size, self.itemsize))
  81. def get_item_obj(self, i):
  82. """
  83. Get an item at a given index position.
  84. :rtype: bytes
  85. """
  86. if i >= self.ct:
  87. raise ValueError(f'Index {i} out of range.')
  88. return self.get_item(i)[: self.itemsize]
  89. def reset(self):
  90. """
  91. Reset the cursor to the initial position.
  92. """
  93. self._cur = 0
  94. def tell(self):
  95. """
  96. Tell the position of the cursor in the keyset.
  97. """
  98. return self._cur
  99. cdef unsigned char *get_item(self, i):
  100. """
  101. Get an item at a given index position. Cython-level method.
  102. The item size is known by the ``itemsize`` property of the object.
  103. :rtype: unsigned char*
  104. """
  105. self._cur = i
  106. return self.data + self.itemsize * i
  107. cdef bint next(self, void *val):
  108. """
  109. Populate the current value and advance the cursor by 1.
  110. :param void *val: Addres of value returned. It is NULL if
  111. the end of the buffer was reached.
  112. :rtype: bint
  113. :return: True if a value was found, False if the end of the buffer
  114. has been reached.
  115. """
  116. if self._cur >= self.ct:
  117. val = NULL
  118. return False
  119. val = self.data + self.itemsize * self._cur
  120. self._cur += 1
  121. return True
  122. cdef bint contains(self, const void *val):
  123. """
  124. Whether a value exists in the set.
  125. """
  126. cdef void *stored_val
  127. self.reset()
  128. while self.next(stored_val):
  129. if memcmp(val, stored_val, self.itemsize) == 0:
  130. return True
  131. return False