callbacks.pyx 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. import logging
  2. from libc.stdint cimport uint32_t, uint64_t
  3. from libc.string cimport memcmp
  4. cimport lakesuperior.cy_include.collections as cc
  5. cimport lakesuperior.cy_include.spookyhash as sph
  6. from lakesuperior.model.base cimport Buffer, buffer_dump
  7. from lakesuperior.model.graph cimport graph
  8. from lakesuperior.model.graph.triple cimport BufferTriple
  9. logger = logging.getLogger(__name__)
  cdef inline int term_cmp_fn(const void* key1, const void* key2):
      """
      Byte-wise comparison callback for two serialized terms.

      Both keys are cast to :py:class:`Buffer` pointers before comparing.

      :param key1: Pointer to the first Buffer.
      :param key2: Pointer to the second Buffer.

      :rtype: int
      :return: 0 if both buffers have the same size and the same bytes; 1 if
          their sizes differ; otherwise the non-zero result of ``memcmp``.
      """
      lhs = <Buffer *>key1
      rhs = <Buffer *>key2

      # Only buffers of equal length can hold the same byte stream, so the
      # byte comparison is attempted only in that case.
      if lhs.sz == rhs.sz:
          return memcmp(lhs.addr, rhs.addr, lhs.sz)

      return 1
  cdef inline int trp_cmp_fn(const void* key1, const void* key2):
      """
      Comparison callback for two triples stored in a set.

      Both keys are cast to :py:class:`BufferTriple` pointers and the byte
      data of their terms is compared with :py:func:`term_cmp_fn`.

      :param key1: Pointer to the first BufferTriple.
      :param key2: Pointer to the second BufferTriple.

      :rtype: int
      :return: 0 if ``s``, ``p`` and ``o`` are byte-wise identical in both
          triples; otherwise the first non-zero term comparison result.
      """
      cdef int diff

      lhs = <BufferTriple *>key1
      rhs = <BufferTriple *>key2

      # Terms are checked in the order o, s, p; evaluation stops at the first
      # mismatch, exactly as a chained ``or`` would.
      diff = term_cmp_fn(lhs.o, rhs.o)
      if diff == 0:
          diff = term_cmp_fn(lhs.s, rhs.s)
      if diff == 0:
          diff = term_cmp_fn(lhs.p, rhs.p)

      return diff
  38. #cdef int trp_cmp_fn(const void* key1, const void* key2):
  39. # """
  40. # Compare function for two triples in a set.
  41. #
  42. # Here, pointers to terms are compared for s, p, o. The pointers should be
  43. # guaranteed to point to unique values (i.e. no two pointers have the same
  44. # term value within a graph).
  45. #
  46. # :rtype: int
  47. # :return: 0 if the addresses of all terms are the same, 1 otherwise.
  48. # """
  49. # t1 = <BufferTriple *>key1
  50. # t2 = <BufferTriple *>key2
  51. #
  52. # cdef int is_not_equal = (
  53. # t1.s.addr != t2.s.addr or
  54. # t1.p.addr != t2.p.addr or
  55. # t1.o.addr != t2.o.addr
  56. # )
  57. #
  58. # logger.info(f'Triples match: {not(is_not_equal)}')
  59. # return is_not_equal
  cdef bint graph_eq_fn(graph.SimpleGraph g1, graph.SimpleGraph g2):
      """
      Compare 2 graphs for equality.

      Two graphs are considered equal when their triple sets have the same
      number of elements and every triple of ``g1`` is reported as contained
      in ``g2`` by ``cc.hashset_contains``.

      Note that this returns the opposite value than the triple and term
      compare functions: 1 (True) if equal, 0 (False) if not.

      :param g1: First graph; its ``_triples`` set is iterated.
      :param g2: Second graph; its ``_triples`` set is probed for membership.

      :rtype: bint
      :return: True if both graphs hold the same triples, False otherwise.
      """
      cdef:
          void* el
          cc.HashSetIter it

      # Sets of different cardinality cannot be equal. With equal sizes, the
      # one-directional containment check below is sufficient.
      if cc.hashset_size(g1._triples) != cc.hashset_size(g2._triples):
          return False

      cc.hashset_iter_init(&it, g1._triples)
      while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
          # A single triple of g1 missing from g2 makes the graphs differ.
          if not cc.hashset_contains(g2._triples, el):
              return False

      return True
  cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
      """
      Hash function for serialized terms (:py:class:`Buffer` objects)

      :param key: Pointer to the Buffer whose bytes are hashed.
      :param l: Not used here; the hashed length is taken from the buffer's
          own ``sz`` member. Presumably kept to match the hash callback
          signature expected by the hash table -- confirm against caller.
      :param seed: Seed forwarded to the hash function.

      :rtype: size_t
      :return: 64-bit SpookyHash of the buffer contents, cast to ``size_t``.
      """
      term = <Buffer*>key

      # The spookyhash declarations are cimported under the ``sph`` namespace,
      # so the function must be referenced through it.
      return <size_t>sph.spookyhash_64(term.addr, term.sz, seed)
  cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
      """
      Hash function for sets of (serialized) triples.

      The byte contents of the ``s``, ``p`` and ``o`` terms are fed in that
      order into one incremental SpookyHash context, which is equivalent to
      hashing their concatenation.

      :param key: Pointer to the BufferTriple to hash.
      :param l: Not used here; each term carries its own size. Presumably kept
          to match the hash callback signature -- confirm against caller.
      :param seed: Seed; widened to 64 bits and used for both seed slots of
          the hash context.

      :rtype: size_t
      :return: First 64-bit word of the finalized hash, cast to ``size_t``.
      """
      cdef:
          sph.spookyhash_context ctx
          uint64_t seed64 = <uint64_t>seed
          # Second seed on input; receives the second hash word on output,
          # which is discarded.
          uint64_t seed_dummy = seed64

      trp = <BufferTriple*>key

      sph.spookyhash_context_init(&ctx, seed64, seed_dummy)
      # Each term must be hashed from its own address with its own size.
      # Pairing one term's address with another term's size would ignore the
      # other terms' data and could read past the end of the buffer.
      sph.spookyhash_update(&ctx, trp.s.addr, trp.s.sz)
      sph.spookyhash_update(&ctx, trp.p.addr, trp.p.sz)
      sph.spookyhash_update(&ctx, trp.o.addr, trp.o.sz)
      sph.spookyhash_final(&ctx, &seed64, &seed_dummy)

      return <size_t>seed64
  95. #cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
  96. # """
  97. # Hash function for sets of (serialized) triples.
  98. #
  99. # This function computes the hash of the concatenated pointer values in the
  100. # s, p, o members of the triple. The triple structure is treated as a byte
  101. # string. This is safe in spite of byte-wise struct evaluation being a
  102. # frowned-upon practice (due to padding issues), because it is assumed that
  103. # the input value is always the same type of structure.
  104. # """
  105. # return <size_t>spookyhash_64(key, l, seed)
  106. #cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
  107. # """
  108. # No-op function that takes a pointer and does *not* hash it.
  109. #
  110. # The pointer value is used as the "hash".
  111. # """
  112. # return <size_t>key
  cdef inline bint lookup_none_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Match callback for lookups in which no term is bound.

      None of the arguments is inspected; they are declared only so that the
      signature is the same as the other lookup compare functions.

      :rtype: bint
      :return: Always ``True``.
      """
      return True
  cdef inline bint lookup_s_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Match callback for lookups bound on the subject only.

      :param trp: Triple being tested.
      :param t1: Term compared against the triple's ``s`` member.
      :param t2: Ignored; present only so the signature matches the other
          lookup compare functions.

      :rtype: bint
      :return: ``True`` if ``t1`` is byte-wise equal to the subject.
      """
      return term_cmp_fn(t1, trp.s) == 0
  cdef inline bint lookup_p_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Match callback for lookups bound on the predicate only.

      :param trp: Triple being tested.
      :param t1: Term compared against the triple's ``p`` member.
      :param t2: Ignored; present only so the signature matches the other
          lookup compare functions.

      :rtype: bint
      :return: ``True`` if ``t1`` is byte-wise equal to the predicate.
      """
      return term_cmp_fn(t1, trp.p) == 0
  cdef inline bint lookup_o_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Match callback for lookups bound on the object only.

      :param trp: Triple being tested.
      :param t1: Term compared against the triple's ``o`` member.
      :param t2: Ignored; present only so the signature matches the other
          lookup compare functions.

      :rtype: bint
      :return: ``True`` if ``t1`` is byte-wise equal to the object.
      """
      return term_cmp_fn(t1, trp.o) == 0
  cdef inline bint lookup_sp_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Match callback for lookups bound on subject and predicate.

      :param trp: Triple being tested.
      :param t1: Term compared against the triple's ``s`` member.
      :param t2: Term compared against the triple's ``p`` member.

      :rtype: bint
      :return: ``True`` if both terms are byte-wise equal to their
          counterparts in the triple.
      """
      # The predicate is only looked at once the subject has matched.
      if term_cmp_fn(t1, trp.s) != 0:
          return False

      return term_cmp_fn(t2, trp.p) == 0
  cdef inline bint lookup_so_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Match callback for lookups bound on subject and object.

      :param trp: Triple being tested.
      :param t1: Term compared against the triple's ``s`` member.
      :param t2: Term compared against the triple's ``o`` member.

      :rtype: bint
      :return: ``True`` if both terms are byte-wise equal to their
          counterparts in the triple.
      """
      # The object is only looked at once the subject has matched.
      if term_cmp_fn(t1, trp.s) != 0:
          return False

      return term_cmp_fn(t2, trp.o) == 0
  cdef inline bint lookup_po_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Match callback for lookups bound on predicate and object.

      :param trp: Triple being tested.
      :param t1: Term compared against the triple's ``p`` member.
      :param t2: Term compared against the triple's ``o`` member.

      :rtype: bint
      :return: ``True`` if both terms are byte-wise equal to their
          counterparts in the triple.
      """
      # The object is only looked at once the predicate has matched.
      if term_cmp_fn(t1, trp.p) != 0:
          return False

      return term_cmp_fn(t2, trp.o) == 0
  172. ## LOOKUP CALLBACK FUNCTIONS
  cdef inline void add_trp_callback(
      graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
  ):
      """
      Lookup callback that inserts the matched triple into a graph.

      :param gr: Graph receiving the triple.
      :param trp: Triple handed over by the lookup.
      :param ctx: Not used by this callback.
      """
      # NOTE(review): the meaning of the second argument (``True``) is defined
      # by ``SimpleGraph.add_triple``, which is not visible here -- confirm.
      gr.add_triple(trp, True)
  cdef inline void del_trp_callback(
      graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
  ):
      """
      Lookup callback that removes the matched triple from a graph.

      :param gr: Graph the triple is removed from.
      :param trp: Triple handed over by the lookup.
      :param ctx: Not used by this callback.
      """
      gr.remove_triple(trp)
  189. gr.remove_triple(trp)