callbacks.pyx 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. import logging
  2. from libc.stdint cimport uint32_t, uint64_t
  3. from libc.string cimport memcmp
  4. cimport lakesuperior.cy_include.collections as cc
  5. cimport lakesuperior.cy_include.spookyhash as sph
  6. from lakesuperior.model.base cimport Buffer, buffer_dump
  7. from lakesuperior.model.graph cimport graph
  8. from lakesuperior.model.graph.triple cimport BufferTriple
  9. logger = logging.getLogger(__name__)
  10. cdef int term_cmp_fn(const void* key1, const void* key2):
  11. """
  12. Compare function for two Buffer objects.
  13. :rtype: int
  14. :return: 0 if the byte streams are the same, another integer otherwise.
  15. """
  16. b1 = <Buffer *>key1
  17. b2 = <Buffer *>key2
  18. if b1.sz != b2.sz:
  19. return 1
  20. return memcmp(b1.addr, b2.addr, b1.sz)
  21. cdef int trp_cmp_fn(const void* key1, const void* key2):
  22. """
  23. Compare function for two triples in a set.
  24. s, p, o byte data are compared literally.
  25. :rtype: int
  26. :return: 0 if all three terms point to byte-wise identical data in both
  27. triples.
  28. """
  29. t1 = <BufferTriple *>key1
  30. t2 = <BufferTriple *>key2
  31. return (
  32. term_cmp_fn(t1.o, t2.o) or
  33. term_cmp_fn(t1.s, t2.s) or
  34. term_cmp_fn(t1.p, t2.p)
  35. )
  36. #cdef int trp_cmp_fn(const void* key1, const void* key2):
  37. # """
  38. # Compare function for two triples in a set.
  39. #
  40. # Here, pointers to terms are compared for s, p, o. The pointers should be
  41. # guaranteed to point to unique values (i.e. no two pointers have the same
  42. # term value within a graph).
  43. #
  44. # :rtype: int
  45. # :return: 0 if the addresses of all terms are the same, 1 otherwise.
  46. # """
  47. # t1 = <BufferTriple *>key1
  48. # t2 = <BufferTriple *>key2
  49. #
  50. # cdef int is_not_equal = (
  51. # t1.s.addr != t2.s.addr or
  52. # t1.p.addr != t2.p.addr or
  53. # t1.o.addr != t2.o.addr
  54. # )
  55. #
  56. # logger.info(f'Triples match: {not(is_not_equal)}')
  57. # return is_not_equal
  58. cdef bint graph_eq_fn(graph.Graph g1, graph.Graph g2):
  59. """
  60. Compare 2 graphs for equality.
  61. Note that this returns the opposite value than the triple and term
  62. compare functions: 1 (True) if equal, 0 (False) if not.
  63. """
  64. cdef:
  65. void* el
  66. cc.HashSetIter it
  67. cc.hashset_iter_init(&it, g1._triples)
  68. while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
  69. if cc.hashset_contains(g2._triples, el):
  70. return False
  71. return True
  72. cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
  73. """
  74. Hash function for serialized terms (:py:class:`Buffer` objects)
  75. """
  76. return <size_t>spookyhash_64((<Buffer*>key).addr, (<Buffer*>key).sz, seed)
  77. cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
  78. """
  79. Hash function for sets of (serialized) triples.
  80. This function concatenates the literal terms of the triple as bytes
  81. and computes their hash.
  82. """
  83. trp = <BufferTriple*>key
  84. seed64 = <uint64_t>seed
  85. seed_dummy = seed64
  86. cdef sph.spookyhash_context ctx
  87. sph.spookyhash_context_init(&ctx, seed64, seed_dummy)
  88. sph.spookyhash_update(&ctx, trp.s.addr, trp.s.sz)
  89. sph.spookyhash_update(&ctx, trp.s.addr, trp.p.sz)
  90. sph.spookyhash_update(&ctx, trp.s.addr, trp.o.sz)
  91. sph.spookyhash_final(&ctx, &seed64, &seed_dummy)
  92. return <size_t>seed64
  93. #cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
  94. # """
  95. # Hash function for sets of (serialized) triples.
  96. #
  97. # This function computes the hash of the concatenated pointer values in the
  98. # s, p, o members of the triple. The triple structure is treated as a byte
  99. # string. This is safe in spite of byte-wise struct evaluation being a
  100. # frowned-upon practice (due to padding issues), because it is assumed that
  101. # the input value is always the same type of structure.
  102. # """
  103. # return <size_t>spookyhash_64(key, l, seed)
  104. #cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
  105. # """
  106. # No-op function that takes a pointer and does *not* hash it.
  107. #
  108. # The pointer value is used as the "hash".
  109. # """
  110. # return <size_t>key
  111. cdef inline bint lookup_none_cmp_fn(
  112. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  113. ):
  114. """
  115. Dummy callback for queries with all parameters unbound.
  116. This function always returns ``True``
  117. """
  118. return True
  119. cdef inline bint lookup_s_cmp_fn(
  120. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  121. ):
  122. """
  123. Lookup callback compare function for a given ``s`` in a triple.
  124. The function returns ``True`` if ``t1`` matches the first term.
  125. ``t2`` is not used and is declared only for compatibility with the
  126. other interchangeable functions.
  127. """
  128. return not term_cmp_fn(t1, trp[0].s)
  129. cdef inline bint lookup_p_cmp_fn(
  130. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  131. ):
  132. """
  133. Lookup callback compare function for a given ``p`` in a triple.
  134. """
  135. return not term_cmp_fn(t1, trp[0].p)
  136. cdef inline bint lookup_o_cmp_fn(
  137. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  138. ):
  139. """
  140. Lookup callback compare function for a given ``o`` in a triple.
  141. """
  142. return not term_cmp_fn(t1, trp[0].o)
  143. cdef inline bint lookup_sp_cmp_fn(
  144. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  145. ):
  146. """
  147. Lookup callback compare function for a given ``s`` and ``p`` pair.
  148. """
  149. return (
  150. not term_cmp_fn(t1, trp[0].s)
  151. and not term_cmp_fn(t2, trp[0].p))
  152. cdef inline bint lookup_so_cmp_fn(
  153. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  154. ):
  155. """
  156. Lookup callback compare function for a given ``s`` and ``o`` pair.
  157. """
  158. return (
  159. not term_cmp_fn(t1, trp[0].s)
  160. and not term_cmp_fn(t2, trp[0].o))
  161. cdef inline bint lookup_po_cmp_fn(
  162. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  163. ):
  164. """
  165. Lookup callback compare function for a given ``p`` and ``o`` pair.
  166. """
  167. return (
  168. not term_cmp_fn(t1, trp[0].p)
  169. and not term_cmp_fn(t2, trp[0].o))
  170. ## LOOKUP CALLBACK FUNCTIONS
  171. cdef inline void add_trp_callback(
  172. graph.Graph gr, const TripleKey spok, void* ctx
  173. ):
  174. """
  175. Add a triple to a graph as a result of a lookup callback.
  176. """
  177. gr.add(trp)
  178. cdef inline void del_trp_callback(
  179. graph.Graph gr, const TripleKey spok, void* ctx
  180. ):
  181. """
  182. Remove a triple from a graph as a result of a lookup callback.
  183. """
  184. #logger.info('removing triple: {} {} {}'.format(
  185. #buffer_dump(trp.s), buffer_dump(trp.p), buffer_dump(trp.o)
  186. #))
  187. gr.remove(spok)