callbacks.pyx 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. import logging
  2. from libc.stdint cimport uint32_t, uint64_t
  3. from libc.string cimport memcmp
  4. from lakesuperior.cy_include cimport collections as cc
  5. from lakesuperior.cy_include cimport spookyhash as sph
  6. from lakesuperior.model.base cimport Buffer, buffer_dump
  7. from lakesuperior.model.graph cimport graph
  8. from lakesuperior.model.graph.triple cimport BufferTriple
  9. logger = logging.getLogger(__name__)
  cdef int term_cmp_fn(const void* key1, const void* key2):
      """
      Compare function for two Buffer objects.

      Both keys are cast to ``Buffer`` pointers. Buffers of different sizes are
      reported as different without inspecting their content; otherwise the
      two byte streams are compared with ``memcmp`` over ``sz`` bytes.

      :param key1: Pointer to the first ``Buffer``.
      :param key2: Pointer to the second ``Buffer``.

      :rtype: int
      :return: 0 if the byte streams are the same, another integer otherwise
          (1 when the sizes differ, the ``memcmp`` result when they match).
      """
      cdef:
          Buffer* b1 = <Buffer *>key1
          Buffer* b2 = <Buffer *>key2
          int cmp

      if b1.sz != b2.sz:
          # Values are passed as logger arguments rather than pre-formatted, so
          # the message is only built when an INFO record is actually emitted.
          logger.info('Sizes differ: %s != %s. Return 1.', b1.sz, b2.sz)
          return 1

      cmp = memcmp(b1.addr, b2.addr, b1.sz)
      logger.info('term memcmp: %s', cmp)

      return cmp
  cdef int trp_cmp_fn(const void* key1, const void* key2):
      """
      Compare function for two triples in a set.

      s, p, o byte data are compared literally with :py:func:`term_cmp_fn`.
      The terms are checked in the order o, s, p and the comparison stops at
      the first pair that differs.

      :param key1: Pointer to the first ``BufferTriple``.
      :param key2: Pointer to the second ``BufferTriple``.

      :rtype: int
      :return: 0 if all three terms point to byte-wise identical data in both
          triples; otherwise the non-zero result of the first differing term
          comparison.
      """
      cdef:
          BufferTriple* t1 = <BufferTriple *>key1
          BufferTriple* t2 = <BufferTriple *>key2
          int diff

      diff = (
          term_cmp_fn(t1.o, t2.o) or
          term_cmp_fn(t1.s, t2.s) or
          term_cmp_fn(t1.p, t2.p)
      )

      # The value is passed as a logger argument so that the message is only
      # formatted when an INFO record is actually emitted.
      logger.info('Triples match: %s', not diff)

      return diff
  41. #cdef int trp_cmp_fn(const void* key1, const void* key2):
  42. # """
  43. # Compare function for two triples in a set.
  44. #
  45. # Here, pointers to terms are compared for s, p, o. The pointers should be
  46. # guaranteed to point to unique values (i.e. no two pointers have the same
  47. # term value within a graph).
  48. #
  49. # :rtype: int
  50. # :return: 0 if the addresses of all terms are the same, 1 otherwise.
  51. # """
  52. # t1 = <BufferTriple *>key1
  53. # t2 = <BufferTriple *>key2
  54. #
  55. # cdef int is_not_equal = (
  56. # t1.s.addr != t2.s.addr or
  57. # t1.p.addr != t2.p.addr or
  58. # t1.o.addr != t2.o.addr
  59. # )
  60. #
  61. # logger.info(f'Triples match: {not(is_not_equal)}')
  62. # return is_not_equal
  cdef bint graph_eq_fn(graph.SimpleGraph g1, graph.SimpleGraph g2):
      """
      Compare 2 graphs for equality.

      Two graphs are considered equal when every triple of ``g1`` is contained
      in ``g2`` and every triple of ``g2`` is contained in ``g1``.

      Note that this returns the opposite value than the triple and term
      compare functions: 1 (True) if equal, 0 (False) if not.

      :param g1: First graph to compare.
      :param g2: Second graph to compare.

      :rtype: bint
      :return: True if both graphs hold the same triples, False otherwise.
      """
      cdef:
          void* el
          cc.HashSetIter it

      # Every triple in g1 must also be present in g2.
      cc.hashset_iter_init(&it, g1._triples)
      while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
          if not cc.hashset_contains(g2._triples, el):
              return False

      # The reverse pass rules out g1 being a strict subset of g2.
      cc.hashset_iter_init(&it, g2._triples)
      while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
          if not cc.hashset_contains(g1._triples, el):
              return False

      return True
  cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
      """
      Hash function for serialized terms (:py:class:`Buffer` objects)

      The hash is computed over the ``sz`` bytes found at the buffer's
      ``addr``.

      :param key: Pointer to the ``Buffer`` to hash.
      :param l: Not used by this function; the length is taken from the
          buffer itself.
      :param seed: Seed passed on to the hash function.

      :rtype: size_t
      :return: The 64-bit SpookyHash of the buffer content, cast to ``size_t``.
      """
      cdef Buffer* term = <Buffer*>key

      # The SpookyHash declarations are cimported under the ``sph`` namespace,
      # so the function must be referenced through it.
      return <size_t>sph.spookyhash_64(term.addr, term.sz, seed)
  cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
      """
      Hash function for sets of (serialized) triples.

      This function feeds the literal bytes of the s, p and o terms of the
      triple, in that order, into one incremental hash context and returns
      the resulting hash.

      :param key: Pointer to the ``BufferTriple`` to hash.
      :param l: Not used by this function; lengths are taken from each term.
      :param seed: Seed for the hash; widened to 64 bits and used for both
          seed values of the hash context.

      :rtype: size_t
      :return: The first 64-bit word of the final hash, cast to ``size_t``.
      """
      cdef:
          BufferTriple* trp = <BufferTriple*>key
          uint64_t seed64 = <uint64_t>seed
          uint64_t seed_dummy = seed64
          sph.spookyhash_context ctx

      sph.spookyhash_context_init(&ctx, seed64, seed_dummy)

      # Each term contributes its own bytes with its own length.
      sph.spookyhash_update(&ctx, trp.s.addr, trp.s.sz)
      sph.spookyhash_update(&ctx, trp.p.addr, trp.p.sz)
      sph.spookyhash_update(&ctx, trp.o.addr, trp.o.sz)

      # The two seed variables are reused as output slots for the final hash;
      # only the first one is returned.
      sph.spookyhash_final(&ctx, &seed64, &seed_dummy)

      return <size_t>seed64
  98. #cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
  99. # """
  100. # Hash function for sets of (serialized) triples.
  101. #
  102. # This function computes the hash of the concatenated pointer values in the
  103. # s, p, o members of the triple. The triple structure is treated as a byte
  104. # string. This is safe in spite of byte-wise struct evaluation being a
  105. # frowned-upon practice (due to padding issues), because it is assumed that
  106. # the input value is always the same type of structure.
  107. # """
  108. # return <size_t>spookyhash_64(key, l, seed)
  109. #cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
  110. # """
  111. # No-op function that takes a pointer and does *not* hash it.
  112. #
  113. # The pointer value is used as the "hash".
  114. # """
  115. # return <size_t>key
  cdef inline bint lookup_none_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Placeholder lookup callback for queries with no bound term.

      None of the arguments is inspected: every triple is accepted.

      :rtype: bint
      :return: Always ``True``.
      """
      return True
  cdef inline bint lookup_s_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Lookup callback matching a triple against a given ``s`` term.

      ``t2`` is ignored; it is only part of the signature so that this
      function is interchangeable with the other lookup callbacks.

      :rtype: bint
      :return: ``True`` if ``t1`` compares equal to the subject of ``trp``.
      """
      return term_cmp_fn(t1, trp.s) == 0
  cdef inline bint lookup_p_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Lookup callback matching a triple against a given ``p`` term.

      ``t2`` is ignored.

      :rtype: bint
      :return: ``True`` if ``t1`` compares equal to the predicate of ``trp``.
      """
      return term_cmp_fn(t1, trp.p) == 0
  cdef inline bint lookup_o_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Lookup callback matching a triple against a given ``o`` term.

      ``t2`` is ignored.

      :rtype: bint
      :return: ``True`` if ``t1`` compares equal to the object of ``trp``.
      """
      return term_cmp_fn(t1, trp.o) == 0
  cdef inline bint lookup_sp_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Lookup callback matching a triple against a given ``s`` and ``p`` pair.

      ``t1`` is checked against the subject first; the predicate is only
      compared with ``t2`` when the subject matches.

      :rtype: bint
      :return: ``True`` if both terms compare equal.
      """
      if term_cmp_fn(t1, trp.s) != 0:
          return False

      return term_cmp_fn(t2, trp.p) == 0
  cdef inline bint lookup_so_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Lookup callback matching a triple against a given ``s`` and ``o`` pair.

      ``t1`` is checked against the subject first; the object is only
      compared with ``t2`` when the subject matches.

      :rtype: bint
      :return: ``True`` if both terms compare equal.
      """
      if term_cmp_fn(t1, trp.s) != 0:
          return False

      return term_cmp_fn(t2, trp.o) == 0
  cdef inline bint lookup_po_cmp_fn(
      const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  ):
      """
      Lookup callback matching a triple against a given ``p`` and ``o`` pair.

      ``t1`` is checked against the predicate first; the object is only
      compared with ``t2`` when the predicate matches.

      :rtype: bint
      :return: ``True`` if both terms compare equal.
      """
      if term_cmp_fn(t1, trp.p) != 0:
          return False

      return term_cmp_fn(t2, trp.o) == 0
  175. ## LOOKUP CALLBACK FUNCTIONS
  cdef inline void add_trp_callback(
          graph.SimpleGraph gr, const BufferTriple* trp, void* ctx):
      """
      Lookup callback that inserts the matched triple into a graph.

      The triple is handed to ``gr.add_triple`` with ``True`` as the second
      argument. ``ctx`` is not used.

      :param gr: Graph receiving the triple.
      :param trp: Triple to add.
      :param ctx: Unused context pointer.
      """
      gr.add_triple(trp, True)
  cdef inline void del_trp_callback(
          graph.SimpleGraph gr, const BufferTriple* trp, void* ctx):
      """
      Lookup callback that removes the matched triple from a graph.

      A dump of the three terms is logged at INFO level before the triple is
      passed to ``gr.remove_triple``. ``ctx`` is not used.

      :param gr: Graph the triple is removed from.
      :param trp: Triple to remove.
      :param ctx: Unused context pointer.
      """
      msg = 'removing triple: {} {} {}'.format(
          buffer_dump(trp.s), buffer_dump(trp.p), buffer_dump(trp.o))
      logger.info(msg)

      gr.remove_triple(trp)