store_htable.c 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. #include "store_htable.h"
  2. // Assume VERY coarsly that the number of unique terms will be in general
  3. // 1.7 times the number of triples. This is conservative to maintain load
  4. // factor low.
  5. #define IDX_SIZE_RATIO 1.7
  6. /**
  7. * Callback type for key comparison.
  8. */
  9. typedef bool (*LSUP_key_eq_fn_t)(
  10. const LSUP_Key spok[], const LSUP_Key luk[]);
  11. typedef struct HTStore {
  12. LSUP_HTable *keys;
  13. LSUP_HTable *idx; // Dictionary of keys to serialized terms
  14. } HTStore;
  15. typedef struct HTIterator {
  16. HTStore * store; // Store being iterated.
  17. LSUP_HTable * ht; // Hash table to look up.
  18. htsize_t cur; // Lookup cursor.
  19. LSUP_Key luk[3]; // 0÷3 lookup keys.
  20. LSUP_key_eq_fn_t eq_fn; // Equality function to test triples.
  21. int rc; // Return code for *next* result.
  22. LSUP_Key * spok; // Retrieved SPO key.
  23. } HTIterator;
  24. /**
  25. * Identity hashing function.
  26. *
  27. * Since the key is already a strong hash, reuse it for bucket allocation.
  28. */
  29. static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed)
  30. { return *(uint64_t*)key; }
  31. /**
  32. * General XX64 hash. Strong (non-crypto) and extremely fast.
  33. */
  34. static inline uint64_t xx64_hash_fn(
  35. const void *key, ksize_t size, uint64_t seed)
  36. { return XXH64(key, size, seed); }
  37. static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size)
  38. { return memcmp(a, b, size) == 0; }
  39. /* * * CALLBACKS * * */
  40. /**
  41. * Dummy callback for queries with all parameters unbound. Returns true.
  42. */
  43. static bool lookup_none_eq_fn(
  44. const LSUP_Key spok[], const LSUP_Key luk[])
  45. { return true; }
  46. /**
  47. * Keyset lookup for S key.
  48. */
  49. static bool lookup_sk_eq_fn(
  50. const LSUP_Key spok[], const LSUP_Key luk[])
  51. { return spok[0] == luk[0]; }
  52. /**
  53. * Keyset lookup for P key.
  54. */
  55. static bool lookup_pk_eq_fn(
  56. const LSUP_Key spok[], const LSUP_Key luk[])
  57. { return spok[1] == luk[0]; }
  58. /**
  59. * Keyset lookup for O key.
  60. */
  61. static bool lookup_ok_eq_fn(
  62. const LSUP_Key spok[], const LSUP_Key luk[])
  63. { return spok[2] == luk[0]; }
  64. /**
  65. * Keyset lookup for S and P keys.
  66. */
  67. static bool lookup_spk_eq_fn(
  68. const LSUP_Key spok[], const LSUP_Key luk[])
  69. { return spok[0] == luk[0] && spok[1] == luk[1]; }
  70. /**
  71. * Keyset lookup for S and O keys.
  72. */
  73. static bool lookup_sok_eq_fn(
  74. const LSUP_Key spok[], const LSUP_Key luk[])
  75. { return spok[0] == luk[0] && spok[2] == luk[1]; }
  76. /**
  77. * Keyset lookup for P and O keys.
  78. */
  79. static bool lookup_pok_eq_fn(
  80. const LSUP_Key spok[], const LSUP_Key luk[])
  81. { return spok[1] == luk[0] && spok[2] == luk[1]; }
  82. /* * * Other prototypes. * * */
  83. static inline LSUP_rc htiter_next_key(HTIterator *it);
  84. /* * * API * * */
  85. LSUP_rc
  86. LSUP_htstore_new(size_t capacity, HTStore **ht_p)
  87. {
  88. HTStore *ht;
  89. CRITICAL(ht = malloc(sizeof(HTStore)));
  90. *ht_p = ht;
  91. LSUP_rc rc = LSUP_htable_new(
  92. capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn, &ht->keys);
  93. if (rc != LSUP_OK) return rc;
  94. rc = LSUP_htable_new(
  95. capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t),
  96. xx64_hash_fn, buffer_eq_fn, &ht->idx);
  97. return rc;
  98. }
  99. void
  100. LSUP_htstore_free(HTStore *ht)
  101. {
  102. if (!ht) return;
  103. LSUP_htable_free(ht->keys);
  104. // Free up index entries and index.
  105. htsize_t cur = 0;
  106. LSUP_TripleKey spok;
  107. LSUP_Buffer *sterm;
  108. while(LSUP_htable_iter(
  109. ht->idx, &cur, (void**)&spok, (void**)&sterm) == LSUP_OK) {
  110. TRACE("Freeing indexed term buffer #%d at %p", cur, sterm);
  111. LSUP_buffer_done(sterm);
  112. }
  113. LSUP_htable_free(ht->idx);
  114. free(ht);
  115. }
  116. htsize_t
  117. LSUP_htstore_size(LSUP_HTStore *ht)
  118. { return LSUP_htable_size(ht->keys); }
  119. htsize_t
  120. LSUP_htstore_capacity(const LSUP_HTStore *ht)
  121. { return LSUP_htable_capacity(ht->keys); }
  122. LSUP_rc
  123. LSUP_htstore_resize(HTStore *ht, htsize_t size)
  124. {
  125. LSUP_rc rc = LSUP_htable_resize(ht->keys, size);
  126. if (rc != LSUP_OK) return rc;
  127. return LSUP_htable_resize(ht->idx, size * IDX_SIZE_RATIO);
  128. }
  129. LSUP_rc
  130. LSUP_htstore_add(HTStore *store, const LSUP_SerTriple *sspo)
  131. {
  132. LSUP_TripleKey spok = NULL_TRP;
  133. // Add term to index.
  134. for (int i = 0; i < 3; i++) {
  135. spok[i] = LSUP_sterm_to_key(LSUP_striple_pos(sspo, i));
  136. TRACE("Indexing term key %lu\n", spok[i]);
  137. // If term is already in the index, discard and free it.
  138. if (LSUP_htable_get(store->idx, spok + i, NULL) == LSUP_OK)
  139. LSUP_htable_put(store->idx, spok + i, LSUP_striple_pos(sspo, i));
  140. }
  141. // Add triple.
  142. TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
  143. return LSUP_htable_put(store->keys, spok, NULL);
  144. }
  145. LSUP_rc
  146. LSUP_htstore_remove(
  147. LSUP_HTStore *store, const LSUP_SerTriple *sspo, size_t *ct)
  148. {
  149. LSUP_HTIterator *it;
  150. LSUP_rc rc = LSUP_htstore_lookup(store, sspo, &it, ct);
  151. if (UNLIKELY (rc != LSUP_OK)) return rc;
  152. *ct = 0;
  153. while (htiter_next_key (it)) {
  154. rc = LSUP_htable_remove(store->keys, it->spok);
  155. if (UNLIKELY (rc < 0)) return rc;
  156. (*ct) ++;
  157. }
  158. // TODO clean up orphan indices in separate function.
  159. return LSUP_OK;
  160. }
  161. LSUP_rc
  162. LSUP_htstore_lookup(
  163. HTStore *store, const LSUP_SerTriple *sspo,
  164. HTIterator **it_p, size_t *ct)
  165. {
  166. if (LSUP_htable_size(store->keys) == 0)
  167. return LSUP_NOACTION;
  168. LSUP_TripleKey spok = {
  169. LSUP_sterm_to_key(sspo->s),
  170. LSUP_sterm_to_key(sspo->p),
  171. LSUP_sterm_to_key(sspo->o),
  172. };
  173. HTIterator *it;
  174. CRITICAL(it = malloc(sizeof(HTIterator)));
  175. it->store = store;
  176. it->cur = 0;
  177. *it_p = it;
  178. // s p o
  179. if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
  180. memcpy(it->luk, spok, sizeof(LSUP_TripleKey));
  181. it->eq_fn = NULL;
  182. } else if (spok[0] != NULL_KEY) {
  183. it->luk[0] = spok[0];
  184. // s p ?
  185. if (spok[1] != NULL_KEY) {
  186. it->luk[1] = spok[1];
  187. it->eq_fn = lookup_spk_eq_fn;
  188. // s ? o
  189. } else if (spok[2] != NULL_KEY) {
  190. it->luk[1] = spok[2];
  191. it->eq_fn = lookup_sok_eq_fn;
  192. // s ? ?
  193. } else {
  194. it->eq_fn = lookup_sk_eq_fn;
  195. }
  196. } else if (spok[1] != NULL_KEY) {
  197. it->luk[0] = spok[1];
  198. // ? p o
  199. if (spok[2] != NULL_KEY) {
  200. it->luk[1] = spok[2];
  201. it->eq_fn = lookup_pok_eq_fn;
  202. // ? p ?
  203. } else it->eq_fn = lookup_pk_eq_fn;
  204. // ? ? o
  205. } else if (spok[2] != NULL_KEY) {
  206. it->luk[0] = spok[2];
  207. it->eq_fn = lookup_ok_eq_fn;
  208. // ? ? ?
  209. } else it->eq_fn = lookup_none_eq_fn;
  210. it->rc = LSUP_htable_iter(
  211. it->store->keys, &it->cur, (void**)&it->spok, NULL);
  212. return it->rc >= 0 ? LSUP_OK : it->rc;
  213. }
  214. static inline LSUP_rc
  215. htiter_next_key(HTIterator *it)
  216. {
  217. for (;;) {
  218. if (it->rc != LSUP_OK) return it->rc;
  219. if (it->eq_fn(it->spok, it->luk)) return LSUP_OK;
  220. it->rc = LSUP_htable_iter(
  221. it->store->keys, &it->cur, (void**)&it->spok, NULL);
  222. }
  223. }
  224. LSUP_rc
  225. LSUP_htiter_next(HTIterator *it, LSUP_SerTriple *sspo)
  226. {
  227. LSUP_rc rc = htiter_next_key(it);
  228. if (UNLIKELY (rc != LSUP_OK)) return rc;
  229. for (int i = 0; i < 3; i++)
  230. LSUP_htable_get(
  231. it->store->idx, (void*)it->spok[i],
  232. (void**)(LSUP_striple_pos(sspo, i)));
  233. return rc;
  234. }
  235. void
  236. LSUP_htiter_free(LSUP_HTIterator *it)
  237. { free(it); }
  238. LSUP_rc
  239. LSUP_htstore_bool_op(
  240. const LSUP_bool_op op, const HTStore *s1, const HTStore *s2,
  241. HTStore **dest_p)
  242. {
  243. HTStore *dest;
  244. htsize_t cur;
  245. void *key, *val;
  246. LSUP_htstore_new(0, &dest);
  247. *dest_p = dest;
  248. if (UNLIKELY (
  249. op != LSUP_BOOL_UNION
  250. && op != LSUP_BOOL_SUBTRACTION
  251. && op != LSUP_BOOL_INTERSECTION
  252. && op != LSUP_BOOL_XOR)) return LSUP_VALUE_ERR;
  253. if (op == LSUP_BOOL_UNION) {
  254. LSUP_htable_copy(s1->keys, &dest->keys);
  255. while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END)
  256. LSUP_htable_put(dest->keys, key, NULL);
  257. LSUP_htable_copy(s1->idx, &dest->idx);
  258. while (LSUP_htable_iter(s2->idx, &cur, &key, &val) != LSUP_END)
  259. LSUP_htable_put(dest->idx, key, val);
  260. } else {
  261. if (op == LSUP_BOOL_XOR) {
  262. while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END) {
  263. LSUP_rc get_rc = LSUP_htable_get(s1->keys, key, NULL);
  264. if (get_rc == LSUP_NORESULT) {
  265. LSUP_htable_put(dest->keys, key, NULL);
  266. if (LSUP_htable_get(s2->idx, key, &val) == LSUP_OK)
  267. LSUP_htable_put(dest->idx, key, val);
  268. } else if (UNLIKELY(get_rc < 0)) return get_rc;
  269. }
  270. }
  271. while (LSUP_htable_iter(s1->keys, &cur, &key, NULL) != LSUP_END) {
  272. LSUP_rc get_rc = LSUP_htable_get(s2->keys, key, NULL);
  273. if (
  274. (op == LSUP_BOOL_INTERSECTION && get_rc == LSUP_OK)
  275. || get_rc == LSUP_NORESULT
  276. ) {
  277. LSUP_htable_put(dest->keys, key, NULL);
  278. if (LSUP_htable_get(s1->idx, key, &val) == LSUP_OK)
  279. LSUP_htable_put(dest->idx, key, val);
  280. } else if (UNLIKELY(get_rc < 0)) return get_rc;
  281. }
  282. }
  283. return LSUP_OK;
  284. }