store_htable.c.disabled 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. #include "store_htable.h"
  2. // Assume VERY coarsly that the number of unique terms will be in general
  3. // 1.7 times the number of triples. This is conservative to maintain load
  4. // factor low.
  5. #define IDX_SIZE_RATIO 1.7
  6. /**
  7. * Callback type for key comparison.
  8. */
  9. typedef bool (*LSUP_key_eq_fn_t)(
  10. const LSUP_Key spok[], const LSUP_Key luk[]);
  11. typedef struct HTStore {
  12. LSUP_HTable *keys;
  13. LSUP_HTable *idx; // Dictionary of keys to serialized terms
  14. } HTStore;
  15. typedef struct HTIterator {
  16. HTStore * store; // Store being iterated.
  17. LSUP_HTable * ht; // Hash table to look up.
  18. htsize_t cur; // Lookup cursor.
  19. LSUP_Key luk[3]; // 0÷3 lookup keys.
  20. LSUP_key_eq_fn_t eq_fn; // Equality function to test triples.
  21. int rc; // Return code for *next* result.
  22. LSUP_Key * spok; // Retrieved SPO key.
  23. } HTIterator;
  24. /**
  25. * Identity hashing function.
  26. *
  27. * Since the key is already a strong hash, reuse it for bucket allocation.
  28. */
  29. static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed)
  30. { return *(uint64_t*)key; }
  31. /**
  32. * General XX64 hash. Strong (non-crypto) and extremely fast.
  33. */
  34. static inline uint64_t xx64_hash_fn(
  35. const void *key, ksize_t size, uint64_t seed)
  36. { return XXH64(key, size, seed); }
  37. static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size)
  38. { return memcmp(a, b, size) == 0; }
  39. /* * * CALLBACKS * * */
  40. /**
  41. * Dummy callback for queries with all parameters unbound. Returns true.
  42. */
  43. static bool lookup_none_eq_fn(
  44. const LSUP_Key spok[], const LSUP_Key luk[])
  45. { return true; }
  46. /**
  47. * Keyset lookup for S key.
  48. */
  49. static bool lookup_sk_eq_fn(
  50. const LSUP_Key spok[], const LSUP_Key luk[])
  51. { return spok[0] == luk[0]; }
  52. /**
  53. * Keyset lookup for P key.
  54. */
  55. static bool lookup_pk_eq_fn(
  56. const LSUP_Key spok[], const LSUP_Key luk[])
  57. { return spok[1] == luk[0]; }
  58. /**
  59. * Keyset lookup for O key.
  60. */
  61. static bool lookup_ok_eq_fn(
  62. const LSUP_Key spok[], const LSUP_Key luk[])
  63. { return spok[2] == luk[0]; }
  64. /**
  65. * Keyset lookup for S and P keys.
  66. */
  67. static bool lookup_spk_eq_fn(
  68. const LSUP_Key spok[], const LSUP_Key luk[])
  69. { return spok[0] == luk[0] && spok[1] == luk[1]; }
  70. /**
  71. * Keyset lookup for S and O keys.
  72. */
  73. static bool lookup_sok_eq_fn(
  74. const LSUP_Key spok[], const LSUP_Key luk[])
  75. { return spok[0] == luk[0] && spok[2] == luk[1]; }
  76. /**
  77. * Keyset lookup for P and O keys.
  78. */
  79. static bool lookup_pok_eq_fn(
  80. const LSUP_Key spok[], const LSUP_Key luk[])
  81. { return spok[1] == luk[0] && spok[2] == luk[1]; }
  82. /* * * Other prototypes. * * */
  83. static inline LSUP_rc htiter_next_key(HTIterator *it);
  84. /* * * API * * */
  85. HTStore *
  86. LSUP_htstore_new(size_t capacity)
  87. {
  88. HTStore *ht;
  89. CRITICAL(ht = malloc (sizeof (*ht)));
  90. ht->keys = LSUP_htable_new(
  91. capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn);
  92. ht->idx = LSUP_htable_new(
  93. capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t),
  94. xx64_hash_fn, buffer_eq_fn);
  95. return ht;
  96. }
  97. void
  98. LSUP_htstore_free(HTStore *ht)
  99. {
  100. if (!ht) return;
  101. LSUP_htable_free(ht->keys);
  102. // Free up index entries and index.
  103. htsize_t cur = 0;
  104. LSUP_TripleKey spok;
  105. LSUP_Buffer *sterm;
  106. while(LSUP_htable_iter(
  107. ht->idx, &cur, (void**)&spok, (void**)&sterm) == LSUP_OK) {
  108. TRACE("Freeing indexed term buffer #%d at %p", cur, sterm);
  109. LSUP_buffer_done(sterm);
  110. }
  111. LSUP_htable_free(ht->idx);
  112. free(ht);
  113. }
  114. htsize_t
  115. LSUP_htstore_size(LSUP_HTStore *ht)
  116. { return LSUP_htable_size(ht->keys); }
  117. htsize_t
  118. LSUP_htstore_capacity(const LSUP_HTStore *ht)
  119. { return LSUP_htable_capacity(ht->keys); }
  120. LSUP_rc
  121. LSUP_htstore_resize(HTStore *ht, htsize_t size)
  122. {
  123. LSUP_rc rc = LSUP_htable_resize(ht->keys, size);
  124. if (rc != LSUP_OK) return rc;
  125. return LSUP_htable_resize(ht->idx, size * IDX_SIZE_RATIO);
  126. }
  127. LSUP_rc
  128. LSUP_htstore_add(HTStore *store, const LSUP_SerTriple *sspo)
  129. {
  130. LSUP_TripleKey spok = NULL_TRP;
  131. // Add term to index.
  132. for (int i = 0; i < 3; i++) {
  133. spok[i] = LSUP_sterm_to_key(LSUP_striple_pos(sspo, i));
  134. TRACE("Indexing term key %lu\n", spok[i]);
  135. // If term is already in the index, discard and free it.
  136. if (LSUP_htable_get(store->idx, spok + i, NULL) == LSUP_OK)
  137. LSUP_htable_put(store->idx, spok + i, LSUP_striple_pos(sspo, i));
  138. }
  139. // Add triple.
  140. TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
  141. return LSUP_htable_put(store->keys, spok, NULL);
  142. }
  143. LSUP_rc
  144. LSUP_htstore_remove(
  145. LSUP_HTStore *store, const LSUP_SerTriple *sspo, size_t *ct)
  146. {
  147. LSUP_HTIterator *it = LSUP_htstore_lookup(store, sspo, ct);
  148. if (UNLIKELY (!it)) return LSUP_DB_ERR;
  149. *ct = 0;
  150. while (htiter_next_key (it)) {
  151. LSUP_rc rc = LSUP_htable_remove(store->keys, it->spok);
  152. if (UNLIKELY (rc < 0)) return rc;
  153. (*ct) ++;
  154. }
  155. // TODO clean up orphan indices in separate function.
  156. return LSUP_OK;
  157. }
  158. HTIterator *
  159. LSUP_htstore_lookup(HTStore *store, const LSUP_SerTriple *sspo, size_t *ct)
  160. {
  161. HTIterator *it;
  162. CRITICAL(it = malloc (sizeof (*it)));
  163. it->store = store;
  164. it->cur = 0;
  165. it->rc = LSUP_END;
  166. if (LSUP_htable_size(store->keys) == 0) return it;
  167. LSUP_TripleKey spok = {
  168. LSUP_sterm_to_key(sspo->s),
  169. LSUP_sterm_to_key(sspo->p),
  170. LSUP_sterm_to_key(sspo->o),
  171. };
  172. // s p o
  173. if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
  174. memcpy(it->luk, spok, sizeof(LSUP_TripleKey));
  175. it->eq_fn = NULL;
  176. } else if (spok[0] != NULL_KEY) {
  177. it->luk[0] = spok[0];
  178. // s p ?
  179. if (spok[1] != NULL_KEY) {
  180. it->luk[1] = spok[1];
  181. it->eq_fn = lookup_spk_eq_fn;
  182. // s ? o
  183. } else if (spok[2] != NULL_KEY) {
  184. it->luk[1] = spok[2];
  185. it->eq_fn = lookup_sok_eq_fn;
  186. // s ? ?
  187. } else {
  188. it->eq_fn = lookup_sk_eq_fn;
  189. }
  190. } else if (spok[1] != NULL_KEY) {
  191. it->luk[0] = spok[1];
  192. // ? p o
  193. if (spok[2] != NULL_KEY) {
  194. it->luk[1] = spok[2];
  195. it->eq_fn = lookup_pok_eq_fn;
  196. // ? p ?
  197. } else it->eq_fn = lookup_pk_eq_fn;
  198. // ? ? o
  199. } else if (spok[2] != NULL_KEY) {
  200. it->luk[0] = spok[2];
  201. it->eq_fn = lookup_ok_eq_fn;
  202. // ? ? ?
  203. } else it->eq_fn = lookup_none_eq_fn;
  204. it->rc = LSUP_htable_iter(
  205. it->store->keys, &it->cur, (void**)&it->spok, NULL);
  206. return it->rc >= 0 ? it : NULL;
  207. }
  208. static inline LSUP_rc
  209. htiter_next_key(HTIterator *it)
  210. {
  211. for (;;) {
  212. if (it->rc != LSUP_OK) return it->rc;
  213. if (it->eq_fn(it->spok, it->luk)) return LSUP_OK;
  214. it->rc = LSUP_htable_iter(
  215. it->store->keys, &it->cur, (void**)&it->spok, NULL);
  216. }
  217. }
  218. LSUP_rc
  219. LSUP_htiter_next(HTIterator *it, LSUP_SerTriple *sspo)
  220. {
  221. LSUP_rc rc = htiter_next_key(it);
  222. if (UNLIKELY (rc != LSUP_OK)) return rc;
  223. for (int i = 0; i < 3; i++)
  224. LSUP_htable_get(
  225. it->store->idx, (void*)it->spok[i],
  226. (void**)(LSUP_striple_pos(sspo, i)));
  227. return rc;
  228. }
  229. void
  230. LSUP_htiter_free(LSUP_HTIterator *it)
  231. { free(it); }
  232. HTStore *
  233. LSUP_htstore_bool_op(
  234. const LSUP_bool_op op, const HTStore *s1, const HTStore *s2)
  235. {
  236. HTStore *dest;
  237. htsize_t cur;
  238. void *key, *val;
  239. dest = LSUP_htstore_new(0);
  240. if (UNLIKELY (
  241. op != LSUP_BOOL_UNION
  242. && op != LSUP_BOOL_SUBTRACTION
  243. && op != LSUP_BOOL_INTERSECTION
  244. && op != LSUP_BOOL_XOR)) {
  245. fprintf(stderr, "Operation not supported.\n");
  246. goto fail;
  247. }
  248. if (op == LSUP_BOOL_UNION) {
  249. dest->keys = LSUP_htable_copy(s1->keys);
  250. while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END)
  251. LSUP_htable_put(dest->keys, key, NULL);
  252. dest->idx = LSUP_htable_copy(s1->idx);
  253. while (LSUP_htable_iter(s2->idx, &cur, &key, &val) != LSUP_END)
  254. LSUP_htable_put(dest->idx, key, val);
  255. } else {
  256. if (op == LSUP_BOOL_XOR) {
  257. while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END) {
  258. LSUP_rc get_rc = LSUP_htable_get(s1->keys, key, NULL);
  259. if (get_rc == LSUP_NORESULT) {
  260. LSUP_htable_put(dest->keys, key, NULL);
  261. if (LSUP_htable_get(s2->idx, key, &val) == LSUP_OK)
  262. LSUP_htable_put(dest->idx, key, val);
  263. } else if (UNLIKELY(get_rc < 0)) goto fail;
  264. }
  265. }
  266. while (LSUP_htable_iter(s1->keys, &cur, &key, NULL) != LSUP_END) {
  267. LSUP_rc get_rc = LSUP_htable_get(s2->keys, key, NULL);
  268. if (
  269. (op == LSUP_BOOL_INTERSECTION && get_rc == LSUP_OK)
  270. || get_rc == LSUP_NORESULT
  271. ) {
  272. LSUP_htable_put(dest->keys, key, NULL);
  273. if (LSUP_htable_get(s1->idx, key, &val) == LSUP_OK)
  274. LSUP_htable_put(dest->idx, key, val);
  275. } else if (UNLIKELY(get_rc < 0)) goto fail;
  276. }
  277. }
  278. return dest;
  279. fail:
  280. LSUP_htstore_free(dest);
  281. return NULL;
  282. }