store_htable.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. #include "uthash.h"
  2. #include "namespace.h"
  3. #include "store_htable.h"
  4. /**
  5. * Callback type for key comparison.
  6. */
  7. typedef bool (*LSUP_key_eq_fn_t)(
  8. const LSUP_Key spok[], const LSUP_Key luk[]);
  9. typedef struct triple_entry_t {
  10. LSUP_TripleKey key;
  11. UT_hash_handle hh;
  12. } TripleEntry;
  13. typedef struct idx_entry_t {
  14. LSUP_Key key;
  15. LSUP_Buffer * sterm;
  16. UT_hash_handle hh;
  17. } IndexEntry;
  18. typedef struct ht_store_t {
  19. TripleEntry * keys; // Triple keys (set).
  20. IndexEntry * idx; // Dictionary of keys to serialized terms.
  21. } HTStore;
  22. typedef struct ht_iterator_t {
  23. HTStore * store; // Store being iterated.
  24. size_t i; // Number of records found at any point of
  25. // a lookup iteration, or number of records
  26. // added at any point of an add loop.
  27. LSUP_Key luk[3]; // 0÷3 lookup keys.
  28. LSUP_key_eq_fn_t eq_fn; // Equality function to test triples.
  29. int rc; // Return code for *next* result.
  30. TripleEntry * entry; // Retrieved SPO key.
  31. } HTIterator;
  32. /**
  33. * Identity hashing function.
  34. *
  35. * Since the key is already a strong hash, reuse it for entry allocation.
  36. */
  37. //#define HASH_FUNCTION (s,len,hashv) (hashv) = (unsigned)(s)
  38. /* * * CALLBACKS * * */
  39. /**
  40. * Dummy callback for queries with all parameters unbound. Returns true.
  41. */
  42. static bool lookup_none_eq_fn(
  43. const LSUP_Key spok[], const LSUP_Key luk[])
  44. { return true; }
  45. /**
  46. * Keyset lookup for S key.
  47. */
  48. static bool lookup_sk_eq_fn(
  49. const LSUP_Key spok[], const LSUP_Key luk[])
  50. { return spok[0] == luk[0]; }
  51. /**
  52. * Keyset lookup for P key.
  53. */
  54. static bool lookup_pk_eq_fn(
  55. const LSUP_Key spok[], const LSUP_Key luk[])
  56. { return spok[1] == luk[0]; }
  57. /**
  58. * Keyset lookup for O key.
  59. */
  60. static bool lookup_ok_eq_fn(
  61. const LSUP_Key spok[], const LSUP_Key luk[])
  62. { return spok[2] == luk[0]; }
  63. /**
  64. * Keyset lookup for S and P keys.
  65. */
  66. static bool lookup_spk_eq_fn(
  67. const LSUP_Key spok[], const LSUP_Key luk[])
  68. { return spok[0] == luk[0] && spok[1] == luk[1]; }
  69. /**
  70. * Keyset lookup for S and O keys.
  71. */
  72. static bool lookup_sok_eq_fn(
  73. const LSUP_Key spok[], const LSUP_Key luk[])
  74. { return spok[0] == luk[0] && spok[2] == luk[1]; }
  75. /**
  76. * Keyset lookup for P and O keys.
  77. */
  78. static bool lookup_pok_eq_fn(
  79. const LSUP_Key spok[], const LSUP_Key luk[])
  80. { return spok[1] == luk[0] && spok[2] == luk[1]; }
  81. /**
  82. * Keyset lookup for S, P and O keys.
  83. */
  84. static bool lookup_spok_eq_fn(
  85. const LSUP_Key spok[], const LSUP_Key luk[])
  86. { return spok[0] == luk[0] && spok[1] == luk[1] && spok[2] == luk[2]; }
  87. /* * * Other prototypes. * * */
  88. static inline LSUP_rc tkey_to_strp (
  89. const HTStore *store, const LSUP_Key spok[], LSUP_SerTriple *sspo);
  90. /* * * API * * */
  91. HTStore *
  92. LSUP_htstore_new (void)
  93. {
  94. HTStore *ht;
  95. MALLOC_GUARD (ht, NULL);
  96. ht->keys = NULL;
  97. ht->idx = NULL;
  98. return ht;
  99. }
  100. HTStore *
  101. LSUP_htstore_copy (const HTStore *src)
  102. {
  103. HTStore *dest = LSUP_htstore_new();
  104. if (UNLIKELY (!dest)) return NULL;
  105. if (LSUP_htstore_copy_contents (dest, src) < 0) {
  106. LSUP_htstore_free (dest);
  107. return NULL;
  108. }
  109. return dest;
  110. }
  111. LSUP_rc
  112. LSUP_htstore_copy_contents (HTStore *dest, const HTStore *src)
  113. {
  114. TripleEntry *dest_trp = malloc (
  115. HASH_COUNT (src->keys) * sizeof (*dest_trp));
  116. if (UNLIKELY (!dest_trp)) return LSUP_MEM_ERR;
  117. IndexEntry *dest_idx = malloc (
  118. HASH_COUNT (src->idx) * sizeof (*dest_idx));
  119. if (UNLIKELY (!dest_idx)) {
  120. free (dest_trp);
  121. return LSUP_MEM_ERR;
  122. }
  123. TripleEntry *src_trp;
  124. IndexEntry *src_idx;
  125. unsigned i;
  126. // Copy triple data.
  127. for (
  128. src_trp = src->keys, i = 0; src_trp != NULL;
  129. src_trp = src->keys->hh.next
  130. ) {
  131. memcpy (dest_trp + i, src_trp, sizeof (*dest_trp));
  132. HASH_ADD (hh, dest->keys, key, TRP_KLEN, dest_trp + i);
  133. i++;
  134. }
  135. // Copy index data.
  136. for (
  137. src_idx = src->idx, i = 0; src_idx != NULL;
  138. src_idx = src->idx->hh.next
  139. ) {
  140. memcpy (dest_idx + i, src_idx, sizeof (*dest_idx));
  141. HASH_ADD (hh, dest->idx, key, KLEN, dest_idx + i);
  142. i++;
  143. }
  144. return LSUP_OK;
  145. }
  146. HTStore *
  147. LSUP_htstore_bool_op(
  148. const LSUP_bool_op op, const HTStore *s1, const HTStore *s2)
  149. {
  150. HTStore *dest;
  151. TripleEntry *trp_cur;
  152. dest = LSUP_htstore_new();
  153. if (UNLIKELY (
  154. op != LSUP_BOOL_UNION
  155. && op != LSUP_BOOL_SUBTRACTION
  156. && op != LSUP_BOOL_INTERSECTION
  157. && op != LSUP_BOOL_XOR)) {
  158. fprintf (stderr, "Operation not supported.\n");
  159. goto fail;
  160. }
  161. if (op == LSUP_BOOL_UNION) {
  162. dest = LSUP_htstore_copy (s1);
  163. if (UNLIKELY (!dest) || LSUP_htstore_copy_contents (dest, s2) < 0)
  164. goto fail;
  165. return dest;
  166. }
  167. LSUP_SerTriple *sspo = STRP_DUMMY;
  168. LSUP_HTIterator *it = LSUP_htstore_add_init(dest);
  169. if (op == LSUP_BOOL_XOR) {
  170. // Add triples from s2 if not found in s1.
  171. for (
  172. trp_cur = s2->keys; trp_cur != NULL;
  173. trp_cur = s2->keys->hh.next
  174. ) {
  175. TripleEntry *ins = NULL;
  176. HASH_FIND (hh, s1->keys, trp_cur->key, TRP_KLEN, ins);
  177. if (!ins) {
  178. tkey_to_strp (s2, trp_cur->key, sspo);
  179. LSUP_htstore_add_iter (it, sspo);
  180. }
  181. }
  182. }
  183. for (
  184. trp_cur = s1->keys; trp_cur != NULL;
  185. trp_cur = s1->keys->hh.next
  186. ) {
  187. TripleEntry *ins = NULL;
  188. HASH_FIND (hh, s2->keys, trp_cur->key, TRP_KLEN, ins);
  189. // For XOR and subtraction, add if not found.
  190. // For intersection, add if found.
  191. if ((op == LSUP_BOOL_INTERSECTION) ^ (ins == NULL)) {
  192. tkey_to_strp (s1, trp_cur->key, sspo);
  193. LSUP_htstore_add_iter (it, sspo);
  194. }
  195. }
  196. LSUP_striple_free (sspo);
  197. LSUP_htstore_add_done (it);
  198. return dest;
  199. fail:
  200. LSUP_htstore_free (dest);
  201. return NULL;
  202. }
  203. void
  204. LSUP_htstore_free (HTStore *ht)
  205. {
  206. IndexEntry *idx_entry, *idx_tmp;
  207. size_t ct = 0;
  208. HASH_ITER (hh, ht->idx, idx_entry, idx_tmp) {
  209. HASH_DEL (ht->idx, idx_entry);
  210. LSUP_buffer_free (idx_entry->sterm);
  211. free (idx_entry);
  212. ct++;
  213. }
  214. TripleEntry *trp_entry, *trp_tmp;
  215. HASH_ITER (hh, ht->keys, trp_entry, trp_tmp) {
  216. HASH_DEL (ht->keys, trp_entry);
  217. free (trp_entry);
  218. }
  219. free (ht);
  220. }
  221. size_t
  222. LSUP_htstore_size (LSUP_HTStore *ht)
  223. { return HASH_COUNT (ht->keys); }
  224. LSUP_HTIterator *
  225. LSUP_htstore_add_init (HTStore *store)
  226. {
  227. HTIterator *it;
  228. MALLOC_GUARD (it, NULL);
  229. it->store = store;
  230. it->i = 0;
  231. return it;
  232. }
  233. LSUP_rc
  234. LSUP_htstore_add_iter (HTIterator *it, const LSUP_SerTriple *sspo)
  235. {
  236. LSUP_TripleKey spok = {
  237. LSUP_buffer_hash (sspo->s),
  238. LSUP_buffer_hash (sspo->p),
  239. LSUP_buffer_hash (sspo->o),
  240. };
  241. // Add triple.
  242. TRACE ("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
  243. TripleEntry *k_ins = NULL;
  244. HASH_FIND (hh, it->store->keys, spok, TRP_KLEN, k_ins);
  245. if (k_ins == NULL) {
  246. TRACE (STR, "Triple not found, inserting.");
  247. MALLOC_GUARD (k_ins, LSUP_MEM_ERR);
  248. memcpy (k_ins->key, spok, TRP_KLEN);
  249. HASH_ADD (hh, it->store->keys, key, TRP_KLEN, k_ins);
  250. it->i++;
  251. } else {
  252. TRACE (STR, "Triple found. Skipping.");
  253. return LSUP_NOACTION;
  254. }
  255. // Add terms to index.
  256. for (int i = 0; i < 3; i++) {
  257. spok[i] = LSUP_buffer_hash (LSUP_striple_pos (sspo, i));
  258. //TRACE ("Indexing term key %lx\n", spok[i]);
  259. IndexEntry *ins = NULL;
  260. HASH_FIND (hh, it->store->idx, spok + i, KLEN, ins);
  261. if (ins == NULL) {
  262. MALLOC_GUARD (ins, LSUP_MEM_ERR);
  263. ins->key = spok[i];
  264. ins->sterm = LSUP_buffer_new (
  265. (LSUP_striple_pos (sspo, i))->size,
  266. (LSUP_striple_pos (sspo, i))->addr);
  267. HASH_ADD (hh, it->store->idx, key, KLEN, ins);
  268. }
  269. }
  270. return LSUP_OK;
  271. }
  272. void
  273. LSUP_htstore_add_done (HTIterator *it)
  274. { LSUP_htiter_free (it); }
  275. LSUP_rc
  276. LSUP_htstore_remove(
  277. LSUP_HTStore *store, const LSUP_SerTriple *sspo, size_t *ct)
  278. {
  279. LSUP_HTIterator *it = LSUP_htstore_lookup (store, sspo);
  280. if (UNLIKELY (!it)) return LSUP_DB_ERR;
  281. *ct = 0;
  282. TripleEntry *tmp = malloc (sizeof (*tmp));
  283. HASH_ITER (hh, store->keys, it->entry, tmp) {
  284. if (it->eq_fn (it->entry->key, it->luk)) {
  285. HASH_DEL (store->keys, it->entry);
  286. free (it->entry);
  287. (*ct)++;
  288. }
  289. }
  290. // TODO clean up orphan indices in separate (async, scheduled) function.
  291. return LSUP_OK;
  292. }
  293. HTIterator *
  294. LSUP_htstore_lookup (HTStore *store, const LSUP_SerTriple *sspo)
  295. {
  296. HTIterator *it;
  297. MALLOC_GUARD (it, NULL);
  298. it->store = store;
  299. //it->cur = 0;
  300. it->rc = LSUP_END;
  301. if (HASH_COUNT (store->keys) == 0) return it;
  302. LSUP_TripleKey spok = {
  303. LSUP_buffer_hash (sspo->s),
  304. LSUP_buffer_hash (sspo->p),
  305. LSUP_buffer_hash (sspo->o),
  306. };
  307. // s p o
  308. if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
  309. memcpy (it->luk, spok, sizeof (LSUP_TripleKey));
  310. it->eq_fn = lookup_spok_eq_fn;
  311. } else if (spok[0] != NULL_KEY) {
  312. it->luk[0] = spok[0];
  313. // s p ?
  314. if (spok[1] != NULL_KEY) {
  315. it->luk[1] = spok[1];
  316. it->eq_fn = lookup_spk_eq_fn;
  317. // s ? o
  318. } else if (spok[2] != NULL_KEY) {
  319. it->luk[1] = spok[2];
  320. it->eq_fn = lookup_sok_eq_fn;
  321. // s ? ?
  322. } else {
  323. it->eq_fn = lookup_sk_eq_fn;
  324. }
  325. //it->cur = 0;
  326. } else if (spok[1] != NULL_KEY) {
  327. it->luk[0] = spok[1];
  328. // ? p o
  329. if (spok[2] != NULL_KEY) {
  330. it->luk[1] = spok[2];
  331. it->eq_fn = lookup_pok_eq_fn;
  332. // ? p ?
  333. } else it->eq_fn = lookup_pk_eq_fn;
  334. // ? ? o
  335. } else if (spok[2] != NULL_KEY) {
  336. it->luk[0] = spok[2];
  337. it->eq_fn = lookup_ok_eq_fn;
  338. // ? ? ?
  339. } else it->eq_fn = lookup_none_eq_fn;
  340. it->entry = it->store->keys; // First record in hash table.
  341. it->rc = it->entry == NULL ? LSUP_END : LSUP_OK;
  342. it->i = 0;
  343. return it;
  344. }
  345. void
  346. LSUP_htiter_free (LSUP_HTIterator *it)
  347. { free (it); }
  348. size_t
  349. LSUP_htiter_cur (LSUP_HTIterator *it)
  350. { return it->i; }
  351. LSUP_rc
  352. LSUP_htiter_next (HTIterator *it, LSUP_SerTriple *sspo)
  353. {
  354. // If the previous iteration hit the end, return.
  355. if (it->rc != LSUP_OK) return it->rc;
  356. it->rc = LSUP_NORESULT;
  357. while (it->rc == LSUP_NORESULT) {
  358. if (!it->entry) it->rc = LSUP_END;
  359. else {
  360. if (it->eq_fn (it->entry->key, it->luk)) {
  361. tkey_to_strp (it->store, it->entry->key, sspo);
  362. if (!sspo->s || !sspo->p || !sspo->o) return LSUP_DB_ERR;
  363. TRACE (
  364. "Found spok: {%lx, %lx, %lx}",
  365. it->entry->key[0], it->entry->key[1], it->entry->key[2]
  366. );
  367. it->rc = LSUP_OK;
  368. it->i++;
  369. }
  370. it->entry = it->entry->hh.next;
  371. }
  372. }
  373. return it->rc;
  374. }
  375. /* * * Statics * * */
  376. inline static LSUP_rc
  377. tkey_to_strp (
  378. const HTStore *store, const LSUP_Key spok[], LSUP_SerTriple *sspo)
  379. {
  380. // owned by the store.
  381. IndexEntry *tmp = NULL;
  382. HASH_FIND (hh, store->idx, spok + 0, KLEN, tmp);
  383. sspo->s = tmp->sterm;
  384. sspo->s->size = tmp->sterm->size;
  385. HASH_FIND (hh, store->idx, spok + 1, KLEN, tmp);
  386. sspo->p = tmp->sterm;
  387. sspo->s->size = tmp->sterm->size;
  388. HASH_FIND (hh, store->idx, spok + 2, KLEN, tmp);
  389. if (UNLIKELY (!tmp)) return LSUP_ERROR;
  390. sspo->o = tmp->sterm;
  391. return LSUP_OK;
  392. }