store_htable.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. #include "uthash.h"
  2. #include "store_htable.h"
  3. /**
  4. * Callback type for key comparison.
  5. */
  6. typedef bool (*LSUP_key_eq_fn_t)(
  7. const LSUP_Key spok[], const LSUP_Key luk[]);
  8. typedef struct triple_entry_t {
  9. LSUP_TripleKey key;
  10. UT_hash_handle hh;
  11. } TripleEntry;
  12. typedef struct idx_entry_t {
  13. LSUP_Key key;
  14. LSUP_Buffer * sterm;
  15. UT_hash_handle hh;
  16. } IndexEntry;
  17. typedef struct ht_store_t {
  18. TripleEntry * keys; // Triple keys (set).
  19. IndexEntry * idx; // Dictionary of keys to serialized terms.
  20. } HTStore;
  21. typedef struct ht_iterator_t {
  22. HTStore * store; // Store being iterated.
  23. size_t i; // Number of records found at any point of
  24. // a lookup iteration, or number of records
  25. // added at any point of an add loop.
  26. LSUP_Key luk[3]; // 0÷3 lookup keys.
  27. LSUP_key_eq_fn_t eq_fn; // Equality function to test triples.
  28. int rc; // Return code for *next* result.
  29. TripleEntry * entry; // Retrieved SPO key.
  30. } HTIterator;
  31. /**
  32. * Identity hashing function.
  33. *
  34. * Since the key is already a strong hash, reuse it for entry allocation.
  35. */
  36. //#define HASH_FUNCTION (s,len,hashv) (hashv) = (unsigned)(s)
  37. /* * * CALLBACKS * * */
  38. /**
  39. * Dummy callback for queries with all parameters unbound. Returns true.
  40. */
  41. static bool lookup_none_eq_fn(
  42. const LSUP_Key spok[], const LSUP_Key luk[])
  43. { return true; }
  44. /**
  45. * Keyset lookup for S key.
  46. */
  47. static bool lookup_sk_eq_fn(
  48. const LSUP_Key spok[], const LSUP_Key luk[])
  49. { return spok[0] == luk[0]; }
  50. /**
  51. * Keyset lookup for P key.
  52. */
  53. static bool lookup_pk_eq_fn(
  54. const LSUP_Key spok[], const LSUP_Key luk[])
  55. { return spok[1] == luk[0]; }
  56. /**
  57. * Keyset lookup for O key.
  58. */
  59. static bool lookup_ok_eq_fn(
  60. const LSUP_Key spok[], const LSUP_Key luk[])
  61. { return spok[2] == luk[0]; }
  62. /**
  63. * Keyset lookup for S and P keys.
  64. */
  65. static bool lookup_spk_eq_fn(
  66. const LSUP_Key spok[], const LSUP_Key luk[])
  67. { return spok[0] == luk[0] && spok[1] == luk[1]; }
  68. /**
  69. * Keyset lookup for S and O keys.
  70. */
  71. static bool lookup_sok_eq_fn(
  72. const LSUP_Key spok[], const LSUP_Key luk[])
  73. { return spok[0] == luk[0] && spok[2] == luk[1]; }
  74. /**
  75. * Keyset lookup for P and O keys.
  76. */
  77. static bool lookup_pok_eq_fn(
  78. const LSUP_Key spok[], const LSUP_Key luk[])
  79. { return spok[1] == luk[0] && spok[2] == luk[1]; }
  80. /**
  81. * Keyset lookup for S, P and O keys.
  82. */
  83. static bool lookup_spok_eq_fn(
  84. const LSUP_Key spok[], const LSUP_Key luk[])
  85. { return spok[0] == luk[0] && spok[1] == luk[1] && spok[2] == luk[2]; }
  86. /* * * Other prototypes. * * */
  87. static inline LSUP_rc tkey_to_strp (
  88. const HTStore *store, const LSUP_Key spok[], LSUP_BufferTriple *sspo);
  89. /* * * API * * */
  90. HTStore *
  91. LSUP_htstore_new (void)
  92. {
  93. HTStore *ht;
  94. MALLOC_GUARD (ht, NULL);
  95. ht->keys = NULL;
  96. ht->idx = NULL;
  97. return ht;
  98. }
  99. HTStore *
  100. LSUP_htstore_copy (const HTStore *src)
  101. {
  102. HTStore *dest = LSUP_htstore_new();
  103. if (UNLIKELY (!dest)) return NULL;
  104. if (LSUP_htstore_copy_contents (dest, src) < 0) {
  105. LSUP_htstore_free (dest);
  106. return NULL;
  107. }
  108. return dest;
  109. }
  110. LSUP_rc
  111. LSUP_htstore_copy_contents (HTStore *dest, const HTStore *src)
  112. {
  113. TripleEntry *dest_trp = malloc (
  114. HASH_COUNT (src->keys) * sizeof (*dest_trp));
  115. if (UNLIKELY (!dest_trp)) return LSUP_MEM_ERR;
  116. IndexEntry *dest_idx = malloc (
  117. HASH_COUNT (src->idx) * sizeof (*dest_idx));
  118. if (UNLIKELY (!dest_idx)) {
  119. free (dest_trp);
  120. return LSUP_MEM_ERR;
  121. }
  122. TripleEntry *src_trp;
  123. IndexEntry *src_idx;
  124. unsigned i;
  125. // Copy triple data.
  126. for (
  127. src_trp = src->keys, i = 0; src_trp != NULL;
  128. src_trp = src->keys->hh.next
  129. ) {
  130. memcpy (dest_trp + i, src_trp, sizeof (*dest_trp));
  131. HASH_ADD (hh, dest->keys, key, TRP_KLEN, dest_trp + i);
  132. i++;
  133. }
  134. // Copy index data.
  135. for (
  136. src_idx = src->idx, i = 0; src_idx != NULL;
  137. src_idx = src->idx->hh.next
  138. ) {
  139. memcpy (dest_idx + i, src_idx, sizeof (*dest_idx));
  140. HASH_ADD (hh, dest->idx, key, KLEN, dest_idx + i);
  141. i++;
  142. }
  143. return LSUP_OK;
  144. }
  145. HTStore *
  146. LSUP_htstore_bool_op(
  147. const LSUP_bool_op op, const HTStore *s1, const HTStore *s2)
  148. {
  149. HTStore *dest;
  150. TripleEntry *trp_cur;
  151. dest = LSUP_htstore_new();
  152. if (UNLIKELY (
  153. op != LSUP_BOOL_UNION
  154. && op != LSUP_BOOL_SUBTRACTION
  155. && op != LSUP_BOOL_INTERSECTION
  156. && op != LSUP_BOOL_XOR)) {
  157. fprintf (stderr, "Operation not supported.\n");
  158. goto fail;
  159. }
  160. if (op == LSUP_BOOL_UNION) {
  161. dest = LSUP_htstore_copy (s1);
  162. if (UNLIKELY (!dest) || LSUP_htstore_copy_contents (dest, s2) < 0)
  163. goto fail;
  164. return dest;
  165. }
  166. LSUP_BufferTriple *sspo = STRP_DUMMY;
  167. LSUP_HTIterator *it = LSUP_htstore_add_init(dest);
  168. if (op == LSUP_BOOL_XOR) {
  169. // Add triples from s2 if not found in s1.
  170. for (
  171. trp_cur = s2->keys; trp_cur != NULL;
  172. trp_cur = s2->keys->hh.next
  173. ) {
  174. TripleEntry *ins = NULL;
  175. HASH_FIND (hh, s1->keys, trp_cur->key, TRP_KLEN, ins);
  176. if (!ins) {
  177. tkey_to_strp (s2, trp_cur->key, sspo);
  178. LSUP_htstore_add_iter (it, sspo);
  179. }
  180. }
  181. }
  182. for (
  183. trp_cur = s1->keys; trp_cur != NULL;
  184. trp_cur = s1->keys->hh.next
  185. ) {
  186. TripleEntry *ins = NULL;
  187. HASH_FIND (hh, s2->keys, trp_cur->key, TRP_KLEN, ins);
  188. // For XOR and subtraction, add if not found.
  189. // For intersection, add if found.
  190. if ((op == LSUP_BOOL_INTERSECTION) ^ (ins == NULL)) {
  191. tkey_to_strp (s1, trp_cur->key, sspo);
  192. LSUP_htstore_add_iter (it, sspo);
  193. }
  194. }
  195. LSUP_btriple_free (sspo);
  196. LSUP_htstore_add_done (it);
  197. return dest;
  198. fail:
  199. LSUP_htstore_free (dest);
  200. return NULL;
  201. }
  202. void
  203. LSUP_htstore_free (HTStore *ht)
  204. {
  205. IndexEntry *idx_entry, *idx_tmp;
  206. size_t ct = 0;
  207. HASH_ITER (hh, ht->idx, idx_entry, idx_tmp) {
  208. HASH_DEL (ht->idx, idx_entry);
  209. LSUP_buffer_free (idx_entry->sterm);
  210. free (idx_entry);
  211. ct++;
  212. }
  213. TripleEntry *trp_entry, *trp_tmp;
  214. HASH_ITER (hh, ht->keys, trp_entry, trp_tmp) {
  215. HASH_DEL (ht->keys, trp_entry);
  216. free (trp_entry);
  217. }
  218. free (ht);
  219. }
  220. size_t
  221. LSUP_htstore_size (LSUP_HTStore *ht)
  222. { return HASH_COUNT (ht->keys); }
  223. LSUP_HTIterator *
  224. LSUP_htstore_add_init (HTStore *store)
  225. {
  226. HTIterator *it;
  227. MALLOC_GUARD (it, NULL);
  228. it->store = store;
  229. it->i = 0;
  230. return it;
  231. }
  232. LSUP_rc
  233. LSUP_htstore_add_iter (HTIterator *it, const LSUP_BufferTriple *sspo)
  234. {
  235. LSUP_TripleKey spok = {
  236. LSUP_buffer_hash (sspo->s),
  237. LSUP_buffer_hash (sspo->p),
  238. LSUP_buffer_hash (sspo->o),
  239. };
  240. // Add triple.
  241. log_trace ("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
  242. TripleEntry *k_ins = NULL;
  243. HASH_FIND (hh, it->store->keys, spok, TRP_KLEN, k_ins);
  244. if (k_ins == NULL) {
  245. log_trace ("Triple not found, inserting.");
  246. MALLOC_GUARD (k_ins, LSUP_MEM_ERR);
  247. memcpy (k_ins->key, spok, TRP_KLEN);
  248. HASH_ADD (hh, it->store->keys, key, TRP_KLEN, k_ins);
  249. it->i++;
  250. } else {
  251. log_trace ("Triple found. Skipping.");
  252. return LSUP_NOACTION;
  253. }
  254. // Add terms to index.
  255. for (int i = 0; i < 3; i++) {
  256. IndexEntry *ins = NULL;
  257. HASH_FIND (hh, it->store->idx, spok + i, KLEN, ins);
  258. if (ins == NULL) {
  259. MALLOC_GUARD (ins, LSUP_MEM_ERR);
  260. ins->key = spok[i];
  261. ins->sterm = LSUP_buffer_new (
  262. (LSUP_btriple_pos (sspo, i))->size,
  263. (LSUP_btriple_pos (sspo, i))->addr);
  264. HASH_ADD (hh, it->store->idx, key, KLEN, ins);
  265. }
  266. }
  267. return LSUP_OK;
  268. }
  269. void
  270. LSUP_htstore_add_done (HTIterator *it)
  271. { LSUP_htiter_free (it); }
  272. LSUP_rc
  273. LSUP_htstore_remove(
  274. LSUP_HTStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
  275. const LSUP_Buffer *so, size_t *ct)
  276. {
  277. LSUP_HTIterator *it = LSUP_htstore_lookup (store, ss, sp, so);
  278. if (UNLIKELY (!it)) return LSUP_DB_ERR;
  279. if (ct) *ct = 0;
  280. TripleEntry *tmp;
  281. HASH_ITER (hh, store->keys, it->entry, tmp) {
  282. if (it->eq_fn (it->entry->key, it->luk)) {
  283. HASH_DEL (store->keys, it->entry);
  284. free (it->entry);
  285. if (ct) (*ct)++;
  286. }
  287. }
  288. LSUP_htiter_free (it);
  289. // TODO clean up orphan indices in separate (async, scheduled) function.
  290. return LSUP_OK;
  291. }
  292. HTIterator *
  293. LSUP_htstore_lookup (HTStore *store, const LSUP_Buffer *ss,
  294. const LSUP_Buffer *sp, const LSUP_Buffer *so)
  295. {
  296. HTIterator *it;
  297. MALLOC_GUARD (it, NULL);
  298. it->store = store;
  299. //it->cur = 0;
  300. it->rc = LSUP_END;
  301. if (HASH_COUNT (store->keys) == 0) return it;
  302. LSUP_TripleKey spok = {
  303. LSUP_buffer_hash (ss),
  304. LSUP_buffer_hash (sp),
  305. LSUP_buffer_hash (so),
  306. };
  307. // s p o
  308. if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
  309. memcpy (it->luk, spok, sizeof (LSUP_TripleKey));
  310. it->eq_fn = lookup_spok_eq_fn;
  311. } else if (spok[0] != NULL_KEY) {
  312. it->luk[0] = spok[0];
  313. // s p ?
  314. if (spok[1] != NULL_KEY) {
  315. it->luk[1] = spok[1];
  316. it->eq_fn = lookup_spk_eq_fn;
  317. // s ? o
  318. } else if (spok[2] != NULL_KEY) {
  319. it->luk[1] = spok[2];
  320. it->eq_fn = lookup_sok_eq_fn;
  321. // s ? ?
  322. } else {
  323. it->eq_fn = lookup_sk_eq_fn;
  324. }
  325. //it->cur = 0;
  326. } else if (spok[1] != NULL_KEY) {
  327. it->luk[0] = spok[1];
  328. // ? p o
  329. if (spok[2] != NULL_KEY) {
  330. it->luk[1] = spok[2];
  331. it->eq_fn = lookup_pok_eq_fn;
  332. // ? p ?
  333. } else it->eq_fn = lookup_pk_eq_fn;
  334. // ? ? o
  335. } else if (spok[2] != NULL_KEY) {
  336. it->luk[0] = spok[2];
  337. it->eq_fn = lookup_ok_eq_fn;
  338. // ? ? ?
  339. } else it->eq_fn = lookup_none_eq_fn;
  340. it->entry = it->store->keys; // First record in hash table.
  341. it->rc = it->entry == NULL ? LSUP_END : LSUP_OK;
  342. it->i = 0;
  343. return it;
  344. }
  345. void
  346. LSUP_htiter_free (LSUP_HTIterator *it)
  347. { free (it); }
  348. size_t
  349. LSUP_htiter_cur (LSUP_HTIterator *it)
  350. { return it->i; }
  351. LSUP_rc
  352. LSUP_htiter_next (HTIterator *it, LSUP_BufferTriple *sspo)
  353. {
  354. if (UNLIKELY (!it)) return LSUP_VALUE_ERR;
  355. // If the previous iteration hit the end, return.
  356. if (it->rc != LSUP_OK) return it->rc;
  357. it->rc = LSUP_NORESULT;
  358. /*
  359. #ifdef DEBUG
  360. log_trace ("Lookup key: ");
  361. IndexEntry *tmp = NULL;
  362. HASH_FIND (hh, it->store->idx, it->luk + 0, KLEN, tmp);
  363. if (tmp) LSUP_buffer_print(tmp->sterm); else printf("NULL\n");
  364. HASH_FIND (hh, it->store->idx, it->luk + 1, KLEN, tmp);
  365. if (tmp) LSUP_buffer_print(tmp->sterm); else printf("NULL\n");
  366. HASH_FIND (hh, it->store->idx, it->luk + 2, KLEN, tmp);
  367. if (tmp) LSUP_buffer_print(tmp->sterm); else printf("NULL\n");
  368. #endif
  369. */
  370. do {
  371. if (!it->entry) it->rc = LSUP_END;
  372. else {
  373. if (it->eq_fn (it->entry->key, it->luk)) {
  374. tkey_to_strp (it->store, it->entry->key, sspo);
  375. if (!sspo->s || !sspo->p || !sspo->o) return LSUP_DB_ERR;
  376. log_trace (
  377. "Found spok: {%lx, %lx, %lx}",
  378. it->entry->key[0], it->entry->key[1], it->entry->key[2]
  379. );
  380. /*
  381. #ifdef DEBUG
  382. IndexEntry *tmp = NULL;
  383. HASH_FIND (hh, it->store->idx, it->entry->key + 0, KLEN, tmp);
  384. LSUP_buffer_print(tmp->sterm);
  385. HASH_FIND (hh, it->store->idx, it->entry->key + 1, KLEN, tmp);
  386. LSUP_buffer_print(tmp->sterm);
  387. HASH_FIND (hh, it->store->idx, it->entry->key + 2, KLEN, tmp);
  388. LSUP_buffer_print(tmp->sterm);
  389. #endif
  390. */
  391. it->rc = LSUP_OK;
  392. it->i++;
  393. }
  394. it->entry = it->entry->hh.next;
  395. }
  396. } while (it->rc == LSUP_NORESULT);
  397. return it->rc;
  398. }
  399. /* * * Statics * * */
  400. inline static LSUP_rc
  401. tkey_to_strp (
  402. const HTStore *store, const LSUP_Key spok[], LSUP_BufferTriple *sspo)
  403. {
  404. // Data owned by the store.
  405. IndexEntry *tmp = NULL;
  406. HASH_FIND (hh, store->idx, spok + 0, KLEN, tmp);
  407. if (UNLIKELY (!tmp)) return LSUP_ERROR;
  408. sspo->s = tmp->sterm;
  409. HASH_FIND (hh, store->idx, spok + 1, KLEN, tmp);
  410. if (UNLIKELY (!tmp)) return LSUP_ERROR;
  411. sspo->p = tmp->sterm;
  412. HASH_FIND (hh, store->idx, spok + 2, KLEN, tmp);
  413. if (UNLIKELY (!tmp)) return LSUP_ERROR;
  414. sspo->o = tmp->sterm;
  415. return LSUP_OK;
  416. }