graph.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545
  1. #include "htable.h"
  2. #include "graph.h"
  3. // Initial size of lookup graph. It will double each time capacity is reached.
  4. #define LOOKUP_GR_INIT_SIZE 64
  5. // Assume VERY coarsly that the number of unique terms will be in general
  6. // 1.7 times the number of triples. This is conservative to maintain load
  7. // factor low.
  8. #define IDX_SIZE_RATIO 1.7
  9. typedef enum KSetFlag {
  10. LSUP_KS_NONE = 0,
  11. LSUP_KS_CHECK_CAP = 1 << 0,
  12. LSUP_KS_CHECK_DUP = 1 << 1,
  13. } KSetFlag;
  14. /**
  15. * Static handles.
  16. */
  17. static const char *default_ctx_label = "urn:lsup:default";
  18. static LSUP_Buffer *default_ctx = NULL; // Default context URI for quad store.
  19. /**
  20. * Identity hashing function.
  21. *
  22. * Since the key is already a strong hash, reuse it for bucket allocation.
  23. */
  24. static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed)
  25. { return *(uint64_t*)key; }
  26. /**
  27. * General XX64 hash. Strong (non-crypto) and extremely fast.
  28. */
  29. static inline uint64_t xx64_hash_fn(
  30. const void *key, ksize_t size, uint64_t seed)
  31. { return XXH64(key, size, seed); }
  32. static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size)
  33. { return memcmp(a, b, size) == 0; }
  34. typedef struct Graph {
  35. LSUP_store_type store_type; // In-memory or MDB-backed
  36. LSUP_Term *uri; // Graph "name" (URI)
  37. LSUP_HTable *keys;
  38. LSUP_HTable *idx; // Dictionary of keys to serialized terms
  39. } Graph;
  40. /**
  41. * Extern inline functions.
  42. */
  43. size_t LSUP_graph_size(const LSUP_Graph *gr);
  44. size_t LSUP_graph_capacity(const LSUP_Graph *gr);
  45. /**
  46. * Callback type for key comparison.
  47. */
  48. typedef bool (*LSUP_key_cmp_fn_t)(
  49. const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2);
  50. /**
  51. * Dummy callback for queries with all parameters unbound. Returns true.
  52. static bool lookup_none_cmp_fn(
  53. const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
  54. { return true; }
  55. */
  56. /**
  57. * Keyset lookup for S key.
  58. */
  59. static bool lookup_sk_cmp_fn(
  60. const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
  61. { return spok[0][0] == k1; }
  62. /**
  63. * Keyset lookup for P key.
  64. */
  65. static bool lookup_pk_cmp_fn(
  66. const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
  67. { return spok[0][1] == k1; }
  68. /**
  69. * Keyset lookup for O key.
  70. */
  71. static bool lookup_ok_cmp_fn(
  72. const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
  73. { return spok[0][2] == k1; }
  74. /**
  75. * Keyset lookup for S and P keys.
  76. */
  77. static bool lookup_skpk_cmp_fn(
  78. const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
  79. { return spok[0][0] == k1 && spok[0][1] == k2; }
  80. /**
  81. * Keyset lookup for S and O keys.
  82. */
  83. static bool lookup_skok_cmp_fn(
  84. const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
  85. { return spok[0][0] == k1 && spok[0][2] == k2; }
  86. /**
  87. * Keyset lookup for P and O keys.
  88. */
  89. static bool lookup_pkok_cmp_fn(
  90. const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
  91. { return spok[0][1] == k1 && spok[0][2] == k2; }
  92. /* * * Post-lookup callback prototypes * * */
  93. int match_add_fn(
  94. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  95. void *ctx);
  96. int match_rm_fn(
  97. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  98. void *ctx);
  99. /* * * KEYSETS * * */
  100. static inline bool is_null_trp(const LSUP_TripleKey *trp)
  101. {
  102. return (
  103. *trp[0] == NULL_KEY &&
  104. *trp[1] == NULL_KEY &&
  105. *trp[2] == NULL_KEY);
  106. }
  107. /* * * GRAPH * * */
  108. int
  109. LSUP_graph_init(
  110. LSUP_Graph *gr, size_t capacity, char *uri_str,
  111. LSUP_store_type store_type)
  112. {
  113. if (uri_str == NULL) {
  114. gr->uri = LSUP_term_new(
  115. LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL);
  116. } else {
  117. gr->uri = LSUP_term_new(LSUP_TERM_URI, uri_str, NULL, NULL);
  118. }
  119. gr->keys = LSUP_htable_new(
  120. capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn, 0);
  121. switch (store_type ) {
  122. case LSUP_STORE_MEM:
  123. gr->idx = LSUP_htable_new(
  124. capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t),
  125. xx64_hash_fn, buffer_eq_fn, 0);
  126. break;
  127. case LSUP_STORE_MDB:
  128. // TODO
  129. default:
  130. return -1;
  131. }
  132. return LSUP_OK;
  133. }
  134. LSUP_Graph *
  135. LSUP_graph_new(size_t capacity, char *uri_str, LSUP_store_type store_type)
  136. {
  137. LSUP_Graph *gr;
  138. CRITICAL(gr = malloc(sizeof(LSUP_Graph)));
  139. LSUP_graph_init(gr, capacity, uri_str, store_type);
  140. return gr;
  141. }
  142. /**
  143. * Copy triples from a source graph into a destination one.
  144. *
  145. * The destination graph is not initialized, so the copy is cumulative.
  146. */
  147. static int graph_copy_contents(LSUP_Graph *src, LSUP_Graph *dest)
  148. {
  149. LSUP_Triple trp;
  150. trp.s = NULL;
  151. trp.p = NULL;
  152. trp.o = NULL;
  153. return LSUP_graph_match_callback(
  154. src, dest, &trp, &match_add_fn, true, NULL);
  155. }
  156. int
  157. LSUP_graph_copy(LSUP_Graph *dest, LSUP_Graph *src)
  158. {
  159. LSUP_graph_init(dest, LSUP_graph_size(src), NULL, src->store_type);
  160. return graph_copy_contents(src, dest);
  161. }
  162. int
  163. LSUP_graph_resize(LSUP_Graph *gr, size_t size)
  164. {
  165. LSUP_htable_resize(gr->keys, size);
  166. LSUP_htable_resize(gr->idx, size * IDX_SIZE_RATIO);
  167. return LSUP_OK;
  168. }
  169. size_t
  170. LSUP_graph_capacity(const LSUP_Graph *gr)
  171. { return LSUP_htable_capacity(gr->keys); }
  172. LSUP_Term *
  173. LSUP_graph_uri(const LSUP_Graph *gr) { return gr->uri; }
  174. size_t
  175. LSUP_graph_size(const LSUP_Graph *gr) { return LSUP_htable_size(gr->keys); }
  176. int
  177. LSUP_graph_add_triple(LSUP_Graph *gr, const LSUP_Triple *spo)
  178. {
  179. LSUP_SerTerm sspo[3];
  180. LSUP_term_serialize(spo->s, sspo);
  181. LSUP_term_serialize(spo->p, sspo + 1);
  182. LSUP_term_serialize(spo->o, sspo + 2);
  183. LSUP_TripleKey spok = NULL_TRP;
  184. // Add term to index.
  185. for (int i = 0; i < 3; i++) {
  186. spok[i] = LSUP_sterm_to_key(sspo + i);
  187. TRACE("Indexing term key %lu\n", spok[i]);
  188. // If term is already in the index, discard and free it.
  189. if (LSUP_htable_get(gr->idx, spok + i, NULL) == LSUP_OK) {
  190. //LSUP_SerTerm *sterm = sspo + i;
  191. //CRITICAL(sterm = malloc(sizeof(LSUP_Buffer)));
  192. LSUP_htable_put(gr->idx, spok + i, sspo + i);
  193. } else {
  194. TRACE("%s", "Term is already indexed.");
  195. LSUP_buffer_done(sspo + i);
  196. }
  197. }
  198. // Add triple.
  199. TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
  200. return LSUP_htable_put(gr->keys, spok, NULL);
  201. }
  202. int
  203. LSUP_graph_add(LSUP_Graph *gr, const LSUP_Triple data[], size_t data_size)
  204. {
  205. // TODO Decouple this and build interface for memory and MDB integration.
  206. // Resize all at once if needed.
  207. if (LSUP_graph_capacity(gr) < LSUP_graph_size(gr) + data_size)
  208. LSUP_graph_resize(gr, LSUP_graph_size(gr) + data_size);
  209. int rc = LSUP_NOACTION;
  210. for (size_t i = 0; i < data_size; i++) {
  211. TRACE("Inserting triple #%lu\n", i);
  212. if (LIKELY(LSUP_graph_add_triple(gr, data + i) == LSUP_OK))
  213. rc = LSUP_OK;
  214. }
  215. return rc;
  216. }
  217. bool
  218. LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo)
  219. {
  220. LSUP_TripleKey spok = {
  221. LSUP_term_to_key(spo->s),
  222. LSUP_term_to_key(spo->p),
  223. LSUP_term_to_key(spo->o),
  224. };
  225. return LSUP_htable_get(gr->keys, spok, NULL) == LSUP_OK;
  226. }
  227. int
  228. LSUP_graph_match_callback(
  229. LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo,
  230. keyset_match_fn_t callback_fn, bool match_cond, void *ctx)
  231. {
  232. if (LSUP_htable_size(gr->keys) == 0)
  233. return LSUP_NOACTION;
  234. htsize_t cur = 0;
  235. LSUP_Key k1, k2;
  236. LSUP_key_cmp_fn_t cmp_fn;
  237. LSUP_TripleKey i_spok;
  238. LSUP_TripleKey spok = {
  239. LSUP_term_to_key(spo->s),
  240. LSUP_term_to_key(spo->p),
  241. LSUP_term_to_key(spo->o),
  242. };
  243. if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
  244. if (match_cond == true) {
  245. // Shortcut for 3-term match—only if match_cond is true.
  246. LSUP_graph_init(res, 1, NULL, LSUP_STORE_MEM);
  247. int rc = LSUP_htable_get(gr->keys, spok, NULL);
  248. if(rc == LSUP_OK) {
  249. callback_fn(gr, res, &spok, ctx);
  250. return LSUP_OK;
  251. } else {
  252. return LSUP_NOACTION;
  253. }
  254. } else {
  255. // For negative condition (i.e. "apply this function to all triples
  256. // except the matching one")
  257. int rc = LSUP_NOACTION;
  258. while (LSUP_htable_iter(
  259. gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
  260. if (LIKELY(
  261. i_spok[2] != spok[2] ||
  262. i_spok[0] != spok[0] ||
  263. i_spok[1] != spok[1]
  264. )) {
  265. rc = callback_fn(gr, res, &i_spok, ctx);
  266. }
  267. }
  268. return rc;
  269. }
  270. } else if (spok[0] != NULL_KEY) {
  271. k1 = spok[0];
  272. if (spok[1] != NULL_KEY) { // s p ?
  273. k2 = spok[1];
  274. cmp_fn = lookup_skpk_cmp_fn;
  275. } else if (spok[2] != NULL_KEY) { // s ? o
  276. k2 = spok[2];
  277. cmp_fn = lookup_skok_cmp_fn;
  278. } else { // s ? ?
  279. cmp_fn = lookup_sk_cmp_fn;
  280. }
  281. } else if (spok[1] != NULL_KEY) {
  282. k1 = spok[1];
  283. if (spok[2] != NULL_KEY) { // ? p o
  284. k2 = spok[2];
  285. cmp_fn = lookup_pkok_cmp_fn;
  286. } else { // ? p ?
  287. cmp_fn = lookup_pk_cmp_fn;
  288. }
  289. } else if (spok[2] != NULL_KEY) { // ? ? o
  290. k1 = spok[2];
  291. cmp_fn = lookup_ok_cmp_fn;
  292. } else {
  293. printf("WARNING: no bound terms, making a compact copy.\n");
  294. return LSUP_graph_copy(res, gr);
  295. }
  296. while (LSUP_htable_iter(gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
  297. if (cmp_fn(&i_spok, k1, k2) == match_cond)
  298. callback_fn(gr, res, &i_spok, ctx);
  299. }
  300. return LSUP_OK;
  301. }
  302. int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo)
  303. {
  304. LSUP_graph_init(res, LOOKUP_GR_INIT_SIZE, NULL, LSUP_STORE_MEM);
  305. return LSUP_graph_match_callback(gr, res, spo, &match_add_fn, true, NULL);
  306. }
  307. int LSUP_graph_join(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
  308. {
  309. LSUP_graph_copy(res, gr1);
  310. return graph_copy_contents(gr2, res);
  311. }
  312. int LSUP_graph_subtract(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
  313. {
  314. if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
  315. LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM);
  316. if (LSUP_htable_size(gr1->keys) == 0) return LSUP_OK;
  317. htsize_t cur = 0;
  318. LSUP_TripleKey spok;
  319. while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
  320. if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT)
  321. match_add_fn(res, gr1, &spok, NULL);
  322. }
  323. return LSUP_OK;
  324. }
  325. int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
  326. {
  327. LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM);
  328. if (LSUP_htable_size(gr1->keys) == 0 || LSUP_htable_size(gr2->keys) == 0)
  329. return LSUP_OK;
  330. htsize_t cur = 0;
  331. LSUP_TripleKey spok;
  332. while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
  333. if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_OK)
  334. match_add_fn(res, gr1, &spok, NULL);
  335. }
  336. return LSUP_OK;
  337. }
  338. int LSUP_graph_xor(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
  339. {
  340. if (LSUP_htable_size(gr1->keys) == 0) return LSUP_graph_copy(gr2, res);
  341. if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
  342. LSUP_graph_init(
  343. res, min(LSUP_graph_capacity(gr1), LSUP_graph_capacity(gr2)),
  344. NULL, LSUP_STORE_MEM);
  345. htsize_t cur = 0;
  346. LSUP_TripleKey spok;
  347. while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
  348. if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT)
  349. match_add_fn(res, gr1, &spok, NULL);
  350. }
  351. cur = 0;
  352. while(LSUP_htable_iter(gr2->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
  353. if (LSUP_htable_get(gr1->keys, (void**)&spok, NULL) == LSUP_NORESULT)
  354. match_add_fn(res, gr2, &spok, NULL);
  355. }
  356. return LSUP_OK;
  357. }
  358. void
  359. LSUP_graph_free(LSUP_Graph *gr)
  360. {
  361. if (LIKELY(gr != NULL)) {
  362. LSUP_term_free(gr->uri);
  363. // Free up triples.
  364. LSUP_htable_free(gr->keys);
  365. // Free up index entries and index.
  366. htsize_t cur = 0;
  367. LSUP_TripleKey spok;
  368. LSUP_Buffer *sterm;
  369. while(LSUP_htable_iter(
  370. gr->idx, &cur, (void**)&spok, (void**)&sterm) == LSUP_OK) {
  371. TRACE("Freeing indexed term buffer #%d at %p", cur, sterm);
  372. LSUP_buffer_done(sterm);
  373. }
  374. LSUP_htable_free(gr->idx);
  375. free(gr);
  376. }
  377. }
  378. /* * CALLBACKS * */
  379. /**
  380. * Callback for adding a matched triple.
  381. *
  382. * Adds the current triple in src to dest. No duplicate check.
  383. *
  384. * The source graph cursor must be set to the triple to be copied.
  385. */
  386. int match_add_fn(
  387. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  388. void *ctx)
  389. {
  390. // Add term to index.
  391. for (int i = 0; i < 3; i++) {
  392. // Index terms if not yet presents in destination.
  393. void *src_val, *dest_val;
  394. if(LSUP_htable_get(src->idx, *spok + i, &src_val) == LSUP_OK) {
  395. CRITICAL(dest_val = malloc(sizeof(LSUP_Buffer)));
  396. LSUP_buffer_copy(dest_val, src_val);
  397. LSUP_htable_put(dest->idx, *spok + i, dest_val);
  398. }
  399. }
  400. // Add triple.
  401. return LSUP_htable_put(dest->keys, spok, NULL);
  402. }
  403. /**
  404. * Callback for removing a matched triple.
  405. */
  406. int match_rm_fn(
  407. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  408. void *ctx)
  409. { return LSUP_htable_del(dest->keys, spok); }