graph.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. #include "store_htable.h"
  2. #include "store_mdb.h"
  3. #include "graph.h"
  4. // Initial size of lookup graph. It will double each time capacity is reached.
  5. #define LOOKUP_GR_INIT_SIZE 64
  6. // Expand hash table memory by this factor to keep a good load factor.
  7. #define PREALLOC_FACTOR 1.4
  8. // Assume VERY coarsly that the number of unique terms will be in general
  9. // 1.7 times the number of triples. This is conservative to maintain load
  10. // factor low.
  11. #define IDX_SIZE_RATIO 1.7
  12. typedef enum KSetFlag {
  13. LSUP_KS_NONE = 0,
  14. LSUP_KS_CHECK_CAP = 1 << 0,
  15. LSUP_KS_CHECK_DUP = 1 << 1,
  16. } KSetFlag;
  17. typedef struct Graph {
  18. const LSUP_Env * env; // LSUP environment.
  19. LSUP_store_type store_type; // Back end type: in-memory or MDB.
  20. LSUP_Term *uri; // Graph "name" (URI)
  21. union { // Back end, defined by store_type.
  22. LSUP_HTStore * ht_store;
  23. LSUP_MDBStore * mdb_store;
  24. };
  25. LSUP_NSMap * nsm; // Namespace map.
  26. } Graph;
  27. typedef struct GraphIterator {
  28. const Graph * graph; // Parent graph.
  29. union { // Internal store iterator.
  30. LSUP_HTIterator * ht_iter;
  31. LSUP_MDBIterator * mdb_iter;
  32. };
  33. size_t ct; // Total matches.
  34. } GraphIterator;
  35. /**
  36. * Extern inline functions.
  37. */
  38. size_t LSUP_graph_size (const LSUP_Graph *gr);
  39. /* * * Post-lookup callback prototypes * * */
  40. int match_add_fn(
  41. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  42. void *ctx);
  43. int match_rm_fn(
  44. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  45. void *ctx);
  46. static LSUP_rc
  47. graph_iter_next_buffer (GraphIterator *it, LSUP_SerTriple *sspo);
  48. static inline bool is_null_trp (const LSUP_TripleKey *trp)
  49. {
  50. return (
  51. *trp[0] == NULL_KEY &&
  52. *trp[1] == NULL_KEY &&
  53. *trp[2] == NULL_KEY);
  54. }
  55. #define ENTRY(a, b) (be) == (LSUP_STORE_##a) ||
  56. static inline bool
  57. check_backend (LSUP_store_type be)
  58. { return (BACKEND_TBL false); }
  59. #undef ENTRY
  60. /* * * GRAPH * * */
  61. Graph *
  62. LSUP_graph_new_env (const LSUP_Env *env, const LSUP_store_type store_type)
  63. {
  64. if (UNLIKELY (!env)) return NULL;
  65. if (UNLIKELY (!check_backend (store_type))) return NULL;
  66. LSUP_Graph *gr;
  67. MALLOC_GUARD (gr, NULL);
  68. gr->uri = LSUP_uri_new (NULL);
  69. gr->store_type = store_type;
  70. gr->env = env;
  71. gr->nsm = env->nsm;
  72. if (gr->store_type == LSUP_STORE_MEM) {
  73. gr->ht_store = LSUP_htstore_new();
  74. if (UNLIKELY (!gr->ht_store)) return NULL;
  75. } else if (gr->store_type == LSUP_STORE_MDB) {
  76. gr->mdb_store = env->mdbstore;
  77. } else { // LSUP_STORE_MDB_TMP
  78. gr->mdb_store = env->mdbstore_ramdisk;
  79. }
  80. log_debug ("Graph created.");
  81. return gr;
  82. }
  83. /**
  84. * Copy triples from a source graph into a destination one.
  85. *
  86. * The destination graph is not initialized here, so the copy is cumulative.
  87. */
  88. static LSUP_rc
  89. graph_copy_contents (const LSUP_Graph *src, LSUP_Graph *dest)
  90. {
  91. LSUP_rc rc = LSUP_NOACTION;
  92. GraphIterator *it = LSUP_graph_lookup (src, NULL, NULL, NULL, NULL);
  93. LSUP_SerTriple sspo;
  94. LSUP_GraphIterator *add_it = LSUP_graph_add_init (dest);
  95. while (graph_iter_next_buffer (it, &sspo) != LSUP_END) {
  96. log_debug ("Inserting triple #%lu", LSUP_graph_iter_cur (it));
  97. LSUP_rc add_rc = LSUP_graph_add_iter (add_it, &sspo);
  98. if (LIKELY (add_rc == LSUP_OK)) rc = LSUP_OK;
  99. else if (add_rc < 0) return add_rc;
  100. }
  101. LSUP_graph_add_done (it);
  102. return rc;
  103. }
  104. LSUP_Graph *
  105. LSUP_graph_copy (const Graph *src)
  106. {
  107. LSUP_Graph *dest = LSUP_graph_new_env (src->env, src->store_type);
  108. if (UNLIKELY (!dest)) return NULL;
  109. LSUP_rc rc = graph_copy_contents (src, dest);
  110. if (UNLIKELY (rc != LSUP_OK)) return NULL;
  111. return dest;
  112. }
  113. // TODO support boolean ops between any types of graphs.
  114. Graph *
  115. LSUP_graph_bool_op(
  116. const LSUP_bool_op op, const Graph *gr1, const Graph *gr2)
  117. {
  118. if (UNLIKELY (gr1->store_type != LSUP_STORE_MEM)) {
  119. fprintf(
  120. stderr,
  121. "First operand %s is not an in-memory graph. "
  122. "Cannot perform boolean operation.",
  123. gr1->uri->data);
  124. return NULL;
  125. }
  126. if (UNLIKELY (gr2->store_type != LSUP_STORE_MEM)) {
  127. fprintf(
  128. stderr,
  129. "Second operand %s is not an in-memory graph. "
  130. "Cannot perform boolean operation.",
  131. gr2->uri->data);
  132. return NULL;
  133. }
  134. LSUP_Graph *res = LSUP_graph_new (LSUP_STORE_MEM);
  135. res->ht_store = LSUP_htstore_bool_op (op, gr1->ht_store, gr2->ht_store);
  136. return res;
  137. }
  138. void
  139. LSUP_graph_free (LSUP_Graph *gr)
  140. {
  141. if (LIKELY (gr != NULL)) {
  142. LSUP_term_free (gr->uri);
  143. if (gr->store_type == LSUP_STORE_MEM)
  144. LSUP_htstore_free (gr->ht_store);
  145. /*
  146. // NOTE This is not required bacause MDB store is only one for now,
  147. // cleared at exit.
  148. else
  149. LSUP_mdbstore_free (gr->mdb_store);
  150. */
  151. free (gr);
  152. }
  153. }
  154. LSUP_Term *
  155. LSUP_graph_uri (const LSUP_Graph *gr) { return gr->uri; }
  156. LSUP_rc
  157. LSUP_graph_set_uri (LSUP_Graph *gr, const char *uri)
  158. { return LSUP_uri_init (gr->uri, uri); }
  159. LSUP_NSMap *
  160. LSUP_graph_namespace (const Graph *gr)
  161. { return gr->nsm; }
  162. void
  163. LSUP_graph_set_namespace (Graph *gr, LSUP_NSMap *nsm)
  164. { if (gr->store_type == LSUP_STORE_MEM) gr->nsm = nsm; }
  165. size_t
  166. LSUP_graph_size (const Graph *gr)
  167. {
  168. if (gr->store_type == LSUP_STORE_MEM)
  169. return LSUP_htstore_size (gr->ht_store);
  170. else {
  171. size_t ct;
  172. LSUP_GraphIterator *it = LSUP_graph_lookup (gr, NULL, NULL, NULL, &ct);
  173. LSUP_graph_iter_free (it);
  174. return ct;
  175. }
  176. }
  177. bool
  178. LSUP_graph_equals (const Graph *gr1, const Graph *gr2)
  179. {
  180. LSUP_Graph *res = LSUP_graph_bool_op (LSUP_BOOL_XOR, gr1, gr2);
  181. return (LSUP_graph_size (res) == 0);
  182. }
  183. GraphIterator *
  184. LSUP_graph_add_init (LSUP_Graph *gr)
  185. {
  186. GraphIterator *it;
  187. CALLOC_GUARD (it, NULL);
  188. if (gr->store_type == LSUP_STORE_MEM) {
  189. it->ht_iter = LSUP_htstore_add_init (gr->ht_store);
  190. } else {
  191. LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
  192. it->mdb_iter = LSUP_mdbstore_add_init (gr->mdb_store, sc);
  193. LSUP_buffer_free (sc);
  194. }
  195. it->graph = gr;
  196. return it;
  197. }
  198. LSUP_rc
  199. LSUP_graph_add_iter (LSUP_GraphIterator *it, const LSUP_SerTriple *sspo)
  200. {
  201. if (it->graph->store_type == LSUP_STORE_MEM) {
  202. return LSUP_htstore_add_iter (it->ht_iter, sspo);
  203. } else {
  204. return LSUP_mdbstore_add_iter (it->mdb_iter, sspo);
  205. }
  206. }
  207. void
  208. LSUP_graph_add_done (LSUP_GraphIterator *it)
  209. {
  210. if (it->graph->store_type == LSUP_STORE_MEM) {
  211. LSUP_htstore_add_done (it->ht_iter);
  212. } else {
  213. LSUP_mdbstore_add_done (it->mdb_iter);
  214. }
  215. free (it);
  216. log_trace ("Done adding.");
  217. }
  218. LSUP_rc
  219. LSUP_graph_add (
  220. Graph *gr, const LSUP_Triple trp[],
  221. const LSUP_SerTriple strp[], size_t *inserted)
  222. {
  223. /*
  224. * NOTE It is possible to pass both sets of RDF triples and buffer triples.
  225. */
  226. LSUP_rc rc = LSUP_NOACTION;
  227. // Initialize iterator.
  228. LSUP_GraphIterator *it = LSUP_graph_add_init (gr);
  229. // Serialize and insert RDF triples.
  230. LSUP_SerTriple *sspo = LSUP_striple_new (BUF_DUMMY, BUF_DUMMY, BUF_DUMMY);
  231. if (UNLIKELY (!sspo)) return LSUP_MEM_ERR;
  232. if (trp) {
  233. for (size_t i = 0; trp[i].s != NULL; i++) {
  234. log_trace ("Inserting triple #%lu", i);
  235. LSUP_triple_serialize (trp + i, sspo);
  236. LSUP_rc db_rc = LSUP_graph_add_iter (it, sspo);
  237. if (db_rc == LSUP_OK) rc = LSUP_OK;
  238. if (UNLIKELY (db_rc < 0)) return db_rc;
  239. }
  240. }
  241. LSUP_striple_free (sspo);
  242. // Insert serialized triples.
  243. if (strp) {
  244. for (size_t i = 0; strp[i].s != NULL; i++) {
  245. log_trace ("Inserting serialized triple #%lu", i);
  246. LSUP_rc db_rc = LSUP_graph_add_iter (it, strp + i);
  247. if (db_rc == LSUP_OK) rc = LSUP_OK;
  248. if (UNLIKELY (db_rc < 0)) return db_rc;
  249. }
  250. }
  251. if (inserted) {
  252. if (gr->store_type == LSUP_STORE_MEM) {
  253. *inserted = LSUP_htiter_cur (it->ht_iter);
  254. } else {
  255. *inserted = LSUP_mdbiter_cur (it->mdb_iter);
  256. }
  257. }
  258. LSUP_graph_add_done (it);
  259. return rc;
  260. }
  261. LSUP_rc
  262. LSUP_graph_remove (
  263. LSUP_Graph *gr, const LSUP_Term *s, const LSUP_Term *p,
  264. const LSUP_Term *o, size_t *ct)
  265. {
  266. LSUP_rc rc;
  267. LSUP_Buffer *ss = LSUP_buffer_new_from_term (s);
  268. LSUP_Buffer *sp = LSUP_buffer_new_from_term (p);
  269. LSUP_Buffer *so = LSUP_buffer_new_from_term (o);
  270. LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
  271. if (gr->store_type == LSUP_STORE_MEM)
  272. rc = LSUP_htstore_remove (gr->ht_store, ss, sp, so, ct);
  273. else
  274. rc = LSUP_mdbstore_remove (gr->mdb_store, ss, sp, so, sc, ct);
  275. LSUP_buffer_free (ss);
  276. LSUP_buffer_free (sp);
  277. LSUP_buffer_free (so);
  278. LSUP_buffer_free (sc);
  279. return rc;
  280. }
  281. GraphIterator *
  282. LSUP_graph_lookup (const Graph *gr, const LSUP_Term *s, const LSUP_Term *p,
  283. const LSUP_Term *o, size_t *ct)
  284. {
  285. GraphIterator *it;
  286. MALLOC_GUARD (it, NULL);
  287. it->graph = gr;
  288. LSUP_Buffer *ss = LSUP_buffer_new_from_term (s);
  289. LSUP_Buffer *sp = LSUP_buffer_new_from_term (p);
  290. LSUP_Buffer *so = LSUP_buffer_new_from_term (o);
  291. LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
  292. if (it->graph->store_type == LSUP_STORE_MEM) {
  293. it->ht_iter = LSUP_htstore_lookup (it->graph->ht_store, ss, sp, so);
  294. if (ct) *ct = it->ct;
  295. } else it->mdb_iter = LSUP_mdbstore_lookup (
  296. it->graph->mdb_store, ss, sp, so, sc, ct);
  297. LSUP_buffer_free (ss);
  298. LSUP_buffer_free (sp);
  299. LSUP_buffer_free (so);
  300. LSUP_buffer_free (sc);
  301. return it;
  302. }
  303. /** @brief Advance iterator and return serialized triple.
  304. *
  305. * This is an internal function to pass raw buffers between higher-level
  306. * functions without serializing and deserializing triples.
  307. */
  308. inline static LSUP_rc
  309. graph_iter_next_buffer (GraphIterator *it, LSUP_SerTriple *sspo)
  310. {
  311. LSUP_rc rc;
  312. if (it->graph->store_type == LSUP_STORE_MEM)
  313. rc = LSUP_htiter_next (it->ht_iter, sspo);
  314. else rc = LSUP_mdbiter_next (it->mdb_iter, sspo);
  315. return rc;
  316. }
  317. LSUP_rc
  318. LSUP_graph_iter_next (GraphIterator *it, LSUP_Triple *spo)
  319. {
  320. /*
  321. * NOTE: Memory and MDB back ends treat sspo differently, whereas the
  322. * memory one owns the whole buffer structure, while the MDB one owns only
  323. * the data. Therefore they must be initialized and freed differently.
  324. */
  325. LSUP_SerTriple *sspo;
  326. LSUP_Buffer *ss, *sp, *so;
  327. if (it->graph->store_type == LSUP_STORE_MEM) {
  328. ss = sp = so = NULL;
  329. } else {
  330. // Craft buffers manually so that their addresses are NULL and need not
  331. // be freed.
  332. CALLOC_GUARD (ss, LSUP_MEM_ERR);
  333. CALLOC_GUARD (sp, LSUP_MEM_ERR);
  334. CALLOC_GUARD (so, LSUP_MEM_ERR);
  335. }
  336. sspo = LSUP_striple_new (ss, sp, so);
  337. LSUP_rc rc = graph_iter_next_buffer (it, sspo);
  338. if (rc == LSUP_OK) {
  339. spo->s = LSUP_term_new_from_buffer (sspo->s);
  340. spo->p = LSUP_term_new_from_buffer (sspo->p);
  341. spo->o = LSUP_term_new_from_buffer (sspo->o);
  342. }
  343. if (it->graph->store_type == LSUP_STORE_MEM) free (sspo);
  344. else LSUP_striple_free_shallow (sspo);
  345. return rc;
  346. }
  347. void
  348. LSUP_graph_iter_free (GraphIterator *it)
  349. {
  350. if (it->graph->store_type == LSUP_STORE_MEM)
  351. LSUP_htiter_free (it->ht_iter);
  352. else
  353. LSUP_mdbiter_free (it->mdb_iter);
  354. free (it);
  355. }
  356. size_t
  357. LSUP_graph_iter_cur (GraphIterator *it)
  358. { return LSUP_mdbiter_cur (it->mdb_iter); }
  359. bool
  360. LSUP_graph_contains (const LSUP_Graph *gr, const LSUP_Triple *spo)
  361. {
  362. GraphIterator *it = LSUP_graph_lookup (
  363. gr, spo->s, spo->p, spo->o, NULL);
  364. LSUP_Triple *tmp_spo = LSUP_triple_new (TERM_DUMMY, TERM_DUMMY, TERM_DUMMY);
  365. bool rc = LSUP_graph_iter_next (it, tmp_spo) != LSUP_END;
  366. LSUP_triple_free (tmp_spo);
  367. LSUP_graph_iter_free (it);
  368. return rc;
  369. }
  370. /* * * Static functions * * */