graph.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. #include "graph.h"
  2. // Initial size of lookup graph. It will double each time capacity is reached.
  3. #define LOOKUP_GR_INIT_SIZE 64
  4. // Expand hash table memory by this factor to keep a good load factor.
  5. #define PREALLOC_FACTOR 1.4
  6. // Assume VERY coarsly that the number of unique terms will be in general
  7. // 1.7 times the number of triples. This is conservative to maintain load
  8. // factor low.
  9. #define IDX_SIZE_RATIO 1.7
  10. // RAMdisk path for MDB volatile store.
  11. #define MDB_RAMDISK_PATH TMPDIR "/lsup_mem_graph"
  12. typedef enum KSetFlag {
  13. LSUP_KS_NONE = 0,
  14. LSUP_KS_CHECK_CAP = 1 << 0,
  15. LSUP_KS_CHECK_DUP = 1 << 1,
  16. } KSetFlag;
  17. /**
  18. * Static handles.
  19. */
  20. static const char *default_ctx_label = "urn:lsup:default";
  21. static LSUP_Buffer *default_ctx = NULL;
  22. static LSUP_MDBStore *default_store = NULL, *default_tmp_store = NULL;
  23. typedef struct Graph {
  24. LSUP_store_type store_type; // In-memory or MDB-backed
  25. LSUP_Term *uri; // Graph "name" (URI)
  26. union {
  27. LSUP_HTStore * ht_store;
  28. LSUP_MDBStore * mdb_store;
  29. };
  30. } Graph;
  31. typedef struct GraphIterator {
  32. const Graph * graph; // Parent graph.
  33. union { // Internal store iterator.
  34. LSUP_HTIterator * ht_iter;
  35. LSUP_MDBIterator * mdb_iter;
  36. };
  37. size_t ct; // Total matches.
  38. } GraphIterator;
  39. /**
  40. * Extern inline functions.
  41. */
  42. size_t LSUP_graph_size (const LSUP_Graph *gr);
  43. /* * * Static prototypes. * * */
  44. static inline LSUP_rc mdbstore_init();
  45. /* * * Post-lookup callback prototypes * * */
  46. int match_add_fn(
  47. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  48. void *ctx);
  49. int match_rm_fn(
  50. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  51. void *ctx);
  52. static LSUP_rc
  53. graph_iter_next_buffer (GraphIterator *it, LSUP_SerTriple *sspo);
  54. /* Atexit functions. */
  55. void ctx_cleanup()
  56. {
  57. /*@ @brief Close LMDB environment.
  58. *
  59. * Run at exit.
  60. */
  61. LSUP_mdbstore_free (default_store);
  62. LSUP_mdbstore_free (default_tmp_store);
  63. LSUP_buffer_free (default_ctx);
  64. }
  65. static inline bool is_null_trp (const LSUP_TripleKey *trp)
  66. {
  67. return (
  68. *trp[0] == NULL_KEY &&
  69. *trp[1] == NULL_KEY &&
  70. *trp[2] == NULL_KEY);
  71. }
  72. #define ENTRY(a, b) (be) == (LSUP_STORE_##a) ||
  73. static inline bool
  74. check_backend (LSUP_store_type be)
  75. { return (BACKEND_TBL false); }
  76. #undef ENTRY
  77. /* * * GRAPH * * */
  78. Graph *
  79. LSUP_graph_new (const LSUP_store_type store_type)
  80. {
  81. if (!check_backend (store_type)) return NULL;
  82. LSUP_Graph *gr = malloc (sizeof (LSUP_Graph));
  83. if (UNLIKELY (!gr)) return NULL;
  84. gr->uri = LSUP_uri_new (NULL);
  85. gr->store_type = store_type;
  86. if (UNLIKELY (mdbstore_init() != LSUP_OK)) return NULL;
  87. if (gr->store_type == LSUP_STORE_MEM) {
  88. gr->ht_store = LSUP_htstore_new();
  89. if (UNLIKELY (!gr->ht_store)) return NULL;
  90. } else if (gr->store_type == LSUP_STORE_MDB) {
  91. gr->mdb_store = default_store;
  92. if (UNLIKELY (!gr->mdb_store)) return NULL;
  93. } else { // LSUP_STORE_MDB_TMP
  94. gr->mdb_store = default_tmp_store;
  95. }
  96. return gr;
  97. }
  98. /**
  99. * Copy triples from a source graph into a destination one.
  100. *
  101. * The destination graph is not initialized here, so the copy is cumulative.
  102. */
  103. static LSUP_rc
  104. graph_copy_contents (const LSUP_Graph *src, LSUP_Graph *dest)
  105. {
  106. LSUP_rc rc = LSUP_NOACTION;
  107. const LSUP_Triple trp = {NULL, NULL, NULL};
  108. GraphIterator *it = LSUP_graph_lookup (src, &trp, NULL);
  109. LSUP_SerTriple sspo;
  110. while (graph_iter_next_buffer (it, &sspo) != LSUP_END) {
  111. TRACE ("Inserting triple #%lu\n", LSUP_graph_iter_cur (it));
  112. LSUP_rc add_rc = LSUP_graph_add (dest, NULL, 0, &sspo, 1, NULL);
  113. if (LIKELY (add_rc == LSUP_OK)) rc = LSUP_OK;
  114. else if (add_rc < 0) return add_rc;
  115. }
  116. return rc;
  117. }
  118. LSUP_Graph *
  119. LSUP_graph_copy (const Graph *src)
  120. {
  121. LSUP_Graph *dest = LSUP_graph_new (src->store_type);
  122. if (UNLIKELY (!dest)) return NULL;
  123. LSUP_rc rc = graph_copy_contents (src, dest);
  124. if (UNLIKELY (rc != LSUP_OK)) return NULL;
  125. return dest;
  126. }
  127. // TODO support boolean ops between any types of graphs.
  128. Graph *
  129. LSUP_graph_bool_op(
  130. const LSUP_bool_op op, const Graph *gr1, const Graph *gr2)
  131. {
  132. if (UNLIKELY (gr1->store_type != LSUP_STORE_MEM)) {
  133. fprintf(
  134. stderr,
  135. "First operand %s is not an in-memory graph. "
  136. "Cannot perform boolean operation.",
  137. gr1->uri->data);
  138. return NULL;
  139. }
  140. if (UNLIKELY (gr2->store_type != LSUP_STORE_MEM)) {
  141. fprintf(
  142. stderr,
  143. "Second operand %s is not an in-memory graph. "
  144. "Cannot perform boolean operation.",
  145. gr2->uri->data);
  146. return NULL;
  147. }
  148. LSUP_Graph *res = LSUP_graph_new (LSUP_STORE_MEM);
  149. res->ht_store = LSUP_htstore_bool_op (op, gr1->ht_store, gr2->ht_store);
  150. return res;
  151. }
  152. void
  153. LSUP_graph_free (LSUP_Graph *gr)
  154. {
  155. if (LIKELY (gr != NULL)) {
  156. LSUP_term_free (gr->uri);
  157. if (gr->store_type == LSUP_STORE_MEM)
  158. LSUP_htstore_free (gr->ht_store);
  159. /*
  160. // NOTE This is not required bacause MDB store is only one for now,
  161. // cleared at exit.
  162. else
  163. LSUP_mdbstore_free (gr->mdb_store);
  164. */
  165. free (gr);
  166. }
  167. }
  168. LSUP_Term *
  169. LSUP_graph_uri (const LSUP_Graph *gr) { return gr->uri; }
  170. LSUP_rc
  171. LSUP_graph_set_uri (LSUP_Graph *gr, const char *uri)
  172. { return LSUP_uri_init (gr->uri, uri); }
  173. size_t
  174. LSUP_graph_size (const Graph *gr)
  175. {
  176. TRACE ("Store type: %d\n", gr->store_type);
  177. if (gr->store_type == LSUP_STORE_MEM)
  178. LSUP_htstore_size (gr->ht_store);
  179. return LSUP_mdbstore_size (gr->mdb_store);
  180. }
  181. bool
  182. LSUP_graph_equals (const Graph *gr1, const Graph *gr2)
  183. {
  184. LSUP_Graph *res = LSUP_graph_bool_op (LSUP_BOOL_XOR, gr1, gr2);
  185. return (LSUP_graph_size (res) == 0);
  186. }
  187. GraphIterator *
  188. LSUP_graph_add_init (LSUP_Graph *gr)
  189. {
  190. GraphIterator *it = malloc (sizeof (*it));
  191. if (UNLIKELY (!it)) return NULL;
  192. if (gr->store_type == LSUP_STORE_MEM) {
  193. it->ht_iter = LSUP_htstore_add_init (gr->ht_store);
  194. } else {
  195. LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
  196. it->mdb_iter = LSUP_mdbstore_add_init (gr->mdb_store, sc);
  197. LSUP_buffer_free (sc);
  198. }
  199. it->graph = gr;
  200. return it;
  201. }
  202. LSUP_rc
  203. LSUP_graph_add_iter (LSUP_GraphIterator *it, const LSUP_SerTriple *sspo)
  204. {
  205. if (it->graph->store_type == LSUP_STORE_MEM) {
  206. return LSUP_htstore_add_iter (it->ht_iter, sspo);
  207. } else {
  208. return LSUP_mdbstore_add_iter (it->mdb_iter, sspo);
  209. }
  210. }
  211. void
  212. LSUP_graph_add_done (LSUP_GraphIterator *it)
  213. {
  214. if (it->graph->store_type == LSUP_STORE_MEM) {
  215. LSUP_htstore_add_done (it->ht_iter);
  216. } else {
  217. LSUP_mdbstore_add_done (it->mdb_iter);
  218. }
  219. }
  220. LSUP_rc
  221. LSUP_graph_add(
  222. LSUP_Graph *gr,
  223. const LSUP_Triple trp[], size_t trp_ct,
  224. const LSUP_SerTriple strp[], size_t strp_ct, size_t *inserted)
  225. {
  226. /*
  227. * NOTE It is possible to pass both sets of RDF triples and buffer triples.
  228. */
  229. LSUP_rc rc = LSUP_NOACTION;
  230. // Initialize iterator.
  231. LSUP_GraphIterator *it = LSUP_graph_add_init (gr);
  232. // Serialize and insert RDF triples.
  233. LSUP_SerTriple *sspo = STRP_DUMMY;
  234. for (size_t i = 0; i < trp_ct; i++) {
  235. TRACE ("Inserting triple #%lu\n", i);
  236. LSUP_triple_serialize (trp + i, sspo);
  237. LSUP_rc db_rc = LSUP_graph_add_iter (it, sspo);
  238. if (db_rc == LSUP_OK) rc = LSUP_OK;
  239. if (UNLIKELY (db_rc < 0)) return db_rc;
  240. }
  241. LSUP_striple_free (sspo);
  242. // Insert serialized triples.
  243. for (size_t i = 0; i < strp_ct; i++) {
  244. TRACE ("Inserting serialized triple #%lu\n", i);
  245. LSUP_rc db_rc = LSUP_graph_add_iter (it, strp + i);
  246. if (db_rc == LSUP_OK) rc = LSUP_OK;
  247. if (UNLIKELY (db_rc < 0)) return db_rc;
  248. }
  249. LSUP_graph_add_done (it);
  250. if (inserted) {
  251. if (gr->store_type == LSUP_STORE_MEM) {
  252. *inserted = LSUP_htiter_cur (it->ht_iter);
  253. } else {
  254. *inserted = LSUP_mdbiter_cur (it->mdb_iter);
  255. }
  256. }
  257. return rc;
  258. }
  259. LSUP_rc
  260. LSUP_graph_remove (Graph *gr, const LSUP_Triple *spo, size_t *ct)
  261. {
  262. LSUP_rc rc;
  263. LSUP_SerTriple *sspo = LSUP_striple_new_from_triple (spo);
  264. LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
  265. if (gr->store_type == LSUP_STORE_MEM)
  266. rc = 0;// TODO uncomment LSUP_htstore_remove (gr->ht_store, &sspo, ct);
  267. else
  268. rc = LSUP_mdbstore_remove (gr->mdb_store, sspo, sc, ct);
  269. LSUP_striple_free (sspo);
  270. LSUP_buffer_free (sc);
  271. return rc;
  272. }
  273. GraphIterator *
  274. LSUP_graph_lookup (const Graph *gr, const LSUP_Triple *spo, size_t *ct)
  275. {
  276. GraphIterator *it = malloc (sizeof (GraphIterator));
  277. if (UNLIKELY (!it)) return NULL;
  278. it->graph = gr;
  279. LSUP_SerTriple *sspo = LSUP_striple_new_from_triple (spo);
  280. LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
  281. if (gr->store_type == LSUP_STORE_MEM) {
  282. // TODO uncomment it->ht_iter = LSUP_htstore_lookup (gr->ht_store, sspo, &it->ct);
  283. } else {
  284. it->mdb_iter = LSUP_mdbstore_lookup (gr->mdb_store, sspo, sc, ct);
  285. }
  286. LSUP_striple_free (sspo);
  287. LSUP_buffer_free (sc);
  288. return it;
  289. }
  290. /** @brief Advance iterator and return serialized triple.
  291. *
  292. * This is an internal function to pass raw buffers between higher-level
  293. * functions without serializing and deserializing triples.
  294. */
  295. inline static LSUP_rc
  296. graph_iter_next_buffer (GraphIterator *it, LSUP_SerTriple *sspo)
  297. {
  298. LSUP_rc rc;
  299. if (it->graph->store_type == LSUP_STORE_MEM)
  300. rc = 0;// TODO uncomment LSUP_htiter_next (it->ht_iter, sspo);
  301. else rc = LSUP_mdbiter_next (it->mdb_iter, sspo);
  302. return rc;
  303. }
  304. LSUP_rc
  305. LSUP_graph_iter_next (GraphIterator *it, LSUP_Triple *spo)
  306. {
  307. LSUP_SerTriple *sspo = NULL;
  308. if (spo) sspo = STRP_DUMMY;
  309. LSUP_rc rc = graph_iter_next_buffer (it, sspo);
  310. if (rc == LSUP_OK) {
  311. if (spo) {
  312. LSUP_term_deserialize (sspo->s, spo->s);
  313. LSUP_term_deserialize (sspo->p, spo->p);
  314. LSUP_term_deserialize (sspo->o, spo->o);
  315. }
  316. }
  317. // Addresses of triple buffers are owned by the DB. Do not free.
  318. if (sspo) {
  319. free (sspo->s);
  320. free (sspo->p);
  321. free (sspo->o);
  322. free (sspo);
  323. }
  324. return rc;
  325. }
  326. void
  327. LSUP_graph_iter_free (GraphIterator *it)
  328. {
  329. if (it->graph->store_type == LSUP_STORE_MEM)
  330. NULL;// TODO uncomment LSUP_htiter_free (it->ht_iter);
  331. else
  332. LSUP_mdbiter_free (it->mdb_iter);
  333. free (it);
  334. }
  335. size_t
  336. LSUP_graph_iter_cur (GraphIterator *it)
  337. { return LSUP_mdbiter_cur (it->mdb_iter); }
  338. bool
  339. LSUP_graph_contains (const LSUP_Graph *gr, const LSUP_Triple *spo)
  340. {
  341. GraphIterator *it = LSUP_graph_lookup (gr, spo, NULL);
  342. bool rc = LSUP_graph_iter_next (it, NULL) != LSUP_NORESULT;
  343. LSUP_graph_iter_free (it);
  344. return rc;
  345. }
  346. /* * * Static functions * * */
  347. /** @brief Initialize default context and MDB environments.
  348. *
  349. * This is done only once per process.
  350. *
  351. * The ramdisk store persists after the application is closed, but will be
  352. * wiped clean the next time this function is called.
  353. */
  354. static inline LSUP_rc
  355. mdbstore_init()
  356. {
  357. char *path;
  358. // RAM disk store.
  359. if (UNLIKELY (!default_tmp_store)) {
  360. printf ("Initializing RAM disk back end.\n");
  361. path = MDB_RAMDISK_PATH;
  362. if (LSUP_mdbstore_setup (path, true) != LSUP_OK) return LSUP_DB_ERR;
  363. default_tmp_store = LSUP_mdbstore_new (path, default_ctx);
  364. if (UNLIKELY (!default_tmp_store)) return LSUP_DB_ERR;
  365. }
  366. // Permanent store.
  367. if (UNLIKELY (!default_store)) {
  368. printf ("Initializing persistent back end.\n");
  369. // NOTE This method will only allow one persistent disk back end per
  370. // application. TODO maybe later allow multiple backends if useful.
  371. path = getenv ("LSUP_MDB_STORE_PATH");
  372. if (!path) {
  373. path = DEFAULT_ENV_PATH;
  374. fprintf(
  375. stderr,
  376. "WARNING: `LSUP_MDB_STORE_PATH' environment variable is not "
  377. "set. The default location %s will be used as the graph "
  378. "store.\n", path
  379. );
  380. }
  381. if (LSUP_mdbstore_setup (path, false) != LSUP_OK) return LSUP_DB_ERR;
  382. default_store = LSUP_mdbstore_new (path, default_ctx);
  383. if (UNLIKELY (!default_store)) return LSUP_DB_ERR;
  384. }
  385. if (UNLIKELY (!default_ctx)) {
  386. LSUP_Term *default_ctx_uri = LSUP_uri_new (default_ctx_label);
  387. default_ctx = LSUP_buffer_new_from_term (default_ctx_uri);
  388. LSUP_term_free (default_ctx_uri);
  389. atexit (ctx_cleanup);
  390. }
  391. return LSUP_OK;
  392. }