graph.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. #include "graph.h"
  2. // Initial size of lookup graph. It will double each time capacity is reached.
  3. #define LOOKUP_GR_INIT_SIZE 64
  4. // Expand hash table memory by this factor to keep a good load factor.
  5. #define PREALLOC_FACTOR 1.4
  6. // Assume VERY coarsly that the number of unique terms will be in general
  7. // 1.7 times the number of triples. This is conservative to maintain load
  8. // factor low.
  9. #define IDX_SIZE_RATIO 1.7
  10. // RAMdisk path for MDB volatile store.
  11. #define MDB_RAMDISK_PATH TMPDIR "/lsup_mem_graph"
  12. typedef enum KSetFlag {
  13. LSUP_KS_NONE = 0,
  14. LSUP_KS_CHECK_CAP = 1 << 0,
  15. LSUP_KS_CHECK_DUP = 1 << 1,
  16. } KSetFlag;
  17. /**
  18. * Static handles.
  19. */
  20. static const char *default_ctx_label = "urn:lsup:default";
  21. static LSUP_Buffer *default_ctx = NULL;
  22. typedef struct Graph {
  23. LSUP_store_type store_type; // In-memory or MDB-backed
  24. LSUP_Term *uri; // Graph "name" (URI)
  25. union {
  26. //LSUP_HTStore * ht_store;
  27. LSUP_MDBStore * mdb_store;
  28. };
  29. } Graph;
  30. typedef struct GraphIterator {
  31. const Graph * graph; // Parent graph.
  32. union { // Internal store iterator.
  33. //LSUP_HTIterator * ht_iter;
  34. LSUP_MDBIterator * mdb_iter;
  35. };
  36. size_t ct; // Total matches.
  37. size_t i; // Cursor.
  38. } GraphIterator;
  39. /**
  40. * Extern inline functions.
  41. */
  42. size_t LSUP_graph_size (const LSUP_Graph *gr);
  43. /* * * Static prototypes. * * */
  44. static inline LSUP_rc mdbstore_init();
  45. /* * * Post-lookup callback prototypes * * */
  46. int match_add_fn(
  47. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  48. void *ctx);
  49. int match_rm_fn(
  50. LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
  51. void *ctx);
  52. static LSUP_rc
  53. graph_iter_next_buffer (GraphIterator *it, LSUP_SerTriple *sspo);
  54. /* Atexit functions. */
  55. void ctx_cleanup() { LSUP_buffer_free (default_ctx); }
  56. static inline bool is_null_trp (const LSUP_TripleKey *trp)
  57. {
  58. return (
  59. *trp[0] == NULL_KEY &&
  60. *trp[1] == NULL_KEY &&
  61. *trp[2] == NULL_KEY);
  62. }
  63. #define ENTRY(a, b) (be) == (LSUP_STORE_##a) ||
  64. static inline bool
  65. check_backend (LSUP_store_type be)
  66. { return (BACKEND_TBL false); }
  67. #undef ENTRY
  68. /* * * GRAPH * * */
  69. Graph *
  70. LSUP_graph_new (const LSUP_store_type store_type)
  71. {
  72. if (!check_backend (store_type)) return NULL;
  73. LSUP_Graph *gr;
  74. CRITICAL (gr = malloc (sizeof (LSUP_Graph)));
  75. gr->uri = LSUP_uri_new (NULL);
  76. gr->store_type = store_type;
  77. if (mdbstore_init() != LSUP_OK) return NULL;
  78. if (gr->store_type == LSUP_STORE_MEM) {
  79. // TODO uncomment gr->ht_store = LSUP_htstore_new (0);
  80. // if (!gr->ht_store) return NULL;
  81. } else if (gr->store_type == LSUP_STORE_MDB) {
  82. gr->mdb_store = LSUP_mdbstore_new(
  83. getenv ("LSUP_MDB_STORE_PATH"), default_ctx);
  84. if (!gr->mdb_store) return NULL;
  85. } else {
  86. gr->mdb_store = LSUP_mdbstore_new(
  87. MDB_RAMDISK_PATH, default_ctx);
  88. if (!gr->mdb_store) return NULL;
  89. }
  90. return gr;
  91. }
  92. /**
  93. * Copy triples from a source graph into a destination one.
  94. *
  95. * The destination graph is not initialized here, so the copy is cumulative.
  96. */
  97. static LSUP_rc
  98. graph_copy_contents (const LSUP_Graph *src, LSUP_Graph *dest)
  99. {
  100. LSUP_rc rc = LSUP_NOACTION;
  101. const LSUP_Triple trp = {NULL, NULL, NULL};
  102. GraphIterator *it = LSUP_graph_lookup (src, &trp);
  103. LSUP_SerTriple sspo;
  104. while (graph_iter_next_buffer (it, &sspo) != LSUP_END) {
  105. TRACE ("Inserting triple #%lu\n", it->i);
  106. LSUP_rc add_rc = LSUP_graph_add (dest, NULL, 0, &sspo, 1, NULL);
  107. if (LIKELY (add_rc == LSUP_OK)) rc = LSUP_OK;
  108. else if (add_rc < 0) return add_rc;
  109. }
  110. return rc;
  111. }
  112. LSUP_Graph *
  113. LSUP_graph_copy (const Graph *src)
  114. {
  115. LSUP_Graph *dest = LSUP_graph_new (src->store_type);
  116. if (UNLIKELY (!dest)) return NULL;
  117. LSUP_rc rc = graph_copy_contents (src, dest);
  118. if (UNLIKELY (rc != LSUP_OK)) return NULL;
  119. return dest;
  120. }
  121. /* TODO uncomment
  122. Graph *
  123. LSUP_graph_bool_op(
  124. const LSUP_bool_op op, const Graph *gr1, const Graph *gr2)
  125. {
  126. if (UNLIKELY (gr1->store_type != LSUP_STORE_MEM)) {
  127. fprintf(
  128. stderr,
  129. "First operand %s is not an in-memory graph. "
  130. "Cannot perform boolean operation.",
  131. gr1->uri->data);
  132. return NULL;
  133. }
  134. if (UNLIKELY (gr2->store_type != LSUP_STORE_MEM)) {
  135. fprintf(
  136. stderr,
  137. "Second operand %s is not an in-memory graph. "
  138. "Cannot perform boolean operation.",
  139. gr2->uri->data);
  140. return NULL;
  141. }
  142. LSUP_Graph *res = LSUP_graph_new (LSUP_STORE_MEM);
  143. res->ht_store = LSUP_htstore_bool_op (op, gr1->ht_store, gr2->ht_store);
  144. return res;
  145. }
  146. */
  147. void
  148. LSUP_graph_free (LSUP_Graph *gr)
  149. {
  150. if (LIKELY (gr != NULL)) {
  151. LSUP_term_free (gr->uri);
  152. if (gr->store_type == LSUP_STORE_MEM)
  153. NULL;// TODO uncomment LSUP_htstore_free (gr->ht_store);
  154. else
  155. LSUP_mdbstore_free (gr->mdb_store);
  156. free (gr);
  157. }
  158. }
  159. LSUP_Term *
  160. LSUP_graph_uri (const LSUP_Graph *gr) { return gr->uri; }
  161. LSUP_rc
  162. LSUP_graph_set_uri (LSUP_Graph *gr, const char *uri)
  163. { return LSUP_uri_init (gr->uri, uri); }
  164. LSUP_rc
  165. LSUP_graph_resize (LSUP_Graph *gr, size_t size)
  166. {
  167. return LSUP_OK; // TODO remove line.
  168. if (gr->store_type == LSUP_STORE_MEM)
  169. return 0;// TODO uncomment LSUP_htstore_resize (gr->ht_store, size);
  170. return LSUP_VALUE_ERR;
  171. }
  172. size_t
  173. LSUP_graph_capacity (const Graph *gr)
  174. {
  175. if (gr->store_type == LSUP_STORE_MEM)
  176. return 0;// TODO uncomment LSUP_htstore_capacity (gr->ht_store);
  177. return LSUP_NOT_IMPL_ERR;
  178. }
  179. size_t
  180. LSUP_graph_size (const Graph *gr)
  181. {
  182. TRACE ("Store type: %d\n", gr->store_type);
  183. if (gr->store_type == LSUP_STORE_MEM)
  184. return 0;// TODO uncomment LSUP_htstore_size (gr->ht_store);
  185. return LSUP_mdbstore_size (gr->mdb_store);
  186. }
  187. // TODO Add add_stream_init, add_stream_iter and add_stream_done for streaming
  188. // functions.
  189. GraphIterator *
  190. LSUP_graph_add_stream_init (LSUP_Graph *gr)
  191. {
  192. GraphIterator *it = malloc (sizeof (*it));
  193. if (!it) return NULL;
  194. LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
  195. it->mdb_iter = LSUP_mdbstore_add_init (gr->mdb_store, sc);
  196. LSUP_buffer_free (sc);
  197. it->graph = gr;
  198. it->i = 0;
  199. return it;
  200. }
  201. LSUP_rc
  202. LSUP_graph_add_stream_iter (LSUP_GraphIterator *it, const LSUP_SerTriple *sspo)
  203. { return LSUP_mdbstore_add_iter (it->mdb_iter, sspo); }
  204. void
  205. LSUP_graph_add_stream_done (LSUP_GraphIterator *it)
  206. { LSUP_mdbstore_add_done(it->mdb_iter); }
  207. LSUP_rc
  208. LSUP_graph_add(
  209. LSUP_Graph *gr,
  210. const LSUP_Triple trp[], size_t trp_ct,
  211. const LSUP_SerTriple strp[], size_t strp_ct, size_t *inserted)
  212. {
  213. LSUP_rc rc;
  214. if (inserted) *inserted = 0;
  215. /*
  216. * NOTE It is possible to pass both sets of RDF triples and buffer triples.
  217. */
  218. if (gr->store_type == LSUP_STORE_MEM) {
  219. return LSUP_NOT_IMPL_ERR;
  220. /* TODO
  221. // Resize all at once if needed.
  222. htsize_t prealloc = LSUP_htstore_size (gr->ht_store) + trp_ct + strp_ct;
  223. if (LSUP_htstore_capacity (gr->ht_store) < prealloc) {
  224. rc = LSUP_htstore_resize (gr->ht_store, prealloc * PREALLOC_FACTOR);
  225. if (UNLIKELY (rc != LSUP_OK)) return rc;
  226. }
  227. rc = LSUP_NOACTION;
  228. // Serialize and insert RDF triples.
  229. if (trp_ct > 0) {
  230. for (size_t i = 0; i < trp_ct; i++) {
  231. LSUP_SerTriple sspo;
  232. LSUP_triple_serialize (trp + i, &sspo);
  233. TRACE ("Inserting triple #%lu\n", i);
  234. LSUP_rc db_rc = LSUP_htstore_add (gr->ht_store, &sspo);
  235. if (LIKELY (db_rc == LSUP_OK)) {
  236. rc = LSUP_OK;
  237. if (inserted) (*inserted) ++;
  238. }
  239. LSUP_striple_done (&sspo);
  240. if (UNLIKELY (db_rc < 0)) return db_rc;
  241. }
  242. }
  243. // Insert serialized triples.
  244. for (size_t i = 0; i < strp_ct; i++) {
  245. TRACE ("Inserting triple #%lu\n", i);
  246. LSUP_rc db_rc = LSUP_htstore_add (gr->ht_store, strp + i);
  247. if (LIKELY (db_rc == LSUP_OK)) {
  248. rc = LSUP_OK;
  249. if (inserted) (*inserted) ++;
  250. }
  251. if (UNLIKELY (db_rc < 0)) return db_rc;
  252. }
  253. return rc;
  254. */
  255. } else {
  256. rc = LSUP_NOACTION;
  257. // Initialize iterator.
  258. LSUP_GraphIterator *it = LSUP_graph_add_stream_init (gr);
  259. // Serialize and insert RDF triples.
  260. if (trp_ct > 0) {
  261. LSUP_SerTriple *sspo = STRP_DUMMY;
  262. for (size_t i = 0; i < trp_ct; i++) {
  263. TRACE ("Inserting triple #%lu\n", i);
  264. LSUP_triple_serialize (trp + i, sspo);
  265. LSUP_rc db_rc = LSUP_graph_add_stream_iter (it, sspo);
  266. if (db_rc == LSUP_OK) rc = LSUP_OK;
  267. if (UNLIKELY (db_rc < 0)) return db_rc;
  268. }
  269. LSUP_striple_free (sspo);
  270. }
  271. // Insert serialized triples.
  272. for (size_t i = 0; i < strp_ct; i++) {
  273. TRACE ("Inserting triple #%lu\n", i);
  274. LSUP_rc db_rc = LSUP_graph_add_stream_iter (it, strp + i);
  275. if (db_rc == LSUP_OK) rc = LSUP_OK;
  276. if (UNLIKELY (db_rc < 0)) return db_rc;
  277. }
  278. LSUP_graph_add_stream_done (it);
  279. if (inserted) *inserted = it->ct;
  280. return rc;
  281. }
  282. return LSUP_VALUE_ERR;
  283. }
  284. LSUP_rc
  285. LSUP_graph_remove (Graph *gr, const LSUP_Triple *spo, size_t *ct)
  286. {
  287. LSUP_rc rc;
  288. LSUP_SerTriple *sspo = LSUP_striple_new_from_triple (spo);
  289. LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
  290. if (gr->store_type == LSUP_STORE_MEM)
  291. rc = 0;// TODO uncomment LSUP_htstore_remove (gr->ht_store, &sspo, ct);
  292. else
  293. rc = LSUP_mdbstore_remove (gr->mdb_store, sspo, sc, ct);
  294. LSUP_striple_free (sspo);
  295. LSUP_buffer_free (sc);
  296. return rc;
  297. }
  298. GraphIterator *
  299. LSUP_graph_lookup (const Graph *gr, const LSUP_Triple *spo)
  300. {
  301. GraphIterator *it;
  302. CRITICAL (it = malloc (sizeof (GraphIterator)));
  303. it->graph = gr;
  304. LSUP_SerTriple *sspo = LSUP_striple_new_from_triple (spo);
  305. LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
  306. if (gr->store_type == LSUP_STORE_MEM) {
  307. // TODO uncomment it->ht_iter = LSUP_htstore_lookup (gr->ht_store, sspo, &it->ct);
  308. } else {
  309. it->mdb_iter = LSUP_mdbstore_lookup (gr->mdb_store, sspo, sc, &it->ct);
  310. }
  311. LSUP_striple_free (sspo);
  312. LSUP_buffer_free (sc);
  313. return it;
  314. }
  315. /** @brief Advance iterator and return serialized triple.
  316. *
  317. * This is an internal function to pass raw buffers between higher-level
  318. * functions without serializing and deserializing triples.
  319. */
  320. inline static LSUP_rc
  321. graph_iter_next_buffer (GraphIterator *it, LSUP_SerTriple *sspo)
  322. {
  323. LSUP_rc rc;
  324. if (it->graph->store_type == LSUP_STORE_MEM)
  325. rc = 0;// TODO uncomment LSUP_htiter_next (it->ht_iter, sspo);
  326. else rc = LSUP_mdbiter_next (it->mdb_iter, sspo);
  327. return rc;
  328. }
  329. LSUP_rc
  330. LSUP_graph_iter_next (GraphIterator *it, LSUP_Triple *spo)
  331. {
  332. LSUP_SerTriple *sspo = NULL;
  333. if (spo) sspo = STRP_DUMMY;
  334. LSUP_rc rc = graph_iter_next_buffer (it, sspo);
  335. if (rc == LSUP_OK) {
  336. if (spo) {
  337. LSUP_term_deserialize (sspo->s, spo->s);
  338. LSUP_term_deserialize (sspo->p, spo->p);
  339. LSUP_term_deserialize (sspo->o, spo->o);
  340. }
  341. it->i++;
  342. }
  343. // Addresses of triple buffers are owned by the DB. Do not free.
  344. if (sspo) {
  345. free (sspo->s);
  346. free (sspo->p);
  347. free (sspo->o);
  348. free (sspo);
  349. }
  350. return rc;
  351. }
  352. void
  353. LSUP_graph_iter_free (GraphIterator *it)
  354. {
  355. if (it->graph->store_type == LSUP_STORE_MEM)
  356. NULL;// TODO uncomment LSUP_htiter_free (it->ht_iter);
  357. else
  358. LSUP_mdbiter_free (it->mdb_iter);
  359. free (it);
  360. }
  361. bool
  362. LSUP_graph_contains (const LSUP_Graph *gr, const LSUP_Triple *spo)
  363. {
  364. GraphIterator *it = LSUP_graph_lookup (gr, spo);
  365. bool rc = LSUP_graph_iter_next (it, NULL) != LSUP_NORESULT;
  366. LSUP_graph_iter_free (it);
  367. return rc;
  368. }
  369. /* * * Static functions * * */
  370. /** @brief Initialize default context and MDB environments.
  371. *
  372. * This is done only once per process.
  373. *
  374. * The ramdisk store persists after the application is closed, but will be
  375. * wiped clean the next time this function is called.
  376. */
  377. static inline LSUP_rc
  378. mdbstore_init()
  379. {
  380. if (UNLIKELY (!default_ctx)) {
  381. // RAM disk store.
  382. char *path = MDB_RAMDISK_PATH;
  383. if (LSUP_mdbstore_setup (&path, true) != LSUP_OK) return LSUP_DB_ERR;
  384. // Permanent store.
  385. path = getenv ("LSUP_MDB_STORE_PATH");
  386. if (LSUP_mdbstore_setup (&path, false) != LSUP_OK) return LSUP_DB_ERR;
  387. LSUP_Term *default_ctx_uri = LSUP_uri_new (default_ctx_label);
  388. default_ctx = LSUP_buffer_new_from_term (default_ctx_uri);
  389. LSUP_term_free (default_ctx_uri);
  390. atexit (ctx_cleanup);
  391. }
  392. return LSUP_OK;
  393. }