store_mdb.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. /** @file store_mdb.h
  2. *
  3. * @brief LMDB graph store backend.
  4. *
  5. * This module stores triples in an LMDB embedded store, organized
  6. * into named graphs. The store is optimized and indexed for fast lookup of any
  7. * number of bound terms.
  8. *
  9. * The store must be first initialized once, to create the environment files
  10. * and folders as well as the internal databases, then it must be opened once
  11. * per session. Within that session multiple R/W operations can be performed
  12. * using transactions.
  13. *
  14. * Note that, even though the terms "graph", "context", etc. are used, no code
  15. * in this module checks for valid RDF data. In theory any term can be any
  16. * binary data. This allows using the store for non-RDF graph data.
  17. *
  18. * TODO more doc
  19. */
  20. #ifndef _LSUP_STORE_MDB_H
  21. #define _LSUP_STORE_MDB_H
  22. #include "lmdb.h"
  23. #include "triple.h"
  24. // FIXME find a better cross-platform path.
  25. #define DEFAULT_ENV_PATH "./mdb_store"
  26. // RAMdisk path for MDB volatile store.
  27. #define LSUP_MDB_RAMDISK_PATH TMPDIR "/lsup_mem_graph"
  28. #include "store.h"
  29. typedef char DbLabel[8]; // 8-char array type; presumably holds a database label — TODO confirm.
  30. typedef struct mdbstore_t LSUP_MDBStore; // Store handle type; the struct is not defined in this header.
  31. typedef struct mdbstore_iter_t LSUP_MDBIterator; // Iterator handle type; the struct is not defined in this header.
  32. typedef LSUP_rc (*store_match_fn_t)(const LSUP_TripleKey spok, void *data); // Callback type: takes a triple key and a caller-supplied data pointer, returns an LSUP_rc.
  33. // TODO Introduce compile-time LSUP_BIG_STORE option to define two store
  34. // options: false: 64-bit hashes, uint32 keys, max 4G entries; true:
  35. // 128-bit hashes, size_t keys, max MAX_SIZE entries, larger and slower.
  36. // Ideally this could be specified at runtime to handle different stores with
  37. // different sizes, but code could become more complex.
  38. /** @brief Create the MDB environment and databases on disk.
  39. *
  40. * This function takes care of creating the environment path if not existing,
  41. * and checking that it's a writable directory.
  42. *
  43. * @param[in] path Path of the suggested directory to use. It may be NULL,
  44. * in which case the LSUP_STORE_PATH environment variable is used or, if that
  45. * is not set, a default local path. The string is not modified.
  46. * @param[in] clear Whether to remove a previous environment at this location.
  47. *
  48. * @return LSUP_rc status code. NOTE(review): values not documented here — confirm.
  49. */
  50. LSUP_rc LSUP_mdbstore_setup (const char *path, bool clear);
  51. /** @brief Open an MDB store.
  52. *
  53. * The store must have been set up with #LSUP_mdbstore_setup.
  54. *
  55. * Some environment variables affect various store parameters:
  56. *
  57. * - LSUP_MDB_MAPSIZE Long int specifying the size of the memory map. Usually
  58. * it is not necessary to modify this, unless one is operating under memory
  59. * and disk constraints. The default map size is 1Tb.
  60. *
  61. * @param[in] path MDB environment path. This must be the path given by
  62. * #LSUP_mdbstore_setup.
  63. *
  64. * @param[in] default_ctx Serialized URI to be used as a default context for
  65. * triples inserted without a context specified. If NULL, the store operates
  66. * in triple mode.
  67. *
  68. * @return New store handle, to be released with #LSUP_mdbstore_free.
  69. * NOTE(review): presumably NULL on failure — confirm against the implementation. */
  70. LSUP_MDBStore *
  71. LSUP_mdbstore_new (const char *path, const LSUP_Buffer *default_ctx);
  72. /** @brief Close a store and free its handle.
  73. *
  74. * @param[in] store Store handle, as returned by #LSUP_mdbstore_new. Since the
  75. * handle is freed, it must not be used after this call.
  76. */
  77. void LSUP_mdbstore_free (LSUP_MDBStore *store);
  78. /** @brief Print stats about a store and its databases.
  79. *
  80. * TODO Document which databases are covered and the possible return codes.
  81. * @param[in] store The store to get stats for.
  82. * @param[out] stat LMDB stat struct; presumably populated by this call — TODO confirm.
  83. */
  84. LSUP_rc LSUP_mdbstore_stat (LSUP_MDBStore *store, MDB_stat *stat);
  85. /** @brief Get the size of a store.
  86. *
  87. * @param[in] store The store to calculate size of.
  88. *
  89. * @return Number of stored SPO triples across all contexts.
  90. */
  91. size_t LSUP_mdbstore_size (LSUP_MDBStore *store);
  92. /** @brief Initialize bulk triple load.
  93. *
  94. * This is the first step of a bulk load. It is best used when the data at hand
  95. * need to be pre-processed, which can be done in the same loop as the next
  96. * step to keep memory usage low.
  97. *
  98. * @param[in] store The store to add to.
  99. *
  100. * @param[in] sc Context as a serialized term. If this is NULL, and the
  101. * default context is not NULL, triples will be added to the default context
  102. * for the store. If the default context for the store is NULL, regardless of
  103. * the value of sc, triples will be added with no context.
  104. *
  105. * @return Iterator handle to be passed to the following load steps
  106. * (#LSUP_mdbstore_add_iter, then #LSUP_mdbstore_add_done or #LSUP_mdbstore_add_abort).
  107. */
  108. LSUP_MDBIterator *
  109. LSUP_mdbstore_add_init (LSUP_MDBStore *store, const LSUP_Buffer *sc);
  110. /** @brief Add one triple into the store.
  111. *
  112. * This must be called after #LSUP_mdbstore_add_init, using the iterator
  113. * yielded by that function. It may be called multiple times and must be
  114. * followed by #LSUP_mdbstore_add_done (or #LSUP_mdbstore_add_abort).
  115. *
  116. * NOTE: at the moment #LSUP_mdbstore_remove() or another
  117. * #LSUP_mdbstore_add_init() cannot be called between #LSUP_mdbstore_add_init
  118. * and #LSUP_mdbstore_add_abort or #LSUP_mdbstore_add_done. FIXME
  119. *
  120. * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init. Its internal
  121. * counter (it->i, readable with #LSUP_mdbiter_cur) stores the total number
  122. * of records inserted.
  123. *
  124. * @param[in] sspo Serialized triple to be added.
  125. *
  126. * @return LSUP_OK if the triple was inserted; LSUP_NOACTION if the triple
  127. * already existed; LSUP_DB_ERR if an MDB error occurred.
  128. */
  129. LSUP_rc
  130. LSUP_mdbstore_add_iter (LSUP_MDBIterator *it, const LSUP_BufferTriple *sspo);
  131. /** @brief Finalize an add loop and free iterator.
  132. *
  133. * This must be called after #LSUP_mdbstore_add_iter. If a count of inserted
  134. * records is needed, #LSUP_mdbiter_cur must be called before this function,
  135. * because the iterator is freed here.
  136. *
  137. * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init.
  138. * @return LSUP_rc status code. NOTE(review): values not documented here — confirm.
  139. */
  140. LSUP_rc
  141. LSUP_mdbstore_add_done (LSUP_MDBIterator *it);
  142. /** @brief Abort an add loop and free iterator.
  143. *
  144. * Usually called on an irrecoverable error from #LSUP_mdbstore_add_iter. None
  145. * of the successful inserts in the same loop is retained.
  146. *
  147. * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init.
  148. */
  149. void
  150. LSUP_mdbstore_add_abort (LSUP_MDBIterator *it);
  151. /** @brief Add a batch of triples with optional context to the store.
  152. *
  153. * This is a shortcut for calling #LSUP_mdbstore_add_init,
  154. * #LSUP_mdbstore_add_iter and #LSUP_mdbstore_add_done in a sequence
  155. * when an array of pre-serialized triples is available.
  156. *
  157. * @param[in] store The store to add to.
  158. *
  159. * @param[in] sc Context as a serialized term. If this is NULL, and the
  160. * default context is not NULL, triples will be added to the default context
  161. * for the store. If the default context for the store is NULL, regardless of
  162. * the value of sc, triples will be added with no context.
  163. * @param[in] strp Triples to be inserted, as an array of ct serialized
  164. * triples.
  165. * @param[in] ct Number of triples to be inserted.
  166. * @param[out] inserted If not NULL, it will be filled with the count of
  167. * effectively inserted triples.
  168. *
  169. * @return LSUP_rc status code. NOTE(review): values not documented here — confirm.
  170. */
  171. LSUP_rc LSUP_mdbstore_add(
  172. LSUP_MDBStore *store, const LSUP_Buffer *sc,
  173. const LSUP_BufferTriple strp[], const size_t ct, size_t *inserted);
  174. /** @brief Delete triples by pattern matching.
  175. *
  176. * The ss, sp, so, sc terms act as a matching pattern as documented in
  177. * #LSUP_mdbstore_lookup. If not NULL, ct yields the number of triples actually
  178. * deleted. NOTE(review): LSUP_rc return values are not documented here — confirm.
  179. */
  180. LSUP_rc
  181. LSUP_mdbstore_remove(
  182. LSUP_MDBStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
  183. const LSUP_Buffer *so, const LSUP_Buffer *sc, size_t *ct);
  184. /** @brief Look up matching triples and optional context.
  185. *
  186. * This function may return a count of matches and/or an iterator of results as
  187. * serialized triples.
  188. *
  189. * Any and all of the terms may be NULL, which indicates an unbound query
  190. * term. Stores with context not set will always ignore the fourth term.
  191. *
  192. * @param[in] store The store to be queried.
  193. *
  194. * @param[in] ss Buffer representing the serialized s term.
  195. *
  196. * @param[in] sp Buffer representing the serialized p term.
  197. *
  198. * @param[in] so Buffer representing the serialized o term.
  199. *
  200. * @param[in] sc Serialized context to limit search to. It may be NULL, in which
  201. * case search is done in all contexts. Note that triples inserted without
  202. * context are assigned the *default* context, indicated by the "default_ctx"
  203. * member of the store struct.
  204. *
  205. * @param[out] ct If not NULL, this will be populated with the number of
  206. * entries found. It is very inexpensive to set for lookups without context,
  207. * much less so for 1-bound and 2-bound context lookups, in which cases it
  208. * should be set only if needed.
  209. *
  210. * @return An #LSUP_MDBIterator handle for the results. This is always
  211. * created even if no matches are found and must be freed with
  212. * #LSUP_mdbiter_free() after use. If matches are found, the iterator points
  213. * to the first result, which can be retrieved with #LSUP_mdbiter_next().
  214. * NOTE(review): how failures are reported (e.g. a NULL return) is not
  215. * stated here — confirm against the implementation.
  216. *
  217. */
  218. LSUP_MDBIterator *
  219. LSUP_mdbstore_lookup(
  220. LSUP_MDBStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
  221. const LSUP_Buffer *so, const LSUP_Buffer *sc, size_t *ct);
  222. /** @brief Yield the matching triples and advance the iterator.
  223. *
  224. * This function also checks if the matching triple is associated with a
  225. * context, if one was specified. If no associated contexts are found, the next
  226. * triple is searched, until the end of the results.
  227. *
  228. * NOTE: Iterators keep LMDB cursors and (read only) transactions open. Don't
  229. * hold on to them longer than necessary.
  230. *
  231. * NOTE: The memory pointed to by the individual LSUP_Buffer pointers is
  232. * owned by the database. It must not be written to or freed. To modify
  233. * the data or use them beyond the caller's scope, this memory must be copied.
  234. *
  235. * @param[in] it Opaque iterator handle obtained with #LSUP_mdbstore_lookup.
  236. *
  237. * @param[out] sspo #LSUP_BufferTriple to be populated with three serialized terms
  238. * if found, NULL if not found. Internal callers (e.g. counters) may pass NULL
  239. * if they don't need the serialized triples.
  240. * @param[out] ctx NOTE(review): undocumented; presumably receives the context(s) of the yielded triple — confirm.
  241. * @return LSUP_OK if results were found; LSUP_END if no (more) results were
  242. * found; LSUP_DB_ERR if an MDB_* error occurred.
  243. */
  244. LSUP_rc LSUP_mdbiter_next (
  245. LSUP_MDBIterator *it, LSUP_BufferTriple *sspo, LSUP_Buffer **ctx);
  246. /** @brief Iterator's internal counter.
  247. *
  248. * This is only useful with #LSUP_mdbstore_add_iter to count inserted records.
  249. *
  250. * @param[in] it An iterator primed with #LSUP_mdbstore_add_init.
  251. *
  252. * @return The value of the #i member. For an add iterator, this is the number
  253. * of successfully inserted records.
  254. */
  255. size_t
  256. LSUP_mdbiter_cur (LSUP_MDBIterator *it);
  257. /** @brief Free an iterator allocated by a lookup.
  258. *
  259. * @param[in] it Iterator to free. It is passed by value: the caller's pointer is not reset and must not be reused.
  260. */
  261. void LSUP_mdbiter_free (LSUP_MDBIterator *it);
  262. /** @brief Gather the contexts of all triples matching a pattern.
  263. *
  264. * This function yields a NULL-terminated array of LSUP_Buffer handles for the
  265. * contexts of all the triples that match an s, p, o lookup pattern in a
  266. * store. All values are unique.
  267. *
  268. * TODO Implement a free method (and align names). Currently freeing the result
  269. * is non-trivial and easy to forget:
  270. *
  271. * size_t i = 0;
  272. * while (ctx_a[i] != NULL)
  273. * free (ctx_a[i++]); // Buffer data are memory-mapped. Not freeing.
  274. * free (ctx_a);
  275. *
  276. * ss, sp, so arguments are used as in #LSUP_mdbstore_lookup().
  277. *
  278. * @param[in] store The store to be queried.
  279. *
  280. * @param[in] ss Serialized subject. It may be NULL.
  281. *
  282. * @param[in] sp Serialized predicate. It may be NULL.
  283. *
  284. * @param[in] so Serialized object. It may be NULL.
  285. *
  286. * @return NULL-terminated array of context handles. Memory is allocated by
  287. * this function and must be freed by the caller as shown above.
  288. */
  289. LSUP_Buffer **
  290. LSUP_mdbstore_lookup_contexts (
  291. LSUP_MDBStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
  292. const LSUP_Buffer *so);
  293. /** @brief Get all namespace prefixes in the store.
  294. *
  295. * @param[in] store MDB store to query.
  296. *
  297. * @param[out] nsm Pointer to namespace map to generate.
  298. * NOTE(review): ownership of the generated map is not documented — presumably the caller frees it; confirm.
  299. * @return LSUP_OK on success; LSUP_DB_ERR on MDB error.
  300. */
  301. LSUP_rc
  302. LSUP_mdbstore_nsm_get (LSUP_MDBStore *store, LSUP_NSMap **nsm);
  303. /** @brief Store an in-memory namespace map into the permanent back end.
  304. *
  305. * Existing prefixes and namespaces are not updated. Thus, if the following are
  306. * already stored:
  307. *
  308. * ns1: <urn:ns:a#>
  309. * ns2: <urn:ns:b#>
  310. *
  311. * Neither of the following will be inserted:
  312. *
  313. * ns3: <urn:ns:a#>
  314. * ns2: <urn:ns:c#>
  315. *
  316. * @param[in] store MDB store to update.
  317. *
  318. * @param[in] nsm Namespace map handle to store. It is not modified.
  319. *
  320. * @return LSUP_OK if all terms were updated; LSUP_CONFLICT if one or more
  321. * namespaces or terms were not updated because they already existed.
  322. */
  323. LSUP_rc
  324. LSUP_mdbstore_nsm_store (LSUP_MDBStore *store, const LSUP_NSMap *nsm);
  325. /** @brief Populate the ID cache with data types and lang tags from store.
  326. * @param[in] store The store to get data from.
  327. * @return LSUP_rc status code. NOTE(review): values not documented here — confirm.
  328. */
  329. LSUP_rc
  330. LSUP_mdbstore_idcache_get (LSUP_MDBStore *store);
  331. /** @brief Store an ID into an MDB store.
  332. *
  333. * @param[in] store MDB store.
  334. * @param[in] id ID to store. NOTE(review): presumably a NUL-terminated string — confirm.
  335. * @return LSUP_rc status code. NOTE(review): values not documented here — confirm.
  336. */
  337. LSUP_rc
  338. LSUP_mdbstore_idcache_store (LSUP_MDBStore *store, const char *id);
  339. #endif