store_mdb.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /** @file store_mdb.h
  2. *
  3. * @brief LMDB graph store backend.
  4. *
  5. * This module stores triples in an LMDB embedded store, optionally organized
  6. * into named graphs. The store is optimized and indexed for fast lookup of any
  7. * number of bound terms.
  8. *
  9. * The store must first be initialized once, to create the environment files
  10. * and folders as well as the internal databases, then it must be opened once
  11. * per session. Within that session multiple R/W operations can be performed
  12. * using transactions.
  13. *
  14. * Note that, even though the terms "graph", "context", etc. are used, no code
  15. * in this module checks for valid RDF data. In theory any term can be any
  16. * binary data. This allows using the store for non-RDF graph data.
  17. *
  18. * TODO more doc
  19. */
  20. #ifndef _LSUP_STORE_MDB_H
  21. #define _LSUP_STORE_MDB_H
  22. #include "lmdb.h"
  23. #include "triple.h"
  24. // Relative default store path (presumably the fallback used by LSUP_mdbstore_setup - TODO confirm). FIXME find a better cross-platform path.
  25. #define DEFAULT_ENV_PATH "./mdb_store"
  26. typedef char DbLabel[8]; /**< Fixed 8-char buffer; presumably an internal database label - TODO confirm. */
  27. typedef struct MDBStore LSUP_MDBStore; /**< Store handle type; struct MDBStore is only forward-declared on this line. */
  28. typedef struct MDBIterator LSUP_MDBIterator; /**< Iterator handle type used by the add and lookup functions. */
  29. typedef LSUP_rc (*store_match_fn_t)(const LSUP_TripleKey spok, void *data); /**< Callback type taking a triple key and a caller-supplied pointer. NOTE(review): where it is invoked is not shown here. */
  30. // TODO Introduce compile-time LSUP_BIG_STORE option to define two store
  31. // options: false: 64-bit hashes, uint32 keys, max 4G entries; true:
  32. // 128-bit hashes, size_t keys, max MAX_SIZE entries, larger and slower.
  33. // Ideally this could be specified at runtime to handle different stores with
  34. // different sizes, but code could become more complex.
  35. /** @brief Create the MDB environment and databases on disk.
  36. *
  37. * This function takes care of creating the environment path if not existing,
  38. * and checking that it's a writable directory. If the path is not specified
  39. * in the LSUP_STORE_PATH environment variable, a default directory is used.
  40. *
  41. * @param[in,out] path Path of the suggested directory to use. It may be NULL,
  42. * in which case it will be set either to the environment variable
  43. * LSUP_STORE_PATH, or if that is not set, a default local path.
  44. *
  45. * @param[in] clear Whether to remove a previous environment at this location.
  46. */
  47. LSUP_rc LSUP_mdbstore_setup (char *path, bool clear);
  48. /** @brief Open an MDB store.
  49. *
  50. * The store must have been set up with #LSUP_mdbstore_setup.
  51. *
  52. * Some environment variables affect various store parameters:
  53. *
  54. * - LSUP_MDB_MAPSIZE Long int specifying the size of the memory map. Usually
  55. * it is not necessary to modify this, unless one is operating under memory
  56. * and disk constraints. The default map size is 1Tb.
  57. *
  58. * @param[in] path MDB environment path. This must be the path given by
  59. * #LSUP_mdbstore_setup.
  60. *
  61. * @param[in] default_ctx Serialized URI to be used as a default context for
  62. * triples inserted without a context specified. If NULL, the store operates
  63. * in triple mode.
  64. * @return New store handle, to be released with #LSUP_mdbstore_free.
  65. * NOTE(review): the value returned on failure is not stated here - confirm.
  66. */
  67. LSUP_MDBStore *
  68. LSUP_mdbstore_new (const char *path, const LSUP_Buffer *default_ctx);
  69. /** @brief Close a store and free its handle.
  70. *
  71. * @param[in] store Store handle, as returned by #LSUP_mdbstore_new.
  72. *
  73. */
  74. void LSUP_mdbstore_free (LSUP_MDBStore *store);
  75. /** @brief Print stats about a store and its databases.
  76. *
  77. * TODO
  78. * @param[in] store The store to get stats for.
  79. * @param stat LMDB stat struct. NOTE(review): presumably filled in by this call - confirm.
  80. */
  81. LSUP_rc LSUP_mdbstore_stat (LSUP_MDBStore *store, MDB_stat *stat);
  82. /** @brief Store size.
  83. *
  84. * @param[in] store The store to calculate size of.
  85. *
  86. * @return Number of stored SPO triples across all contexts.
  87. */
  88. size_t LSUP_mdbstore_size (LSUP_MDBStore *store);
  89. /** @brief Initialize bulk triple load.
  90. *
  91. * This is the first step of a bulk load. It is best used when the data at hand
  92. * need to be pre-processed, which can be done in the same loop as the next
  93. * step to keep memory usage low.
  94. *
  95. * @param[in] store The store to add to.
  96. *
  97. * @param[in] sc Context as a serialized term. If this is NULL, and the
  98. * default context is not NULL, triples will be added to the default context
  99. * for the store. If the default context for the store is NULL, regardless of
  100. * the value of sc, triples will be added with no context.
  101. *
  102. * @return Iterator to be passed to the following load steps
  103. * (#LSUP_mdbstore_add_iter, #LSUP_mdbstore_add_done, #LSUP_mdbstore_add_abort).
  104. */
  105. LSUP_MDBIterator *
  106. LSUP_mdbstore_add_init (LSUP_MDBStore *store, const LSUP_Buffer *sc);
  107. /** @brief Add one triple into the store.
  108. *
  109. * This must be called after #LSUP_mdbstore_add_init, using the iterator
  110. * yielded by that function. It may be called multiple times and must be
  111. * followed by #LSUP_mdbstore_add_done.
  112. *
  113. * NOTE: at the moment #LSUP_mdbstore_remove() or another
  114. * #LSUP_mdbstore_add_init() cannot be called between #LSUP_mdbstore_add_init
  115. * and #LSUP_mdbstore_add_abort or #LSUP_mdbstore_add_done. FIXME
  116. *
  117. * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init.
  118. * The following members are of interest:
  119. * it->i stores the total number of records inserted.
  120. *
  121. * @param[in] sspo Serialized triple to be added.
  122. *
  123. * @return LSUP_OK if the triple was inserted; LSUP_NOACTION if the triple
  124. * already existed; LSUP_DB_ERR if an MDB error occurred.
  125. */
  126. LSUP_rc
  127. LSUP_mdbstore_add_iter (struct MDBIterator *it, const LSUP_SerTriple *sspo);
  128. /** @brief Finalize an add loop and free iterator.
  129. *
  130. * If a count of inserted records is needed, #LSUP_mdbiter_cur must be called
  131. * before this function.
  132. *
  133. * This must be called after #LSUP_mdbstore_add_iter.
  134. *
  135. * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init.
  136. */
  137. LSUP_rc
  138. LSUP_mdbstore_add_done (LSUP_MDBIterator *it);
  139. /** @brief Abort an add loop and free iterator.
  140. *
  141. * Usually called on an irrecoverable error from #LSUP_mdbstore_add_iter. None
  142. * of the successful inserts in the same loop is retained.
  143. *
  144. * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init.
  145. */
  146. void
  147. LSUP_mdbstore_add_abort (LSUP_MDBIterator *it);
  148. /** @brief Add a batch of triples with optional context to the store.
  149. *
  150. * This is a shortcut for calling #LSUP_mdbstore_add_init,
  151. * #LSUP_mdbstore_add_iter and #LSUP_mdbstore_add_done in a sequence
  152. * when an array of pre-serialized triples is available.
  153. *
  154. * @param[in] store The store to add to.
  155. *
  156. * @param[in] sc Context as a serialized term. If this is NULL, and the
  157. * default context is not NULL, triples will be added to the default context
  158. * for the store. If the default context for the store is NULL, regardless of
  159. * the value of sc, triples will be added with no context.
  160. * @param[in] strp Triples to be inserted, as an array of serialized triples
  161. * holding ct elements.
  162. *
  163. * @param[in] ct Number of triples to be inserted.
  164. *
  165. * @param[out] inserted If not NULL, it will be filled with the count of
  166. * effectively inserted triples.
  167. */
  168. LSUP_rc LSUP_mdbstore_add(
  169. struct MDBStore *store, const LSUP_Buffer *sc,
  170. const LSUP_SerTriple strp[], const size_t ct, size_t *inserted);
  /** @brief Remove triples from the store.
   *
   * NOTE(review): this declaration had no documentation. Everything below is
   * inferred from the signature and from the parallel #LSUP_mdbstore_lookup
   * declaration, and must be confirmed against the implementation.
   *
   * @param[in] store The store to remove from.
   *
   * @param[in] sspo Serialized triple; presumably a pattern where NULL terms
   * are unbound, as in #LSUP_mdbstore_lookup - TODO confirm.
   *
   * @param[in] sc Serialized context; presumably limits removal to that
   * context - TODO confirm the behavior when NULL.
   *
   * @param ct Presumably receives the number of removed triples - TODO confirm
   * direction and whether NULL is accepted.
   */
  171. LSUP_rc
  172. LSUP_mdbstore_remove(
  173. LSUP_MDBStore *store, const LSUP_SerTriple *sspo,
  174. const LSUP_Buffer *sc, size_t *ct);
  175. /** @brief Look up matching triples and optional context.
  176. *
  177. * This function may return a count of matches and/or an iterator of results as
  178. * serialized triples.
  179. *
  180. * @param[in] store The store to be queried.
  181. *
  182. * @param[in] sspo Serialized triple representing the s, p, o
  183. * terms. Any and all of these may be NULL, which indicates an unbound query
  184. * term. Stores with context not set will always ignore the fourth term.
  185. *
  186. * @param[in] sc Serialized context to limit search to. It may be NULL, in which
  187. * case search is done in all contexts. Note that triples inserted without
  188. * context are assigned the *default* context, indicated by the "default_ctx"
  189. * member of the store struct.
  190. *
  191. * @param[out] ct If not NULL, this will be populated with the number of
  192. * entries found. It is very inexpensive to set for lookups without context,
  193. * much less so for 1-bound and 2-bound context lookups, in which cases it
  194. * should be set only if needed.
  195. *
  196. * @return Result iterator. This is always created even if no matches are
  197. * found and must be freed with #LSUP_mdbiter_free after use. If matches are
  198. * found, the iterator points to the first result which can be retrieved
  199. * with #LSUP_mdbiter_next.
  200. *
  201. * NOTE(review): this comment previously listed LSUP_OK / LSUP_NORESULT as
  202. * return codes, which does not fit the pointer return type - confirm.
  203. */
  204. LSUP_MDBIterator *
  205. LSUP_mdbstore_lookup(
  206. LSUP_MDBStore *store, const LSUP_SerTriple *sspo,
  207. const LSUP_Buffer *sc, size_t *ct);
  208. /** @brief Yield the matching triples and advance the iterator.
  209. *
  210. * This function also checks if the matching triple is associated with a
  211. * context, if one was specified. If no associated contexts are found, the next
  212. * triple is searched, until the end of the results.
  213. *
  214. * NOTE: Iterators keep LMDB cursors and (read only) transactions open. Don't
  215. * hold on to them longer than necessary.
  216. *
  217. * NOTE: The memory pointed to by the individual LSUP_Buffer pointers is
  218. * owned by the database. It must not be written to or freed. To modify
  219. * the data or use them beyond the caller's scope, this memory must be copied.
  220. *
  221. * @param[in] it Opaque iterator handle obtained with #LSUP_mdbstore_lookup.
  222. *
  223. * @param[out] sspo #LSUP_SerTriple to be populated with three serialized terms
  224. * if found, NULL if not found. Internal callers (e.g. counters) may pass NULL
  225. * if they don't need the serialized triples.
  226. *
  227. * @return LSUP_OK if results were found; LSUP_END if no (more) results were
  228. * found; LSUP_DB_ERR if an MDB_* error occurred.
  229. */
  230. LSUP_rc LSUP_mdbiter_next (LSUP_MDBIterator *it, LSUP_SerTriple *sspo);
  231. /** @brief Iterator's internal counter.
  232. *
  233. * This is only useful with #LSUP_mdbstore_add_iter to count inserted records.
  234. *
  235. * @param[in] it An iterator primed with #LSUP_mdbstore_add_init.
  236. *
  237. * @return The value of the #i member. For an add iterator, this is the number
  238. * of successfully inserted records.
  239. */
  240. size_t
  241. LSUP_mdbiter_cur (LSUP_MDBIterator *it);
  242. /** @brief Free an iterator allocated by a lookup.
  243. * @param[in] it Iterator to free. It is passed by value, so the caller's own
  244. * pointer is not reset to NULL by this call and must not be reused afterwards.
  245. */
  246. void LSUP_mdbiter_free (struct MDBIterator *it);
  247. /** @brief Contexts that a triple key appears in.
  248. *
  249. * This function is most conveniently used by a callback to
  250. * #LSUP_mdbstore_lookup because it handles triple keys.
  251. *
  252. * @param[in] store The store to be queried.
  253. *
  254. * @param[in] spok Triple key to look up.
  255. *
  256. * @param[out] ck Pointer to an array of contexts. Memory is allocated by this
  257. * function and must be freed by the caller.
  258. *
  259. * @param[out] ct Number of contexts found.
  260. */
  261. LSUP_rc LSUP_mdbstore_triple_contexts(
  262. LSUP_MDBStore *store, LSUP_Key spok[], LSUP_Key **ck, size_t *ct);
  263. #endif