store_mdb.h 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. /** @file store_mdb.h
  2. *
  3. * @brief LMDB graph store backend.
  4. *
  5. * This module stores triples in an LMDB embedded store, optionally organized
  6. * into named graphs. The store is optimized and indexed for fast lookup of any
  7. * number of bound terms.
  8. *
  9. * The store must be first initialized once, to create the environment files
  10. * and folders as well as the internal databases, then it must be opened once
  11. * per session. Within that session multiple R/W operations can be performed
  12. * using transactions.
  13. *
  14. * Note that, even though the terms "graph", "context", etc. are used, no code
  15. * in this module checks for valid RDF data. In theory any term can be any
  16. * binary data. This allows using the store for non-RDF graph data.
  17. *
  18. * TODO more doc
  19. */
  20. #ifndef _LSUP_STORE_MDB_H // NOTE(review): underscore + uppercase names are reserved in C; consider renaming the guard.
  21. #define _LSUP_STORE_MDB_H
  22. #include "lmdb.h"
  23. #include "triple.h"
  24. typedef char DbLabel[8]; // Fixed 8-char label; presumably an internal database name — TODO confirm.
  25. typedef struct MDBStore LSUP_MDBStore; // Store handle; struct MDBStore is not defined in this header.
  26. typedef struct MDBIterator LSUP_MDBIterator; // Lookup iterator handle; struct MDBIterator is not defined in this header.
  27. typedef LSUP_rc (*store_match_fn_t)(const LSUP_TripleKey spok, void *data); // Callback taking a triple key and an untyped data pointer.
  28. // TODO Introduce compile-time LSUP_BIG_STORE option to define two store
  29. // options: false: 64-bit hashes, uint32 keys, max 4G entries; true:
  30. // 128-bit hashes, size_t keys, max MAX_SIZE entries, larger and slower.
  31. // Ideally this could be specified at runtime to handle different stores with
  32. // different sizes, but code could become more complex.
  33. /** @brief Create the MDB environment and databases on disk.
  34. *
  35. * This function takes care of creating the environment path if it does not
  36. * exist, and checking that it's a writable directory. If the path is not specified
  37. * in the LSUP_STORE_PATH environment variable, a default directory is used.
  38. *
  39. * TODO Add clear parameter.
  40. *
  41. * @param[in,out] path Path of the suggested directory to use. It may be NULL,
  42. * in which case it will be set either to the environment variable
  43. * LSUP_STORE_PATH, or if that is not set, a default local path.
  44. */
  45. LSUP_rc LSUP_store_setup(char **path/*, bool clear*/);
  46. /** @brief Open an MDB store.
  47. *
  48. * The store must have been set up with #LSUP_store_setup.
  49. *
  50. * Some environment variables affect various store parameters:
  51. *
  52. * - LSUP_MDB_MAPSIZE Long int specifying the size of the memory map. Usually
  53. * it is not necessary to modify this, unless one is operating under memory
  54. * and disk constraints. The default map size is 1Tb.
  55. *
  56. * @return Store handle, to be released with #LSUP_store_free (presumably NULL on failure — TODO confirm).
  57. *
  58. * @param[in] path MDB environment path. This must be the path given by
  59. * #LSUP_store_setup.
  60. *
  61. * @param[in] default_ctx Serialized URI to be used as a default context for
  62. * triples inserted without a context specified. If NULL, the store operates
  63. * in triple mode.
  64. */
  65. LSUP_MDBStore *
  66. LSUP_store_new(const char *path, const LSUP_Buffer *default_ctx);
  67. /** @brief Close a store and free its handle.
  68. *
  69. * @param[in] store Store handle, as returned by #LSUP_store_new.
  70. *
  71. */
  72. void LSUP_store_free(LSUP_MDBStore *store);
  73. /** @brief Print stats about a store and its databases.
  74. *
  75. * TODO
  76. *
  77. * @param[in] store The store to get stats for.
  78. */
  79. LSUP_rc LSUP_store_stats(LSUP_MDBStore *store);
  80. /** @brief Store size.
  81. *
  82. * @param[in] store The store to calculate size of.
  83. *
  84. * @return Number of stored SPO triples across all contexts.
  85. */
  86. size_t LSUP_store_size(LSUP_MDBStore *store);
  87. /** @brief Add a batch of triples with optional context to the store.
  88. *
  89. * @param[in] store The store to add to.
  90. *
  91. * @param[in] sc Context as a serialized term. If this is NULL, and the
  92. * default context is not NULL, triples will be added to the default context
  93. * for the store. If the default context for the store is NULL, regardless of
  94. * the value of sc, triples will be added with no context.
  95. * @param[in] data Triples to be inserted as a 2D array of triples in the shape
  96. * of data[n][3], where n is the value of data_size.
  97. *
  98. * @param[in] data_size Number of triples to be inserted.
  99. */
  100. LSUP_rc LSUP_store_add(
  101. LSUP_MDBStore *store, const LSUP_Buffer *sc,
  102. const LSUP_SerTriple *data, size_t data_size);
  103. /** @brief Look up matching triples and optional context.
  104. *
  105. * This function may return a count of matches and/or an iterator of results as
  106. * serialized triples.
  107. *
  108. * @param[in] store The store to be queried.
  109. *
  110. * @param[in] sspoc Array of 4 serialized term pointers representing the s, p, o, c
  111. * terms. Any and all of these may be NULL, which indicates an unbound query
  112. * term. Stores with context not set will always ignore the fourth term.
  113. *
  114. * @param[out] it Pointer to a pointer to an #LSUP_MDBIterator that will be
  115. * populated with a result iterator. This is always created even if no matches
  116. * are found and must be freed with #LSUP_store_it_free after use. If matches
  117. * are found, the iterator points to the first result which can be retrieved
  118. * with #LSUP_store_it_next.
  119. *
  120. * @param[out] ct If not NULL, this will be populated with the number of
  121. * entries found. It is very inexpensive to set for lookups without context,
  122. * much less so for 1-bound and 2-bound context lookups, in which cases it
  123. * should be set only if needed.
  124. *
  125. * @return LSUP_OK if entries were found, LSUP_NORESULT if none were found.
  126. */
  127. LSUP_rc LSUP_store_lookup(
  128. LSUP_MDBStore *store, LSUP_SerTerm *sspoc[],
  129. LSUP_MDBIterator **it, size_t *ct);
  130. /** @brief Yield the matching triples and advance the iterator.
  131. *
  132. * This function also checks if the matching triple is associated with a
  133. * context, if one was specified. If no associated contexts are found, the next
  134. * triple is searched, until the end of the results.
  135. *
  136. * NOTE: Iterators keep LMDB cursors and (read only) transactions open. Don't
  137. * hold on to them longer than necessary.
  138. *
  139. * NOTE: The memory pointed to by the individual LSUP_SerTerm pointers is
  140. * owned by the database. It must not be written to or freed. To modify
  141. * the data or use them beyond the caller's scope, this memory must be copied.
  142. *
  143. * @param[in] it Opaque iterator handle obtained with #LSUP_store_lookup.
  144. *
  145. * @param[out] sspo Populated with three serialized terms if found, NULL if
  146. * not found. Internal callers may pass NULL if they don't need the serialized
  147. * triples. NOTE(review): declared as LSUP_SerTerm **, not #LSUP_SerTriple — confirm.
  148. *
  149. * @return LSUP_OK if results were found; LSUP_END if no (more) results were
  150. * found; LSUP_DB_ERR if an MDB_* error occurred.
  151. */
  152. LSUP_rc LSUP_store_it_next(LSUP_MDBIterator *it, LSUP_SerTerm **sspo);
  153. /** @brief Free an iterator allocated by a lookup.
  154. *
  155. * @param[in] it Iterator to free. Passed by value: the caller's pointer is not reset and must not be reused.
  156. */
  157. void LSUP_store_it_free(LSUP_MDBIterator *it);
  158. /** @brief Contexts that a triple key appears in.
  159. *
  160. * This function is most conveniently used by a callback to #LSUP_store_lookup
  161. * because it handles triple keys. NOTE(review): #LSUP_store_lookup as declared in this header takes no callback — confirm.
  162. *
  163. * @param[in] store The store to be queried.
  164. *
  165. * @param[in] spok Triple key to look up.
  166. *
  167. * @param[out] ck Pointer to an array of contexts. Memory is allocated by this
  168. * function and must be freed by the caller.
  169. *
  170. * @param[out] ct Number of contexts found.
  171. */
  172. LSUP_rc LSUP_store_triple_contexts(
  173. LSUP_MDBStore *store, LSUP_Key spok[], LSUP_Key **ck, size_t *ct);
  174. #endif