123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373 |
- /** @file store_mdb.h
- *
- * @brief LMDB graph store backend.
- *
- * This module stores triples in an LMDB embedded store, organized
- * into named graphs. The store is optimized and indexed for fast lookup of any
- * number of bound terms.
- *
- * The store must be first initialized once, to create the environment files
- * and folders as well as the internal databases, then it must be opened once
- * per session. Within that session multiple R/W operations can be performed
- * using transactions.
- *
- * Note that, even though the terms "graph", "context", etc. are used, no code
- * in this module checks for valid RDF data. In theory any term can be any
- * binary data. This allows using the store for non-RDF graph data.
- *
- * TODO more doc
- */
- #ifndef _LSUP_STORE_MDB_H
- #define _LSUP_STORE_MDB_H
- #include "lmdb.h"
- #include "buffer.h"
- #include "namespace.h"
- #include "store.h"
- // Default store path (relative to the CWD). FIXME find a cross-platform one.
- #define DEFAULT_ENV_PATH "./mdb_store"
- // RAMdisk path for MDB volatile store; TMPDIR is defined outside this file.
- #define LSUP_MDB_RAMDISK_PATH TMPDIR "/lsup_mem_graph"
- typedef char DbLabel[8]; /**< Fixed-size, 8-char label buffer. */
- typedef struct mdbstore_t LSUP_MDBStore; /**< Store handle (opaque here). */
- typedef struct mdbstore_iter_t LSUP_MDBIterator; /**< Iterator handle (opaque
- * here); returned both by lookups and by #LSUP_mdbstore_add_init. */
- typedef LSUP_rc (*store_match_fn_t)(const LSUP_TripleKey spok, void *data); /**<
- * Callback signature: receives a triple key and an untyped data pointer and
- * returns an LSUP_rc. No declaration in this header uses it. */
- // TODO Introduce compile-time LSUP_BIG_STORE option to define two store
- // options: false: 64-bit hashes, uint32 keys, max 4G entries; true:
- // 128-bit hashes, size_t keys, max MAX_SIZE entries, larger and slower.
- // Ideally this could be specified at runtime to handle different stores with
- // different sizes, but code could become more complex.
- /** @brief Create the MDB environment and databases on disk.
- *
- * This function takes care of creating the environment path if not existing,
- * and checking that it's a writable directory. If the path is not specified
- * in the LSUP_STORE_PATH environment variable, a default directory is used.
- *
- * @param[in] path Path of the suggested directory to use. It may be NULL,
- * in which case the location is taken from the environment variable
- * LSUP_STORE_PATH or, if that is not set, from a default local path. The
- * argument is a by-value pointer to const data, so the resolved path is not
- * reported back to the caller.
- *
- * @param[in] clear Whether to remove a previous environment at this location.
- *
- * @return An LSUP_rc status code; the specific values are not documented in
- * this header (TODO document).
- */
- LSUP_rc LSUP_mdbstore_setup (const char *path, bool clear);
- /** @brief Open an MDB store.
- *
- * The store must have been set up with #LSUP_mdbstore_setup.
- *
- * Some environment variables affect various store parameters:
- *
- * - LSUP_MDB_MAPSIZE Long int specifying the size of the memory map. Usually
- * it is not necessary to modify this, unless one is operating under memory
- * and disk constraints. The default map size is 1Tb.
- *
- * @param[in] path MDB environment path. This must be the same environment
- * path that #LSUP_mdbstore_setup operated on.
- *
- * @param[in] default_ctx Serialized URI to be used as a default context for
- * triples inserted without a context specified. If NULL, the store operates
- * in triple mode.
- *
- * @return Store handle, to be released with #LSUP_mdbstore_free.
- * NOTE(review): the value returned on failure (presumably NULL) is not stated
- * here; confirm against the implementation.
- */
- LSUP_MDBStore *
- LSUP_mdbstore_new (const char *path, const LSUP_Buffer *default_ctx);
- /** @brief Close a store and free its handle.
- *
- * @param[in] store Store pointer, as returned by #LSUP_mdbstore_new. Since
- * the handle is freed, it must not be used after this call.
- */
- void LSUP_mdbstore_free (LSUP_MDBStore *store);
- /** @brief Store feature flags.
- *
- * @param[in] store Store handle.
- *
- * @return A combination of LSUP_STORE_* feature flags. These flags are not
- * defined in this header (presumably they come from store.h; confirm).
- */
- int LSUP_mdbstore_features (LSUP_MDBStore *store);
- /** @brief Print stats about a store and its databases.
- *
- * TODO
- *
- * @param[in] store The store to get stats for.
- *
- * @param stat Pointer to an LMDB statistics structure. NOTE(review): it is
- * presumably filled in by this call; confirm and document.
- *
- * @return An LSUP_rc status code (values not documented here).
- */
- LSUP_rc LSUP_mdbstore_stat (LSUP_MDBStore *store, MDB_stat *stat);
- /** @brief Store size.
- *
- * @param[in] store The store to calculate size of.
- *
- * @return Number of stored SPO triples across all contexts.
- */
- size_t LSUP_mdbstore_size (LSUP_MDBStore *store);
- /** @brief Initialize bulk triple load.
- *
- * This is the first step of a bulk load. It is best used when the data at hand
- * need to be pre-processed, which can be done in the same loop as the next
- * step to keep memory usage low.
- *
- * @param[in] store The store to add to.
- *
- * @param[in] sc Context as a serialized term. If this is NULL, and the
- * default context is not NULL, triples will be added to the default context
- * for the store. If the default context for the store is NULL, regardless of
- * the value of sc, triples will be added with no context.
- *
- * @return Iterator handle to be passed to the following load steps
- * (#LSUP_mdbstore_add_iter, then #LSUP_mdbstore_add_done or
- * #LSUP_mdbstore_add_abort).
- */
- LSUP_MDBIterator *
- LSUP_mdbstore_add_init (LSUP_MDBStore *store, const LSUP_Buffer *sc);
- /** @brief Add one triple into the store.
- *
- * This must be called after #LSUP_mdbstore_add_init, using the iterator
- * yielded by that function. It may be called multiple times and must be
- * followed by #LSUP_mdbstore_add_done.
- *
- * NOTE: at the moment #LSUP_mdbstore_remove() or another
- * #LSUP_mdbstore_add_init() cannot be called between #LSUP_mdbstore_add_init
- * and #LSUP_mdbstore_add_abort or #LSUP_mdbstore_add_done. FIXME
- *
- * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init.
- * The following members are of interest (the struct layout itself is not
- * exposed by this header):
- * it->i stores the total number of records inserted.
- *
- * @param[in] sspo Serialized triple to be added.
- *
- * @return LSUP_OK if the triple was inserted; LSUP_NOACTION if the triple
- * already existed; LSUP_DB_ERR if an MDB error occurred.
- */
- LSUP_rc
- LSUP_mdbstore_add_iter (LSUP_MDBIterator *it, const LSUP_BufferTriple *sspo);
- /** @brief Finalize an add loop and free iterator.
- *
- * If a count of inserted records is needed, #LSUP_mdbiter_cur must be called
- * before this function.
- * NOTE(review): LSUP_mdbiter_cur is not declared in this header; confirm that
- * it is available elsewhere.
- *
- * This must be called after #LSUP_mdbstore_add_iter.
- *
- * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init. It is freed by
- * this call and must not be used afterwards.
- *
- * @return An LSUP_rc status code (values not documented here).
- */
- LSUP_rc
- LSUP_mdbstore_add_done (LSUP_MDBIterator *it);
- /** @brief Abort an add loop and free iterator.
- *
- * Usually called on an irrecoverable error from #LSUP_mdbstore_add_iter. None
- * of the successful inserts in the same loop is retained.
- *
- * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init. It is freed by
- * this call and must not be used afterwards.
- */
- void
- LSUP_mdbstore_add_abort (LSUP_MDBIterator *it);
- /** @brief Add a batch of triples with optional context to the store.
- *
- * This is a shortcut for calling #LSUP_mdbstore_add_init,
- * #LSUP_mdbstore_add_iter and #LSUP_mdbstore_add_done in a sequence
- * when an array of pre-serialized triples is available.
- *
- * @param[in] store The store to add to.
- *
- * @param[in] sc Context as a serialized term. If this is NULL, and the
- * default context is not NULL, triples will be added to the default context
- * for the store. If the default context for the store is NULL, regardless of
- * the value of sc, triples will be added with no context.
- *
- * @param[in] strp Triples to be inserted, as an array of serialized triples
- * holding ct elements.
- *
- * @param[in] ct Number of triples to be inserted.
- *
- * @param[out] inserted If not NULL, it will be filled with the count of
- * effectively inserted triples.
- *
- * @return An LSUP_rc status code (values not documented here).
- */
- LSUP_rc LSUP_mdbstore_add(
- LSUP_MDBStore *store, const LSUP_Buffer *sc,
- const LSUP_BufferTriple strp[], const size_t ct, size_t *inserted);
- /** @brief Delete triples by pattern matching.
- *
- * The ss, sp, so, sc terms act as a matching pattern as documented in
- * #LSUP_mdbstore_lookup.
- *
- * @param[in] store The store to delete from.
- *
- * @param[in] ss Serialized s term of the pattern.
- *
- * @param[in] sp Serialized p term of the pattern.
- *
- * @param[in] so Serialized o term of the pattern.
- *
- * @param[in] sc Serialized context term of the pattern.
- *
- * @param[out] ct If not NULL, ct yields the number of triples actually
- * deleted.
- *
- * @return An LSUP_rc status code (values not documented here).
- */
- LSUP_rc
- LSUP_mdbstore_remove(
- LSUP_MDBStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
- const LSUP_Buffer *so, const LSUP_Buffer *sc, size_t *ct);
- /** @brief Look up matching triples and optional context.
- *
- * This function may return a count of matches and/or an iterator of results as
- * serialized triples.
- *
- * Any and all of the terms may be NULL, which indicates an unbound query
- * term. Stores with context not set will always ignore the fourth term.
- *
- * @param[in] store The store to be queried.
- *
- * @param[in] ss Buffer representing the serialized s term.
- *
- * @param[in] sp Buffer representing the serialized p term.
- *
- * @param[in] so Buffer representing the serialized o term.
- *
- * @param[in] sc Serialized context to limit search to. It may be NULL, in which
- * case search is done in all contexts. Note that triples inserted without
- * context are assigned the *default* context, indicated by the "default_ctx"
- * member of the store struct.
- *
- * @param[out] ct If not NULL, this will be populated with the number of
- * entries found. It is very inexpensive to set for lookups without context,
- * much less so for 1-bound and 2-bound context lookups, in which cases it
- * should be set only if needed.
- *
- * @return #LSUP_MDBIterator handle for the results. This is always created
- * even if no matches are found and must be freed with #LSUP_mdbiter_free()
- * after use. If matches are found, the iterator points to the first result
- * which can be retrieved with #LSUP_mdbiter_next().
- * NOTE(review): the return type is a pointer, so no LSUP_OK / LSUP_NORESULT
- * code is returned by this signature; how errors are reported is not stated
- * here and should be confirmed against the implementation.
- */
- LSUP_MDBIterator *
- LSUP_mdbstore_lookup(
- LSUP_MDBStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
- const LSUP_Buffer *so, const LSUP_Buffer *sc, size_t *ct);
- /** @brief Yield the matching triples and advance the iterator.
- *
- * This function also checks if the matching triple is associated with a
- * context, if one was specified. If no associated contexts are found, the next
- * triple is searched, until the end of the results.
- *
- * NOTE: Iterators keep LMDB cursors and (read only) transactions open. Don't
- * hold on to them longer than necessary.
- *
- * NOTE: The memory pointed to by the individual LSUP_Buffer pointers is
- * owned by the database. It must not be written to or freed. To modify
- * the data or use them beyond the caller's scope, this memory must be copied.
- *
- * @param[in] it Opaque iterator handle obtained with #LSUP_mdbstore_lookup.
- *
- * @param[out] sspo #LSUP_BufferTriple to be populated with three serialized
- * terms if found, NULL if not found. Internal callers (e.g. counters) may pass
- * NULL if they don't need the serialized triples.
- *
- * @param[out] ctx If not NULL, it is populated with a NULL-terminated array of
- * LSUP_Buffer structs, one for each context associated with the matching
- * triple. These contexts are the same regardless of the context filter used
- * in the lookup. The array is freed with a simple free().
- *
- * To iterate over the context array, use this loop, where ctx stands for the
- * array itself (i.e. the LSUP_Buffer pointer stored through this parameter)
- * and the last entry is the one whose addr member is NULL:
- *
- * @code
- * size_t i = 0;
- * while (ctx[i].addr)
- * do_something(ctx + i++); // Buffer data are memory-mapped and RO.
- * @endcode
- *
- * @return LSUP_OK if results were found; LSUP_END if no (more) results were
- * found; LSUP_DB_ERR if an MDB_* error occurred.
- */
- LSUP_rc LSUP_mdbiter_next (
- LSUP_MDBIterator *it, LSUP_BufferTriple *sspo, LSUP_Buffer **ctx);
- /** @brief Free an iterator allocated by a lookup.
- *
- * @param[in] it Iterator pointer. The handle is passed by value, so this call
- * cannot reset the caller's own pointer: the caller must not use it after
- * freeing and should set it to NULL itself if it stays in scope.
- */
- void LSUP_mdbiter_free (LSUP_MDBIterator *it);
- /** @brief Get all namespace prefixes in the store.
- *
- * @param[in] store MDB store to query.
- *
- * @param[out] nsm Pointer to namespace map to generate. NOTE(review): who
- * owns and frees the generated map is not stated here; confirm against the
- * implementation.
- *
- * @return LSUP_OK on success; LSUP_DB_ERR on MDB error.
- */
- LSUP_rc
- LSUP_mdbstore_nsm_get (LSUP_MDBStore *store, LSUP_NSMap **nsm);
- /** @brief Store an in-memory namespace map into the permanent back end.
- *
- * Existing prefixes and namespaces are not updated. Thus, if the following are
- * already stored:
- *
- * ns1: <urn:ns:a#>
- * ns2: <urn:ns:b#>
- *
- * Neither of the following will be inserted:
- *
- * ns3: <urn:ns:a#>
- * ns2: <urn:ns:c#>
- *
- * @param[in] store MDB store to update.
- *
- * @param[in] nsm Namespace map handle to store. It is taken as a pointer to
- * const and is only read.
- *
- * @return LSUP_OK if all terms were updated; LSUP_CONFLICT if one or more
- * namespaces or terms were not updated because they already existed; <0 if
- * an error occurred.
- */
- LSUP_rc
- LSUP_mdbstore_nsm_store (LSUP_MDBStore *store, const LSUP_NSMap *nsm);
- /** @brief Add a single term to the store.
- *
- * @param[in] store MDB store handle.
- *
- * @param[in] sterm Serialized term to store.
- *
- * @return An LSUP_rc status code (values not documented here).
- */
- LSUP_rc
- LSUP_mdbstore_add_term (LSUP_MDBStore *store, const LSUP_Buffer *sterm);
- /** @brief Whether a term key exists.
- *
- * @param[in] store MDB store to search in.
- *
- * @param[in] tkey Key to look up.
- *
- * @return 1 if the term exists, 0 if it does not exist; <0 on error.
- */
- int
- LSUP_mdbstore_tkey_exists (LSUP_MDBStore *store, LSUP_Key tkey);
- #endif /* _LSUP_STORE_MDB_H */
|