/** @file store_mdb.h
 *
 * @brief LMDB graph store backend.
 *
 * This module stores triples in an LMDB embedded store, organized
 * into named graphs. The store is optimized and indexed for fast lookup of any
 * number of bound terms.
 *
 * The store must be first initialized once, to create the environment files
 * and folders as well as the internal databases, then it must be opened once
 * per session. Within that session multiple R/W operations can be performed
 * using transactions.
 *
 * Note that, even though the terms "graph", "context", etc. are used, no code
 * in this module checks for valid RDF data. In theory any term can be any
 * binary data. This allows using the store for non-RDF graph data.
 *
 * TODO more doc
 */

// NOTE(review): identifiers starting with an underscore followed by an
// uppercase letter are reserved for the implementation in C. The guard name is
// left untouched here because other files may rely on it.
#ifndef _LSUP_STORE_MDB_H
#define _LSUP_STORE_MDB_H

#include "lmdb.h"

#include "buffer.h"
#include "namespace.h"

// FIXME find a better cross-platform path.
#define DEFAULT_ENV_PATH "./mdb_store"

// RAMdisk path for MDB volatile store. TMPDIR is not defined in this header
// and must be provided elsewhere.
#define LSUP_MDB_RAMDISK_PATH TMPDIR "/lsup_mem_graph"

#include "store.h"

// Fixed-size (8 chars) label.
// NOTE(review): presumably names the internal LMDB databases; confirm.
typedef char DbLabel[8];
// Store handle. The struct is only forward-declared in this header.
typedef struct mdbstore_t LSUP_MDBStore;
// Iterator handle. The struct is only forward-declared in this header.
typedef struct mdbstore_iter_t LSUP_MDBIterator;

// Callback type taking a triple key and a caller-supplied data pointer.
// NOTE(review): no function declared in this header uses it; confirm its role
// against the implementation.
typedef LSUP_rc (*store_match_fn_t)(const LSUP_TripleKey spok, void *data);

// TODO Introduce compile-time LSUP_BIG_STORE option to define two store
// options: false: 64-bit hashes, uint32 keys, max 4G entries; true:
// 128-bit hashes, size_t keys, max MAX_SIZE entries, larger and slower.
// Ideally this could be specified at runtime to handle different stores with
// different sizes, but code could become more complex.


/** @brief Create the MDB environment and databases on disk.
 *
 * This function takes care of creating the environment path if not existing,
 * and checking that it's a writable directory. If the path is not specified,
 * the LSUP_STORE_PATH environment variable or a default directory is used.
 *
 * @param[in] path Path of the suggested directory to use. It may be NULL, in
 *  which case the path is taken from the environment variable
 *  LSUP_STORE_PATH or, if that is not set, a default local path. The
 *  argument is a pointer to const passed by value: the caller's string and
 *  pointer are not modified.
 *
 * @param[in] clear Whether to remove a previous environment at this location.
 */
LSUP_rc LSUP_mdbstore_setup (const char *path, bool clear);


/** @brief Open an MDB store.
 *
 * The store must have been set up with #LSUP_mdbstore_setup.
 *
 * Some environment variables affect various store parameters:
 *
 * - LSUP_MDB_MAPSIZE Long int specifying the size of the memory map. Usually
 *   it is not necessary to modify this, unless one is operating under memory
 *   and disk constraints. The default map size is 1 TB.
 *
 * @param[in] path MDB environment path. This must be the path given by
 *  #LSUP_mdbstore_setup.
 *
 * @param[in] default_ctx Serialized URI to be used as a default context for
 *  triples inserted without a context specified. If NULL, the store operates
 *  in triple mode.
 *
 * @return Store handle, to be closed and freed with #LSUP_mdbstore_free.
 *  NOTE(review): the value returned on failure (presumably NULL) is not
 *  visible from this header; confirm against the implementation.
 */
LSUP_MDBStore *
LSUP_mdbstore_new (const char *path, const LSUP_Buffer *default_ctx);


/** @brief Close a store and free its handle.
 *
 * @param[in] store Store pointer.
 */
void LSUP_mdbstore_free (LSUP_MDBStore *store);


/** @brief Print stats about a store and its databases.
 *
 * TODO
 *
 * @param[in] store The store to get stats for.
 *
 * @param[out] stat NOTE(review): not documented in the original; presumably an
 *  LMDB statistics structure filled in by this call. Confirm against the
 *  implementation.
 */
LSUP_rc LSUP_mdbstore_stat (LSUP_MDBStore *store, MDB_stat *stat);


/** @brief Store size.
 *
 * @param[in] store The store to calculate size of.
 *
 * @return Number of stored SPO triples across all contexts.
 */
size_t LSUP_mdbstore_size (LSUP_MDBStore *store);


/** @brief Initialize bulk triple load.
 *
 * This is the first step of a bulk load. It is best used when the data at hand
 * need to be pre-processed, which can be done in the same loop as the next
 * step to keep memory usage low.
 *
 * @param[in] store The store to add to.
 *
 * @param[in] sc Context as a serialized term. If this is NULL, and the
 *  default context is not NULL, triples will be added to the default context
 *  for the store. If the default context for the store is NULL, regardless of
 *  the value of sc, triples will be added with no context.
 *
 * @return Iterator to be passed to the following load steps
 *  (#LSUP_mdbstore_add_iter, then #LSUP_mdbstore_add_done or
 *  #LSUP_mdbstore_add_abort).
 */
LSUP_MDBIterator *
LSUP_mdbstore_add_init (LSUP_MDBStore *store, const LSUP_Buffer *sc);


/** @brief Add one triple into the store.
 *
 * This must be called after #LSUP_mdbstore_add_init, using the iterator
 * yielded by that function. It may be called multiple times and must be
 * followed by #LSUP_mdbstore_add_done.
 *
 * NOTE: at the moment #LSUP_mdbstore_remove() or another
 * #LSUP_mdbstore_add_init() cannot be called between #LSUP_mdbstore_add_init
 * and #LSUP_mdbstore_add_abort or #LSUP_mdbstore_add_done. FIXME
 *
 * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init.
 *  The following members are of interest:
 *  it->i stores the total number of records inserted. Since the iterator
 *  struct is not defined in this header, callers read this value with
 *  #LSUP_mdbiter_cur.
 *
 * @param[in] sspo Serialized triple to be added.
 *
 * @return LSUP_OK if the triple was inserted; LSUP_NOACTION if the triple
 *  already existed; LSUP_DB_ERR if an MDB error occurred.
 */
LSUP_rc
LSUP_mdbstore_add_iter (LSUP_MDBIterator *it, const LSUP_BufferTriple *sspo);


/** @brief Finalize an add loop and free iterator.
 *
 * If a count of inserted records is needed, #LSUP_mdbiter_cur must be called
 * before this function.
 *
 * This must be called after #LSUP_mdbstore_add_iter.
 *
 * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init.
 */
LSUP_rc LSUP_mdbstore_add_done (LSUP_MDBIterator *it);


/** @brief Abort an add loop and free iterator.
 *
 * Usually called on an irrecoverable error from #LSUP_mdbstore_add_iter. None
 * of the successful inserts in the same loop is retained.
 *
 * @param[in] it Iterator obtained by #LSUP_mdbstore_add_init.
 */
void LSUP_mdbstore_add_abort (LSUP_MDBIterator *it);


/** @brief Add a batch of triples with optional context to the store.
 *
 * This is a shortcut for calling #LSUP_mdbstore_add_init,
 * #LSUP_mdbstore_add_iter and #LSUP_mdbstore_add_done in a sequence
 * when an array of pre-serialized triples is available.
 *
 * @param[in] store The store to add to.
 *
 * @param[in] sc Context as a serialized term. If this is NULL, and the
 *  default context is not NULL, triples will be added to the default context
 *  for the store. If the default context for the store is NULL, regardless of
 *  the value of sc, triples will be added with no context.
 *
 * @param[in] strp Triples to be inserted, as an array of ct
 *  #LSUP_BufferTriple items.
 *
 * @param[in] ct Number of triples to be inserted.
 *
 * @param[out] inserted If not NULL, it will be filled with the count of
 *  effectively inserted triples.
 */
LSUP_rc LSUP_mdbstore_add(
        LSUP_MDBStore *store, const LSUP_Buffer *sc,
        const LSUP_BufferTriple strp[], const size_t ct, size_t *inserted);


/** @brief Delete triples by pattern matching.
 *
 * The ss, sp, so, sc terms act as a matching pattern as documented in
 * #LSUP_mdbstore_lookup.
 *
 * @param[in] store The store to remove triples from.
 *
 * @param[in] ss Serialized s term of the pattern.
 *
 * @param[in] sp Serialized p term of the pattern.
 *
 * @param[in] so Serialized o term of the pattern.
 *
 * @param[in] sc Serialized context of the pattern.
 *
 * @param[out] ct If not NULL, it yields the number of triples actually
 *  deleted.
 */
LSUP_rc LSUP_mdbstore_remove(
        LSUP_MDBStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
        const LSUP_Buffer *so, const LSUP_Buffer *sc, size_t *ct);


/** @brief Look up matching triples and optional context.
 *
 * This function may return a count of matches and/or an iterator of results as
 * serialized triples.
 *
 * Any and all of the terms may be NULL, which indicates an unbound query
 * term. Stores with context not set will always ignore the fourth term.
 *
 * @param[in] store The store to be queried.
 *
 * @param[in] ss Buffer representing the serialized s term.
 *
 * @param[in] sp Buffer representing the serialized p term.
 *
 * @param[in] so Buffer representing the serialized o term.
 *
 * @param[in] sc Serialized context to limit search to. It may be NULL, in which
 *  case search is done in all contexts. Note that triples inserted without
 *  context are assigned the *default* context, indicated by the "default_ctx"
 *  member of the store struct.
 *
 * @param[out] ct If not NULL, this will be populated with the number of
 *  entries found. It is very inexpensive to set for lookups without context,
 *  much less so for 1-bound and 2-bound context lookups, in which cases it
 *  should be set only if needed.
 *
 * @return #LSUP_MDBIterator handle for the results. This is always created
 *  even if no matches are found and must be freed with #LSUP_mdbiter_free()
 *  after use. If matches are found, the iterator points to the first result
 *  which can be retrieved with #LSUP_mdbiter_next().
 *  NOTE(review): earlier documentation listed LSUP_OK / LSUP_NORESULT as
 *  return codes, which the pointer return type cannot carry. Confirm against
 *  the implementation how "no results" and errors are signalled.
 */
LSUP_MDBIterator *
LSUP_mdbstore_lookup(
        LSUP_MDBStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
        const LSUP_Buffer *so, const LSUP_Buffer *sc, size_t *ct);


/** @brief Yield the matching triples and advance the iterator.
 *
 * This function also checks if the matching triple is associated with a
 * context, if one was specified. If no associated contexts are found, the next
 * triple is searched, until the end of the results.
 *
 * NOTE: Iterators keep LMDB cursors and (read only) transactions open. Don't
 * hold on to them longer than necessary.
 *
 * NOTE: The memory pointed to by the individual LSUP_Buffer pointers is
 * owned by the database. It must not be written to or freed. To modify
 * the data or use them beyond the caller's scope, this memory must be copied.
 *
 * @param[in] it Opaque iterator handle obtained with #LSUP_mdbstore_lookup.
 *
 * @param[out] sspo #LSUP_BufferTriple to be populated with three serialized
 *  terms if found, NULL if not found. Internal callers (e.g. counters) may
 *  pass NULL if they don't need the serialized triples.
 *
 * @param[out] ctx NOTE(review): not documented in the original; presumably
 *  receives the context(s) associated with the yielded triple. Confirm
 *  semantics, ownership and whether NULL is accepted against the
 *  implementation.
 *
 * @return LSUP_OK if results were found; LSUP_END if no (more) results were
 *  found; LSUP_DB_ERR if a MDB_* error occurred.
 */
LSUP_rc LSUP_mdbiter_next (
        LSUP_MDBIterator *it, LSUP_BufferTriple *sspo, LSUP_Buffer **ctx);


/** @brief Iterator's internal counter.
 *
 * This is only useful with #LSUP_mdbstore_add_iter to count inserted records.
 *
 * @param[in] it An iterator primed with #LSUP_mdbstore_add_init.
 *
 * @return The value of the #i member. For an add iterator, this is the number
 *  of successfully inserted records.
 */
size_t LSUP_mdbiter_cur (LSUP_MDBIterator *it);


/** @brief Free an iterator allocated by a lookup.
 *
 * @param[in] it Iterator pointer. It is passed by value, so the caller's
 *  variable cannot be reset by this function and must not be used after the
 *  call.
 */
void LSUP_mdbiter_free (LSUP_MDBIterator *it);


/** @brief Gather the contexts of all triples matching a pattern.
 *
 * This function yields a NULL-terminated array of LSUP_Buffer handles for all
 * the triples that match an s, p, o lookup pattern in a store. All values are
 * unique.
 *
 * TODO Implement a free method (and align names). Currently freeing the result
 * is non-trivial and easy to forget:
 *
 *     size_t i = 0;
 *     while (ctx_a[i] != NULL)
 *         free (ctx_a[i++]); // Buffer data are memory-mapped. Not freeing.
 *     free (ctx_a);
 *
 * ss, sp, so arguments are used as in #LSUP_mdbstore_lookup().
 *
 * @param[in] store The store to be queried.
 *
 * @param[in] ss Serialized subject. It may be NULL.
 *
 * @param[in] sp Serialized predicate. It may be NULL.
 *
 * @param[in] so Serialized object. It may be NULL.
 *
 * @return Array of context handles. Memory is allocated by this function and
 *  must be freed by the caller.
 */
LSUP_Buffer **
LSUP_mdbstore_lookup_contexts (
        LSUP_MDBStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
        const LSUP_Buffer *so);


/** @brief Get all namespace prefixes in the store.
 *
 * @param[in] store MDB store to query.
 *
 * @param[out] nsm Pointer to namespace map to generate.
 *
 * @return LSUP_OK on success; LSUP_DB_ERR on MDB error.
 */
LSUP_rc
LSUP_mdbstore_nsm_get (LSUP_MDBStore *store, LSUP_NSMap **nsm);


/** @brief Store an in-memory namespace map into the permanent back end.
 *
 * Existing prefixes and namespaces are not updated. Thus, if the following are
 * already stored (the namespace IRIs of the original example were lost; the
 * ones below are illustrative only):
 *
 *     ns1: urn:example:ns_a#
 *     ns2: urn:example:ns_b#
 *
 * Neither of the following will be inserted:
 *
 *     ns3: urn:example:ns_a#   (namespace already stored)
 *     ns2: urn:example:ns_c#   (prefix already stored)
 *
 * @param[in] store MDB store to update.
 *
 * @param[in] nsm Namespace map handle to store.
 *
 * @return LSUP_OK if all terms were updated; LSUP_CONFLICT if one or more
 *  namespaces or terms were not updated because they already existed; <0 if
 *  an error occurred.
 */
LSUP_rc
LSUP_mdbstore_nsm_store (LSUP_MDBStore *store, const LSUP_NSMap *nsm);


/** @brief Add a single term to the store.
 *
 * @param[in] store MDB store handle.
 *
 * @param[in] sterm Serialized term to store.
 */
LSUP_rc
LSUP_mdbstore_add_term (LSUP_MDBStore *store, const LSUP_Buffer *sterm);


/** @brief Whether a term key exists.
 *
 * @param[in] store MDB store to search in.
 *
 * @param[in] tkey Key to look up.
 *
 * @return 1 if the term exists, 0 if it does not exist; <0 on error.
 */
int
LSUP_mdbstore_tkey_exists (LSUP_MDBStore *store, LSUP_Key tkey);

#endif