Browse Source

Add namespace module, only in memory. Tests pass.

Stefano Cossu 3 years ago
parent
commit
c5f1d315e2
14 changed files with 449 additions and 28 deletions
  1. 3 3
      TODO.md
  2. 1 0
      include/core.h
  3. 41 7
      include/graph.h
  4. 120 0
      include/namespace.h
  5. 1 1
      include/store_htable.h
  6. 3 5
      include/term.h
  7. 24 4
      src/graph.c
  8. 192 0
      src/namespace.c
  9. 7 6
      src/store_htable.c
  10. 1 1
      src/store_mdb.c
  11. 14 0
      src/term.c
  12. 2 0
      test.c
  13. 3 1
      test/test.h
  14. 37 0
      test/test_namespace.c

+ 3 - 3
TODO.md

@@ -5,10 +5,10 @@ P = pending; W = working on it; D = done.
 ## Critical for MVP
 
 - *D* LMDB back end
-- *W* Python bindings
+- *D* Hash table back end
+- *D* Python bindings
+- *W* Namespace manager
 - *W* Better error handling
-- *P* Hash table back end
-- *P* Namespace manager
 - *P* N3 serialization / deserialization
 - *P* Turtle serialization / deserialization
 

+ 1 - 0
include/core.h

@@ -86,6 +86,7 @@ typedef LSUP_Key LSUP_QuadKey[4];
 
 typedef char uuid_str_t[UUIDSTR_SIZE];
 
+
 // Yes, a textbook mistake (each argument may be evaluated twice, so never
 // pass expressions with side effects); but writing min and max for all int
 // types is crazy. The whole expansion is parenthesized so that the macros
 // can be safely embedded in larger expressions, e.g. `2 * max (a, b)`.
 #define min(x, y) ((x) < (y) ? (x) : (y))
 #define max(x, y) ((x) > (y) ? (x) : (y))

+ 41 - 7
include/graph.h

@@ -1,8 +1,8 @@
 #ifndef _LSUP_GRAPH_H
 #define _LSUP_GRAPH_H
 
-#include "store_mdb.h"
-#include "store_htable.h"
+#include "triple.h"
+#include "namespace.h"
 
 /*
  * Define backend types and checks.
@@ -38,6 +38,11 @@ typedef struct GraphIterator LSUP_GraphIterator;
  * The new graph has zero capacity and a random URN. To change either one, use
  * #LSUP_graph_resize and #LSUP_graph_set_uri, respectively.
  *
+ * The graph is assigned a default (volatile) namespace map if it's in memory,
+ * hence all graphs share the same namespace map by default. To change this,
+ * use #LSUP_graph_set_namespace(). MDB graphs use a persistent namespace map
+ * that is common to all the graphs in the same store. This cannot be changed.
+ *
 * @param[in] store_type Type of store for the graph. One of the values of
  *  #LSUP_store_type.
  *
@@ -91,12 +96,14 @@ void
 LSUP_graph_free (LSUP_Graph *gr);
 
 
-/** @brief Number of triples in a graph.
+/** @brief Compare two graphs.
+ *
+ * @param[in] gr1 First operand.
+ *
+ * @param[in] gr2 Second operand.
+ *
+ * @return True if the graphs are topologically equal, false otherwise.
  */
-size_t
-LSUP_graph_size (const LSUP_Graph *gr);
-
-
 bool
 LSUP_graph_equals (const LSUP_Graph *gr1, const LSUP_Graph *gr2);
 
@@ -118,6 +125,33 @@ LSUP_graph_uri (const LSUP_Graph *gr);
 LSUP_rc
 LSUP_graph_set_uri (LSUP_Graph *gr, const char *uri);
 
+
+/** @brief Get the namespace map for an in-memory graph.
+ *
+ * @return Namespace handler for in-memory graphs, NULL for MDB graphs.
+ */
+LSUP_NSMap *
+LSUP_graph_namespace (LSUP_Graph *gr);
+
+
+/** @brief Set the namespace map for an in-memory graph.
+ *
+ * This has no effect on MDB graphs.
+ *
+ * @param[in] gr Graph to set the namespace map for.
+ *
+ * @param[in] nsm Namespace handle.
+ */
+void
+LSUP_graph_set_namespace (LSUP_Graph *gr, LSUP_NSMap *nsm);
+
+
+/** @brief Number of triples in a graph.
+ */
+size_t
+LSUP_graph_size (const LSUP_Graph *gr);
+
+
 bool
 LSUP_graph_contains (const LSUP_Graph *gr, const LSUP_Triple *spo);
 

+ 120 - 0
include/namespace.h

@@ -0,0 +1,120 @@
+#ifndef _LSUP_NAMESPACE_H
+#define _LSUP_NAMESPACE_H
+
+#include "core.h"
+
+/** @brief Namespace prefix length, including terminator.
+ */
+#define PFX_LEN 8
+
+
+/** @brief Namespace map structure.
+ *
+ * It contains a double hash map of pfx->ns and ns->pfx for fast 2-way lookup.
+ *
+ * Prefixes are fixed PFX_LEN-size strings, namespaces are arbitrary sized
+ * strings.
+ */
+typedef struct ns_map_t LSUP_NSMap;
+
+/** @brief Namespace prefix type.
+ */
+typedef char ns_pfx[PFX_LEN];
+
+
+/** @brief Create a new namespace map.
+ *
+ * @return A pointer to an empty map. It must be freed with #LSUP_nsmap_free().
+ */
+LSUP_NSMap *
+LSUP_nsmap_new (void);
+
+
+/** @brief Free a namespace map and its internal structures.
+ *
+ * @param[in] map The map to free.
+ */
+void
+LSUP_nsmap_free (LSUP_NSMap *map);
+
+
+/** @brief Add a prefix -> namespace pair to the map or update it.
+ *
+ * If the prefix already exists, it is updated with the new value, if
+ * different.
+ *
+ * @param[in] map The map to add to.
+ *
+ * @param[in] pfx The namespace prefix.
+ *
+ * @param[in] nsstr Fully qualified namespace.
+ *
+ * @return LSUP_OK if the record was added or replaced; LSUP_NOACTION if the
+ *  record already existed with the same value; LSUP_MEM_ERR if an allocation
+ *  error occurred.
+ */
+LSUP_rc
+LSUP_nsmap_add (LSUP_NSMap *map, const ns_pfx pfx, const char *nsstr);
+
+
+/** @brief Remove a prefix -> namespace pair from a map.
+ *
+ * @param[in] map The map to remove from.
+ *
+ * @param[in] pfx The namespace prefix to remove.
+ *
+ * @return LSUP_OK on successful delete; LSUP_NOACTION if no record was found.
+ */
+LSUP_rc
+LSUP_nsmap_remove (LSUP_NSMap *map, const ns_pfx pfx);
+
+
+/** @brief Get the namespace for a prefix.
+ *
+ * @param[in] map The map to look up the namespace in.
+ *
+ * @param[in] pfx The prefix to look up.
+ *
+ * @return A pointer to the namespace string. Note that this is not a copy and
+ *  should not be modified directly.
+ */
+const char *
+LSUP_nsmap_get (const LSUP_NSMap *map, const ns_pfx pfx);
+
+
+/** @brief Convert a FQ URI string to a prefixed string if the prefix is found.
+ *
+ * @param[in] map Namespace map to look up.
+ *
+ * @param[in] uri URI string to normalize.
+ *
+ * @param[out] pfx_uri String pointer to be filled with the prefixed URI. If
+ *  the namespace is not in the map or an error occurred, this will be NULL.
+ *  The caller is in charge of freeing the memory.
+ *
+ * @return LSUP_OK on success, LSUP_NORESULT if no entry was found in the map,
+ *  LSUP_MEM_ERR if a memory allocation error occurred.
+ */
+LSUP_rc
+LSUP_nsmap_normalize_uri (
+        const LSUP_NSMap *map, const char *uri, char **pfx_uri);
+
+
+/** @brief Convert a namespace-prefixed string to a FQ URI string if mapped.
+ *
+ * @param[in] map Namespace map to look up.
+ *
+ * @param[in] pfx_uri Prefixed URI string to denormalize.
+ *
+ * @param[out] uri String pointer to be filled with the FQ URI. If the
+ *  namespace is not in the map or an error occurred, this will be NULL.
+ *  The caller is in charge of freeing the memory.
+ *
+ * @return LSUP_OK on success, LSUP_NORESULT if no entry was found in the map,
+ *  LSUP_MEM_ERR if a memory allocation error occurred.
+ */
+LSUP_rc
+LSUP_nsmap_denormalize_uri (
+        const LSUP_NSMap *map, const char *pfx_uri, char **uri);
+
+#endif

+ 1 - 1
include/store_htable.h

@@ -45,7 +45,7 @@ LSUP_htstore_new (void);
  * #LSUP_htstore_free after use.
  */
 LSUP_HTStore *
-LSUP_htstore_bool_op(
+LSUP_htstore_bool_op (
         const LSUP_bool_op op, const LSUP_HTStore *s1, const LSUP_HTStore *s2);
 
 

+ 3 - 5
include/term.h

@@ -1,14 +1,12 @@
-#ifndef LSUP_TERM_H
-#define LSUP_TERM_H
+#ifndef _LSUP_TERM_H
+#define _LSUP_TERM_H
 
 #include <assert.h>
 #include <regex.h>
 
 #include "buffer.h"
+#include "namespace.h"
 
-// URI parsing regular expression. Conforms to RFC3986.
-#define URI_REGEX_STR \
-    "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
 #define LANG_SIZE 8 // Size in chars of lang tag
 
 // "NULL" triple, a value that is never user-provided. Used to fill deleted

+ 24 - 4
src/graph.c

@@ -1,3 +1,5 @@
+#include "store_htable.h"
+#include "store_mdb.h"
 #include "graph.h"
 
 // Initial size of lookup graph. It will double each time capacity is reached.
@@ -27,15 +29,17 @@ typedef enum KSetFlag {
 static const char *default_ctx_label = "urn:lsup:default";
 static LSUP_Buffer *default_ctx = NULL;
 static LSUP_MDBStore *default_store = NULL, *default_tmp_store = NULL;
+static LSUP_NSMap *default_nsm = NULL;
 
 
 typedef struct Graph {
-    LSUP_store_type         store_type;     // In-memory or MDB-backed
-    LSUP_Term               *uri;                 // Graph "name" (URI)
-    union {
+    LSUP_store_type         store_type;     // Back end type: in-memory or MDB.
+    LSUP_Term               *uri;           // Graph "name" (URI)
+    union {                                 // Back end, defined by store_type.
         LSUP_HTStore *      ht_store;
         LSUP_MDBStore *     mdb_store;
     };
+    LSUP_NSMap *        nsm;            // Namespace map.
 } Graph;
 
 
@@ -78,13 +82,14 @@ graph_iter_next_buffer (GraphIterator *it, LSUP_SerTriple *sspo);
 /* Atexit functions. */
 void ctx_cleanup()
 {
-/*@ @brief Close LMDB environment.
+/*@ @brief Close default LMDB environments, context, and namespace map.
  *
  * Run at exit.
  */
     LSUP_mdbstore_free (default_store);
     LSUP_mdbstore_free (default_tmp_store);
     LSUP_buffer_free (default_ctx);
+    LSUP_nsmap_free (default_nsm);
 }
 
 
@@ -122,10 +127,15 @@ LSUP_graph_new (const LSUP_store_type store_type)
         gr->ht_store = LSUP_htstore_new();
         if (UNLIKELY (!gr->ht_store)) return NULL;
 
+        if (!default_nsm) default_nsm = LSUP_nsmap_new();
+        gr->nsm = default_nsm;
+
     } else if (gr->store_type == LSUP_STORE_MDB) {
         gr->mdb_store = default_store;
         if (UNLIKELY (!gr->mdb_store)) return NULL;
 
+        gr->nsm = NULL;
+
     } else { // LSUP_STORE_MDB_TMP
         gr->mdb_store = default_tmp_store;
     }
@@ -231,6 +241,16 @@ LSUP_graph_set_uri (LSUP_Graph *gr, const char *uri)
 { return LSUP_uri_init (gr->uri, uri); }
 
 
+/** @brief Accessor for the namespace map attached to a graph.
+ *
+ * @param[in] gr Graph handle. Must not be NULL.
+ *
+ * @return The graph's `nsm` member, exactly as stored.
+ */
+LSUP_NSMap *
+LSUP_graph_namespace (Graph *gr)
+{
+    return gr->nsm;
+}
+
+
+/** @brief Replace the namespace map of an in-memory graph.
+ *
+ * Graphs whose store type is not LSUP_STORE_MEM are left untouched.
+ *
+ * @param[in] gr Graph handle. Must not be NULL.
+ *
+ * @param[in] nsm Namespace map to attach. The pointer is stored as is.
+ */
+void
+LSUP_graph_set_namespace (Graph *gr, LSUP_NSMap *nsm)
+{
+    if (gr->store_type != LSUP_STORE_MEM) return;
+
+    gr->nsm = nsm;
+}
+
+
 size_t
 LSUP_graph_size (const Graph *gr)
 {

+ 192 - 0
src/namespace.c

@@ -0,0 +1,192 @@
+#include "uthash.h"
+
+#include "namespace.h"
+
+typedef struct namespace_t {        // Entry of the prefix -> NS table.
+    ns_pfx              pfx;        // Namespace prefix (hash key in pn).
+    char *              ns;         // Fully qualified NS (strdup'd copy).
+    UT_hash_handle      hh;         // UTHash handle.
+} Namespace;
+
+typedef struct namespace_index_t {  // Entry of the NS -> prefix table.
+    Namespace *         ns;         // Pointer to a NS struct; keyed by ns->ns.
+    UT_hash_handle      hh;         // UTHash handle.
+} NSIndex;
+
+typedef struct ns_map_t {           // Two uthash tables for 2-way lookup.
+    Namespace *         pn;         // Prefix to namespace (table head).
+    NSIndex *           np;         // Namespace to prefix (table head).
+} NSMap;
+
+
+/** @brief Allocate an empty namespace map.
+ *
+ * @return A new map with both lookup tables empty, or NULL if the allocation
+ *  failed.
+ */
+NSMap *
+LSUP_nsmap_new (void)
+{
+    NSMap *nsm = malloc (sizeof (NSMap));
+    if (UNLIKELY (nsm == NULL)) return NULL;
+
+    // uthash table heads must start out as NULL.
+    *nsm = (NSMap) { .pn = NULL, .np = NULL };
+
+    return nsm;
+}
+
+
+/** @brief Release a namespace map and everything it owns.
+ *
+ * Index records only borrow a pointer to a Namespace entry (and use that
+ * entry's `ns` string as their key), so they are released first and without
+ * touching what they point to. The Namespace entries and their strings are
+ * then freed exactly once.
+ *
+ * @param[in] map Map to free. NULL is accepted and ignored.
+ */
+void
+LSUP_nsmap_free (NSMap *map)
+{
+    if (UNLIKELY (!map)) return;
+
+    // Reverse index first, while the key strings it refers to are alive.
+    NSIndex *idx_entry, *idx_tmp;
+
+    HASH_ITER (hh, map->np, idx_entry, idx_tmp) {
+        HASH_DEL (map->np, idx_entry);
+        free (idx_entry);
+    }
+
+    // Main table: these entries own the namespace strings.
+    Namespace *entry, *tmp;
+
+    HASH_ITER (hh, map->pn, entry, tmp) {
+        HASH_DEL (map->pn, entry);
+        free (entry->ns);
+        free (entry);
+    }
+
+    free (map);
+}
+
+
+/** @brief Add or replace a prefix -> namespace pair.
+ *
+ * Prefixes longer than PFX_LEN - 1 characters are truncated to fit the
+ * fixed-size key. All allocations are made before the map is modified, so a
+ * memory error leaves the map unchanged.
+ *
+ * @param[in] map Map to add to.
+ *
+ * @param[in] pfx Namespace prefix.
+ *
+ * @param[in] nsstr Fully qualified namespace. It is copied.
+ *
+ * @return LSUP_OK if the pair was added or replaced; LSUP_NOACTION if the
+ *  same pair was already registered in both directions; LSUP_MEM_ERR on
+ *  allocation failure.
+ */
+LSUP_rc
+LSUP_nsmap_add (NSMap *map, const ns_pfx pfx, const char *nsstr)
+{
+    // Build the (possibly truncated) key actually stored in the map.
+    ns_pfx key;
+    size_t pfx_len = strlen (pfx);
+    if (pfx_len >= PFX_LEN) pfx_len = PFX_LEN - 1;
+    memcpy (key, pfx, pfx_len);
+    key[pfx_len] = '\0';
+
+    Namespace *old_entry = NULL;
+    HASH_FIND_STR (map->pn, key, old_entry);
+
+    NSIndex *old_idx = NULL;
+    HASH_FIND_STR (map->np, nsstr, old_idx);
+
+    // An index record found under nsstr that points at the entry found under
+    // the prefix means that this exact pair is already in place.
+    if (old_entry && old_idx && old_idx->ns == old_entry)
+        return LSUP_NOACTION;
+
+    // Allocate everything up front.
+    Namespace *entry = malloc (sizeof (*entry));
+    NSIndex *idx_entry = malloc (sizeof (*idx_entry));
+    char *ns_copy = strdup (nsstr);
+    if (UNLIKELY (!entry || !idx_entry || !ns_copy)) {
+        free (ns_copy);
+        free (idx_entry);
+        free (entry);
+
+        return LSUP_MEM_ERR;
+    }
+
+    memcpy (entry->pfx, key, pfx_len + 1);
+    entry->ns = ns_copy;
+    idx_entry->ns = entry;
+
+    // Main and index records are deleted independently because the existing
+    // pairs may be different.
+    if (old_idx) {
+        HASH_DEL (map->np, old_idx);
+        free (old_idx);
+    }
+
+    if (old_entry) {
+        // The replaced entry owns the key string of its own index record:
+        // that record must go before the string is freed.
+        NSIndex *stale_idx = NULL;
+        HASH_FIND_STR (map->np, old_entry->ns, stale_idx);
+        if (stale_idx && stale_idx->ns == old_entry) {
+            HASH_DEL (map->np, stale_idx);
+            free (stale_idx);
+        }
+
+        HASH_DEL (map->pn, old_entry);
+        free (old_entry->ns);
+        free (old_entry);
+    }
+
+    HASH_ADD_STR (map->pn, pfx, entry);
+    HASH_ADD_KEYPTR (hh, map->np, entry->ns, strlen (entry->ns), idx_entry);
+
+    return LSUP_OK;
+}
+
+
+/** @brief Remove a prefix -> namespace pair from a map.
+ *
+ * The reverse (namespace -> prefix) record is removed only if it points back
+ * at the entry being deleted. When several prefixes share one namespace, the
+ * record may belong to another prefix and must then be kept.
+ *
+ * @param[in] map Map to remove from.
+ *
+ * @param[in] pfx Namespace prefix to remove.
+ *
+ * @return LSUP_OK if the prefix was found and deleted; LSUP_NOACTION if it
+ *  was not in the map.
+ */
+LSUP_rc
+LSUP_nsmap_remove (NSMap *map, const ns_pfx pfx)
+{
+    Namespace *entry = NULL;
+    HASH_FIND_STR (map->pn, pfx, entry);
+    if (!entry) return LSUP_NOACTION;
+
+    // Look up the index while entry->ns (a possible key string) is alive.
+    NSIndex *idx_entry = NULL;
+    HASH_FIND_STR (map->np, entry->ns, idx_entry);
+
+    if (idx_entry && idx_entry->ns == entry) {
+        HASH_DEL (map->np, idx_entry);
+        free (idx_entry);
+    }
+
+    HASH_DEL (map->pn, entry);
+    free (entry->ns);
+    free (entry);
+
+    return LSUP_OK;
+}
+
+
+/** @brief Look up the namespace registered for a prefix.
+ *
+ * @param[in] map Map to search.
+ *
+ * @param[in] pfx Prefix to look up, compared over its full length.
+ *
+ * @return The stored namespace string (owned by the map, not a copy), or
+ *  NULL if the prefix is not in the map.
+ */
+const char *
+LSUP_nsmap_get (const NSMap *map, const ns_pfx pfx)
+{
+    Namespace *found = NULL;
+    HASH_FIND_STR (map->pn, pfx, found);
+
+    if (!found) return NULL;
+
+    return found->ns;
+}
+
+
+/** @brief Convert a FQ URI string to a prefixed string if a prefix is found.
+ *
+ * The namespace -> prefix index is scanned for the longest namespace that is
+ * a leading substring of the URI. A plain hash lookup cannot be used because
+ * the URI normally carries a local part after the namespace.
+ *
+ * @param[in] map Namespace map to look up.
+ *
+ * @param[in] uri URI string to normalize.
+ *
+ * @param[out] pfx_uri Set to a newly allocated `prefix:local` string on
+ *  success, NULL otherwise. The caller must free it.
+ *
+ * @return LSUP_OK on success; LSUP_NORESULT if no namespace in the map
+ *  matches; LSUP_MEM_ERR on allocation failure.
+ */
+LSUP_rc
+LSUP_nsmap_normalize_uri (
+        const NSMap *map, const char *uri, char **pfx_uri)
+{
+    *pfx_uri = NULL;
+
+    const Namespace *match = NULL;
+    size_t match_len = 0;
+
+    NSIndex *idx_entry, *idx_tmp;
+    HASH_ITER (hh, map->np, idx_entry, idx_tmp) {
+        const char *ns = idx_entry->ns->ns;
+        size_t ns_len = strlen (ns);
+
+        // Keep the most specific (longest) matching namespace.
+        if (
+            (!match || ns_len > match_len)
+            && strncmp (uri, ns, ns_len) == 0
+        ) {
+            match = idx_entry->ns;
+            match_len = ns_len;
+        }
+    }
+
+    if (!match) return LSUP_NORESULT;
+
+    const char *local = uri + match_len;
+    // One extra byte for the colon, one for the terminating \x00.
+    size_t size = strlen (match->pfx) + strlen (local) + 2;
+
+    *pfx_uri = malloc (size);
+    if (UNLIKELY (! (*pfx_uri))) return LSUP_MEM_ERR;
+
+    snprintf (*pfx_uri, size, "%s:%s", match->pfx, local);
+
+    return LSUP_OK;
+}
+
+
+/** @brief Convert a namespace-prefixed string to a FQ URI string if mapped.
+ *
+ * The prefix is the part of the input before the first colon. As before, a
+ * prefix longer than PFX_LEN - 1 characters is truncated for the lookup; the
+ * local part always starts right after the colon.
+ *
+ * @param[in] map Namespace map to look up.
+ *
+ * @param[in] pfx_uri Prefixed string (`prefix:local`) to expand.
+ *
+ * @param[out] uri Set to a newly allocated FQ URI on success, NULL
+ *  otherwise. The caller must free it.
+ *
+ * @return LSUP_OK on success; LSUP_NORESULT if the input has no colon or the
+ *  prefix is not in the map; LSUP_MEM_ERR on allocation failure.
+ */
+LSUP_rc
+LSUP_nsmap_denormalize_uri (
+        const NSMap *map, const char *pfx_uri, char **uri)
+{
+    *uri = NULL;
+
+    // Without a separator there is no prefix to resolve.
+    const char *sep = strchr (pfx_uri, ':');
+    if (!sep) return LSUP_NORESULT;
+
+    size_t pfx_len = (size_t) (sep - pfx_uri);
+    if (pfx_len >= PFX_LEN) pfx_len = PFX_LEN - 1;
+
+    ns_pfx pfx;
+    memcpy (pfx, pfx_uri, pfx_len);
+    pfx[pfx_len] = '\0';
+
+    Namespace *entry = NULL;
+    HASH_FIND_STR (map->pn, pfx, entry);
+    if (!entry) return LSUP_NORESULT;
+
+    const char *local = sep + 1;
+    // Namespace + local part + terminating \x00.
+    size_t size = strlen (entry->ns) + strlen (local) + 1;
+
+    *uri = malloc (size);
+    if (UNLIKELY (! (*uri))) return LSUP_MEM_ERR;
+
+    snprintf (*uri, size, "%s%s", entry->ns, local);
+
+    return LSUP_OK;
+}

+ 7 - 6
src/store_htable.c

@@ -1,5 +1,6 @@
 #include "uthash.h"
 
+#include "namespace.h"
 #include "store_htable.h"
 
 
@@ -11,14 +12,14 @@ typedef bool (*LSUP_key_eq_fn_t)(
 
 
 typedef struct triple_entry_t {
-    LSUP_TripleKey  key;
-    UT_hash_handle  hh;
+    LSUP_TripleKey      key;
+    UT_hash_handle      hh;
 } TripleEntry;
 
 typedef struct idx_entry_t {
-    LSUP_Key        key;
-    LSUP_Buffer *   sterm;
-    UT_hash_handle  hh;
+    LSUP_Key            key;
+    LSUP_Buffer *       sterm;
+    UT_hash_handle      hh;
 } IndexEntry;
 
 typedef struct ht_store_t {
@@ -43,7 +44,7 @@ typedef struct ht_iterator_t {
  *
  * Since the key is already a strong hash, reuse it for bucket allocation.
  */
-#define HASH_FUNCTION (s,len,hashv) (hashv) = (unsigned)(s)
+//#define HASH_FUNCTION (s,len,hashv) (hashv) = (unsigned)(s)
 
 /* * * CALLBACKS * * */
 

+ 1 - 1
src/store_mdb.c

@@ -112,7 +112,7 @@ typedef struct MDBIterator {
     ENTRY(  SO_P,    "so:p",    DUPFIXED_MASK   )   /* 2-bound lookup */    \
     ENTRY(  SP_O,    "sp:o",    DUPFIXED_MASK   )   /* 2-bound lookup */    \
     ENTRY(  C_SPO,   "c:spo",   DUPFIXED_MASK   )   /* Context lookup */    \
-    ENTRY(  NS_PFX,  "ns:pfx",  0               )   /* NS to prefix */      \
+    ENTRY(  NS_PFX,  "ns:pfx",  DUPSORT_MASK    )   /* NS to prefix */      \
 
 /**
  * DB labels. They are prefixed with DB_

+ 14 - 0
src/term.c

@@ -1,6 +1,11 @@
 #include "term.h"
 
+// URI parsing regular expression. Conforms to RFC3986.
+#define URI_REGEX_STR \
+    "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
 // Length of a string, or 0 for a NULL pointer. Fully parenthesized so that
 // it can be embedded in larger expressions (e.g. `NLEN (a) + NLEN (b)`).
 #define NLEN(str)   ((str) == NULL ? 0 : strlen ((str)))
+#define INVALID_URI_CHARS "<>\" {}|\\^`"
+
 
 static regex_t ptn;
 static bool ptn_init = false;
@@ -81,6 +86,15 @@ LSUP_term_init(
 
     // Validate URI.
     if (term->type == LSUP_TERM_URI) {
+        // TODO Cheap fix. Should url-encode all invalid chars.
+        if (strpbrk (data, INVALID_URI_CHARS) != NULL) {
+            fprintf (
+                    stderr, "Characters %s are not allowed.\n",
+                    INVALID_URI_CHARS);
+
+            return LSUP_VALUE_ERR;
+        }
+
         if (UNLIKELY (!ptn_init)) {
             int rc = regcomp (&ptn, URI_REGEX_STR, REG_EXTENDED);
             if (rc != 0) return LSUP_ERROR;

+ 2 - 0
test.c

@@ -1,4 +1,5 @@
 #include "test_term.c"
+#include "test_namespace.c"
 #include "test_store_ht.c"
 #include "test_store_mdb.c"
 #include "test_graph.c"
@@ -14,6 +15,7 @@ int main(int argc, char **argv) {
 
     if (
         term_tests() ||
+        namespace_tests() ||
         store_ht_tests() ||
         store_mdb_tests() ||
         graph_tests() ||

+ 3 - 1
test/test.h

@@ -36,7 +36,9 @@
 /** @brief Expect that two string values are equal.
  */
 #define EXPECT_STR_EQ(got, exp) do { \
-    if (strcmp((exp), (got)) != 0) {\
+    const char *_str1 = (exp); \
+    const char *_str2 = (got); \
+    if (((_str1 == NULL) ^ (_str2 == NULL)) || strcmp(_str1, _str2) != 0) {\
         fprintf(\
                 stderr, "!!! Test failed at %s:%d. Expected: %s; got: %s\n", \
                 __FILE__, __LINE__, (exp), (got)); \

+ 37 - 0
test/test_namespace.c

@@ -0,0 +1,37 @@
+#include "test.h"
+#include "namespace.h"
+
+/** @brief Exercise add, get and remove on a freshly created namespace map.
+ *
+ * NOTE(review): the map is never released with LSUP_nsmap_free() here.
+ *
+ * @return 0 when the end of the test is reached.
+ */
+static int
+test_namespace()
+{
+    const char *dc_ns = "http://purl.org/dc/elements/1.1/";
+    const char *dcterms_ns = "http://purl.org/dc/terms/";
+
+    LSUP_NSMap *nsm = LSUP_nsmap_new();
+    ASSERT (nsm != NULL, "Error creating namespace map!");
+
+    // Populate the map.
+    EXPECT_PASS (LSUP_nsmap_add (nsm, "dc", dc_ns));
+    EXPECT_PASS (LSUP_nsmap_add (nsm, "dcterms", dcterms_ns));
+
+    // Registered prefixes resolve to their namespaces.
+    EXPECT_STR_EQ (LSUP_nsmap_get (nsm, "dc"), dc_ns);
+    EXPECT_STR_EQ (LSUP_nsmap_get (nsm, "dcterms"), dcterms_ns);
+
+    // An over-long prefix that was never added must not match a stored one
+    // that it merely starts with.
+    ASSERT (
+            LSUP_nsmap_get (nsm, "dctermsxxx") == NULL,
+            "Non-existent NS found!");
+    ASSERT (LSUP_nsmap_get (nsm, "none") == NULL, "Non-existent NS found!");
+
+    // Removal: existing prefix, unknown prefix, then look-up after delete.
+    EXPECT_PASS (LSUP_nsmap_remove (nsm, "dc"));
+    ASSERT (
+            LSUP_nsmap_remove (nsm, "none") == LSUP_NOACTION,
+            "Wrong result for removal of non-existent prefix!");
+    ASSERT (LSUP_nsmap_get (nsm, "dc") == NULL, "Deleted NS found!");
+
+    return 0;
+}
+
+int namespace_tests()       // Suite entry point; called from main() in test.c.
+{
+    RUN (test_namespace);   // RUN comes from test.h (not shown here).
+    return 0;               // Presumably reached only if RUN did not bail out.
+}