Преглед изворни кода

Term reform:

* NS-mapped IRI;
* Store IRI ref substrings;
* Literal datatype is a Term* struct;
* Shortcuts to create specific term types;
* Compare terms by their hashes;
* Semantically equal terms serialize identically;
* Graph can be assigned an IRI on creation;
* Tests pass; memory errors are not fixed.
Stefano Cossu пре 3 година
родитељ
комит
97a0c1442c
21 измењених фајлова са 703 додато и 286 уклоњено
  1. 19 10
      Makefile
  2. 14 2
      cpython/py_graph.h
  3. 2 2
      cpython/py_term.h
  4. 2 2
      include/core.h
  5. 0 57
      include/data/bootstrap.h
  6. 7 8
      include/graph.h
  7. 148 31
      include/term.h
  8. 2 0
      src/buffer.c
  9. 11 3
      src/codec/nt_lexer.re
  10. 2 2
      src/codec_nt.c
  11. 13 2
      src/environment.c
  12. 29 19
      src/graph.c
  13. 276 60
      src/term.c
  14. 2 3
      test.c
  15. 28 23
      test/assets/triples.h
  16. 0 0
      test/keyset.c
  17. 4 4
      test/test.h
  18. 13 13
      test/test_codec_nt.c
  19. 7 6
      test/test_graph.c
  20. 3 3
      test/test_store_mdb.c
  21. 121 36
      test/test_term.c

+ 19 - 10
Makefile

@@ -19,10 +19,11 @@ INCLUDE_BASE = . -Iinclude -Iext/xxHash -Iext/openldap/libraries/liblmdb \
 	-Iext/tpl/src -Iext/uthash/src -Iext/log/src
 INCLUDE = -I$(INCLUDE_BASE)
 CFLAGS += -Wall -fPIC -MMD -DLOG_USE_COLOR $(INCLUDE)
+TEST_CFLAGS = -Itest -O0 -g3 -DDEBUG
 # NOTE: -luuid is a Linux system library. Other OS's might need a different
 # link or a non-system library built.
 LDFLAGS = -Lext/openldap/libraries/liblmdb -Lext/xxHash \
-		  -llmdb -lxxhash -luuid -pthread
+		  -llmdb -lxxhash -luuid
 
 CODEC_DIR = src/codec
 #CODEC_SRC = $(wildcard src/codec/*_parser.c)
@@ -34,6 +35,11 @@ CODEC_OBJ = $(CODEC_SRC:.c=.o)
 EXT_SRC = $(wildcard ext/log/src/*.c) \
 	  	  $(wildcard ext/tpl/src/*.c)
 
+# External headers of libraries compiled in core.
+EXT_H = $(wildcard ext/log/src/*.h) \
+	  	$(wildcard ext/tpl/src/*.h) \
+	  	$(wildcard ext/uthash/src/*.h)
+
 SRC = $(EXT_SRC) $(wildcard src/*.c)
 #CODEC_OBJ = $(wildcard src/codec/*.o)
 OBJ = $(SRC:.c=.o) $(CODEC_OBJ)
@@ -69,11 +75,9 @@ $(CODEC_OBJ): $(CODEC_SRC)
 
 
 # Build all external dependencies in core object.
-core.o: core.c $(EXT_SRC) \
-		$(wildcard ext/tpl/*.c) $(wildcard ext/tpl/*.h)
+core.o: core.c $(EXT_SRC) $(EXT_H)
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
 
-
 # Ext libraries compiled as shared objects.
 
 .PHONY: libxxhash
@@ -88,7 +92,7 @@ install: all
 	mkdir -p $(DESTDIR)$(libdir)
 	mkdir -p $(DESTDIR)$(includedir)
 	cp liblsuprdf.* $(DESTDIR)$(libdir) && \
-		cp include/*.h $(DESTDIR)$(includedir)
+		cp include/*.h $(EXT_H) $(DESTDIR)$(includedir)
 	
 .PHONY: clean
 clean:
@@ -99,14 +103,19 @@ uninstall:
 	rm -f $(DESTDIR)$(libdir)/liblsuprdf.*
 	rm -rf $(DESTDIR)$(includedir)
 
-.PHONY: test
-test: all $(TEST_SRC)
+bin/test: $(OBJ) $(TEST_SRC) $(DEPLIBS)
 	$(CC) \
-		$(CFLAGS) -g3 -DDEBUG \
-		$(INCLUDE) -Itest \
-		test.c -L. $(LDFLAGS) -llsuprdf \
+		$(CFLAGS) $(TEST_CFLAGS) $(SRC) $(CODEC_SRC) test.c -L. $(LDFLAGS) \
 		-o bin/test
 
+.PHONY: debug
+debug: bin/test
+	exec gdb bin/test
+
+.PHONY: test
+test: bin/test
+	exec bin/test
+
 lint:
 	splint \
 		$(INCLUDE) -Itest \

+ 14 - 2
cpython/py_graph.h

@@ -136,11 +136,23 @@ static int
 Graph_init (GraphObject *self, PyObject *args, PyObject *kwargs)
 {
     unsigned char store_type;
+    PyObject *uri_obj;
+    LSUP_Term *uri;
 
-    if (!PyArg_ParseTuple (args, "b", &store_type))
+    static char *kwlist[] = {"", "uri", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords (
+            args, kwargs, "b|O", kwlist, &store_type, &uri_obj))
         return -1;
 
-    self->ob_struct = LSUP_graph_new ((LSUP_store_type)store_type);
+    if (uri_obj)
+        if (!PyObject_TypeCheck (uri, &TermType)) {
+            PyErr_SetString (PyExc_TypeError, "uri is not a Term type.");
+            return -1;
+        }
+    } else uri = LSUP_iriref_new (NULL, NULL);
+
+    self->ob_struct = LSUP_graph_new (uri, (LSUP_store_type) store_type);
     if (!self->ob_struct) {
         PyErr_SetString (PyExc_ValueError, "Could not create graph.");
         return -1;

+ 2 - 2
cpython/py_term.h

@@ -59,8 +59,8 @@ Term_iriref_init (TermObject *self, PyObject *args, PyObject *kwargs)
         return -1;
     }
 
-    // TODO Add nsm parameter.
-    self->ob_struct = LSUP_uri_new (data);
+    self->ob_struct = LSUP_iriref_new (
+            data, (nsm ? nsm->ob_struct : NULL));
     if (!self->ob_struct) {
         PyErr_SetString (PyExc_ValueError, "Could not create term.");
         return -1;

+ 2 - 2
include/core.h

@@ -43,8 +43,8 @@
 
 # define UUIDSTR_SIZE 37
 
-// "NULL" triple, a value that is never user-provided. Used to fill deleted
-// triples in a keyset.
+/** @brief "NULL" triple, a value that is never user-provided.
+ */
 #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
 
 

+ 0 - 57
include/data/bootstrap.h

@@ -14,61 +14,4 @@ const char *init_nsmap[][2] = {
     {NULL}
 };
 
-/** @brief Initial data types loaded into the environment.
- *
- * For XSD type reference see http://www.w3.org/TR/xmlschema11-2/
- */
-const char *init_datatypes[] = {
-    /* XSD primitive types. */
-    "xsd:string",
-    "xsd:boolean",
-    "xsd:decimal",
-    "xsd:float",
-    "xsd:double",
-    "xsd:duration",
-    "xsd:dateTime",
-    "xsd:time",
-    "xsd:date",
-    "xsd:gYearMonth",
-    "xsd:gYear",
-    "xsd:gMonthDay",
-    "xsd:gDay",
-    "xsd:gMonth",
-    "xsd:hexBinary",
-    "xsd:base64Binary",
-    "xsd:anyURI",
-    "xsd:QName",
-    "xsd:NOTATION",
-
-    /* Other ordinary (constructed) data types. Not compiled by default. */
-#ifdef XSD_CONSTRUCTED_DATATYPES
-    "xsd:normalizedString",
-    "xsd:token",
-    "xsd:language",
-    "xsd:NMTOKEN",
-    "xsd:NMTOKENS",
-    "xsd:Name",
-    "xsd:NCName",
-    "xsd:ID",
-    "xsd:IDREF",
-    "xsd:IDREFS",
-    "xsd:ENTITY",
-    "xsd:ENTITIES",
-    "xsd:integer",
-    "xsd:nonPositiveInteger",
-    "xsd:negativeInteger",
-    "xsd:long",
-    "xsd:int",
-    "xsd:short",
-    "xsd:byte",
-    "xsd:nonNegativeInteger",
-    "xsd:unsignedLong",
-    "xsd:unsignedInt",
-    "xsd:unsignedShort",
-    "xsd:unsignedByte",
-    "xsd:positiveInteger",
-#endif  /* XSD_CONSTRUCTED_DATATYPES */
-    NULL
-};
-
 #endif /* LSUP_INIT_DATA_H */

+ 7 - 8
include/graph.h

@@ -47,20 +47,19 @@ typedef struct GraphIterator LSUP_GraphIterator;
  * @param store_type[in] Type of store for the graph. One of the values of
  *  #LSUP_store_type.
  *
- * @param gr[out] Pointer to a pointer to the new graph. It must be freed with
- *  #LSUP_graph_free when done.
- *
  * @return Handle to the new graph, or NULL if an error occurred.
  */
 LSUP_Graph *
-LSUP_graph_new_env (const LSUP_Env *env, const LSUP_store_type store_type);
+LSUP_graph_new_env (
+        const LSUP_Env *env, LSUP_Term *uri, const LSUP_store_type store_type);
 
 
 /** @brief Create an empty graph with the default environment.
  *
  * This is likely to be used more often than #LSUP_graph_new_env().
  */
-#define LSUP_graph_new(type) LSUP_graph_new_env (LSUP_default_env, type)
+#define LSUP_graph_new(uri, type) \
+    LSUP_graph_new_env (LSUP_default_env, uri, type)
 
 
 /** @brief Create an array of graph from triples matching a pattern in a store.
@@ -184,14 +183,14 @@ LSUP_graph_uri (const LSUP_Graph *gr);
  * the graph URI represents. A non-context graph retains the same triple set
  * when graph URI changes.
  *
- * @param gr[in] Graph whose URI is to be changed.
+ * @param gr[in] Graph handle.
  *
- * @param uri[in] New URI as a string. If NULL, a UUID4 URN is generated.
+ * @param uri[in] IRI handle. It is freed together with the graph.
  *
  * @return LSUP_OK on success; <0 on error.
  */
 LSUP_rc
-LSUP_graph_set_uri (LSUP_Graph *gr, const char *uri);
+LSUP_graph_set_uri (LSUP_Graph *gr, LSUP_Term *uri);
 
 
 /** @brief Get the namespace map for an in-memory graph.

+ 148 - 31
include/term.h

@@ -31,10 +31,26 @@
  */
 #define DEFAULT_DTYPE           "http://www.w3.org/2001/XMLSchema#string"
 
-/** @brief URI parsing regular expression. Conforms to RFC3986.
+/** @brief URI parsing regular expression.
+ *
+ * Based on RFC3986 (see https://tools.ietf.org/html/rfc3986#appendix-B) and
+ * modified for use in this application. Relevant matching groups are the
+ * following, for a sample URI `http://example.org/123/456/?query=blah#frag`:
+ *
+ * #0:  Full parsed URI (http://example.org/123/456/?query=blah#frag)
+ * #1:  Domain prefix (http://example.org)
+ * #2:  Protocol (http:)
+ * #4:  Authority (example.org)
+ * #5:  Path relative to domain (/123/456/?query=blah#frag)
+ * #6:  Path, excluding query and fragment (/123/456/)
+ * #8:  Query (query=blah)
+ * #10: Fragment (frag)
+ *
+ * For URN-like URIs, such as `urn:s:0`, the prefix part (#1) is `urn:` and
+ * the path (#5) is `s:0`.
  */
 #define LSUP_URI_REGEX_STR \
-    "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
+    "^(([^:/?#]+:)?(//([^/?#]*))?)?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
 
 
 /*
@@ -45,19 +61,35 @@ typedef XXH64_hash_t LSUP_Hash64;
 typedef char LSUP_TermType;
 typedef char LSUP_LangTag[8];
 
+/** @brief IRI information.
+ *
+ * See regex matching group for #LSUP_URI_REGEX_STR for more information.
+ */
+typedef struct iri_info_t LSUP_IRIInfo;
 
 typedef struct term_t {
     char *              data;       // URI, literal value, or BNode label.
     union {
-        LSUP_Key        datatype;   // Data type key for LSUP_TERM_LITERAL.
+        struct term_t * datatype;   // Data type IRI for LSUP_TERM_LITERAL.
         LSUP_LangTag    lang;       // Lang tag for LSUP_TERM_LT_LITERAL.
         LSUP_Key        bnode_id;   // BNode ID for comparison & skolemization.
-        LSUP_NSMap *    nsm;        // NSM handle for prefixed IRI.
+        LSUP_IRIInfo *  iri_info;   // IRI information structure.
     };
-    LSUP_TermType      type;       // Term type.
+    LSUP_TermType       type;       // Term type.
 } LSUP_Term;
 
 
+/** @brief Shorthand to test if a term is an IRI of any kind.
+ *
+ * @param term Pointer to a #LSUP_Term. The argument is evaluated twice, so it
+ *  must not have side effects.
+ */
+#define LSUP_IS_IRI(term) \
+    ((term)->type == LSUP_TERM_IRIREF || (term)->type == LSUP_TERM_NS_IRIREF)
+
+/** @brief Shorthand to test if a term is a literal of any kind.
+ *
+ * The expansion is fully parenthesized so that it can be safely negated or
+ * combined with other operators (e.g. `!LSUP_IS_LITERAL (t)`).
+ *
+ * @param term Pointer to a #LSUP_Term. The argument is evaluated twice, so it
+ *  must not have side effects.
+ */
+#define LSUP_IS_LITERAL(term) \
+    ((term)->type == LSUP_TERM_LITERAL || (term)->type == LSUP_TERM_LT_LITERAL)
+
+
 /** @brief Hash cache for data types.
  */
 struct term_cache_t {
@@ -105,14 +137,19 @@ extern LSUP_Term *LSUP_default_datatype;
  */
 
 /** @brief Create a new term.
+ *
+ * This is a generic function; it is recommended to use specialized functions
+ * such as #LSUP_iriref_new(), #LSUP_literal_new(), etc. as they have strict
+ * type checks for the metadata parameter.
  *
  * @param type[in] Term type. One of #LSUP_TermType.
  *
  * @param data[in] Term data: textual URI, literal value without data type
  *  or langtag, etc.
  *
- * @param metadata[in]: language tag (LSUP_LangTag) for language-tagged
- * literals; or data type (LSUP_Term *) for other literals. It may be NULL.
+ * @param metadata[in] Namespace map (LSUP_NSMap *) for IRI refs; language tag
+ * (LSUP_LangTag *) for language-tagged literals; or data type (LSUP_Term *)
+ * for other literals. It may be NULL.
  *
  * @return New term, which must be freed with #LSUP_term_free after use; or
  *  NULL on error.
@@ -126,33 +163,69 @@ LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
 #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL)
 
 
-/** @brief Shortcut to create a URI.
+/** @brief Shortcut to create an IRI reference.
  *
  * Must be freed with #LSUP_term_free.
  *
  * @param data[in] The URI string. If NULL, a UUID4-based URN is generated.
+ *  This cannot be NULL if the nsm parameter is not NULL.
  *
- * @param uri[out] The URI to be created.
+ * @param nsm[in] Namespace map. If not NULL, a namespace-prefixed IRI ref
+ *  (#LSUP_TERM_NS_IRIREF) is created, otherwise a regular one
+ *  (#LSUP_TERM_IRIREF).
  *
- * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
+ * @return same as #LSUP_term_new().
  */
 inline LSUP_Term *
-LSUP_uri_new (const char *data)
+LSUP_iriref_new (const char *data, LSUP_NSMap *nsm)
 {
-    return LSUP_term_new (LSUP_TERM_IRIREF, data, NULL);
+    return (
+            nsm ? LSUP_term_new (LSUP_TERM_NS_IRIREF, data, nsm) :
+            LSUP_term_new (LSUP_TERM_IRIREF, data, NULL));
 }
 
+/** @brief Shortcut to create a literal term.
+ *
+ * Must be freed with #LSUP_term_free.
+ *
+ * @param data[in] The literal string.
+ *
+ * @param datatype[in] Data type as an IRI term (LSUP_Term *). If NULL, the
+ * default data type (xsd:string) is used.
+ *
+ * @return same as #LSUP_term_new().
+ */
+inline LSUP_Term *
+LSUP_literal_new (const char *data, LSUP_Term *datatype)
+{ return LSUP_term_new (LSUP_TERM_LITERAL, data, datatype); }
 
-/* @brief Initialize or reuse a pre-allocated term structure.
+
+/** @brief Shortcut to create a language-tagged literal term.
+ *
+ * Must be freed with #LSUP_term_free.
+ *
+ * @param data[in] The literal string.
  *
- * The structure must have been previously created with #LSUP_term_new. It can
- * be reinitialized multiple times without freeing it. It must be eventually
- * freed with #LSUP_term_free.
+ * @param lang[in] Language tag string.
+ *
+ * @return same as #LSUP_term_new().
  */
-LSUP_rc
-LSUP_term_init(
-        LSUP_Term *term, LSUP_TermType type,
-        const char *data, void *metadata);
+inline LSUP_Term *
+LSUP_lt_literal_new (const char *data, char *lang)
+{ return LSUP_term_new (LSUP_TERM_LT_LITERAL, data, lang); }
+
+
+/** @brief Shortcut to create a blank node.
+ *
+ * Must be freed with #LSUP_term_free.
+ *
+ * @param data[in] The BNode identifier.
+ *
+ * @return same as #LSUP_term_new().
+ */
+inline LSUP_Term *
+LSUP_bnode_new (const char *data)
+{ return LSUP_term_new (LSUP_TERM_BNODE, data, NULL); }
 
 
 /** @brief Deserialize a buffer into a term.
@@ -176,24 +249,20 @@ LSUP_Buffer *
 LSUP_term_serialize (const LSUP_Term *term);
 
 
-/**
- * @brief Shortcut to initialize a URI.
- */
-inline LSUP_rc
-LSUP_uri_init (LSUP_Term *term, const char *data)
-{ return LSUP_term_init (term, LSUP_TERM_IRIREF, data, NULL); }
-
-
 /** @brief Hash a term.
  */
 LSUP_Key
 LSUP_term_hash (const LSUP_Term *term);
 
 
-/**
- * Compare two terms.
+/** @brief Compare two terms.
+ *
+ * The terms evaluate as equal if their hashes are equal—i.e. if they are
+ * semantically equivalent.
  */
-bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
+inline bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
+{ return LSUP_term_hash (term1) == LSUP_term_hash (term2); }
+
 
 void
 LSUP_term_done (LSUP_Term *term);
@@ -202,6 +271,54 @@ void
 LSUP_term_free (LSUP_Term *term);
 
 
+/** @brief Namespace map of a IRI ref.
+ *
+ * @param[in] iri IRI reference handle.
+ *
+ * @return A pointer to the namespace map associated with the IRI. It is
+ *  freed at program shutdown.
+ */
+LSUP_NSMap *
+LSUP_iriref_nsm (LSUP_Term *iri);
+
+
+/** @brief Get the prefix portion of a IRI ref.
+ *
+ * @param[in] iri IRI reference handle.
+ *
+ * @return String containing the protocol and domain name part of the IRI. It
+ *  should be freed after use.
+ */
+char *
+LSUP_iriref_prefix (LSUP_Term *iri);
+
+
+/** @brief Get the path portion of a IRI ref.
+ *
+ * @param[in] iri IRI reference handle.
+ *
+ * @return String containing the path of the IRI relative to the web root. For
+ * a URN, such as `urn:myns:myid`, it would be `myns:myid`. This string should
+ * be freed after use.
+ */
+char *
+LSUP_iriref_path (LSUP_Term *iri);
+
+
+/** @brief Get the fragment portion of a IRI ref.
+ *
+ * @param[in] iri IRI reference handle.
+ *
+ * @return String containing the fragment part of the IRI, or NULL if the IRI
+ * contains no fragment. It should be freed after use.
+ */
+char *
+LSUP_iriref_frag (LSUP_Term *iri);
+
+/*
+ * TRIPLES
+ */
+
 /** @brief Create a new triple from three terms.
  *
  * TODO Term types are not validated at the moment.

+ 2 - 0
src/buffer.c

@@ -53,6 +53,8 @@ LSUP_buffer_as_str (const LSUP_Buffer *buf)
         else str_size += 4; // 4 characters for ASCII representation (\xNN).
     }
 
+    log_trace ("Byte buffer str size: %lu", str_size);
+
     char *cstr = malloc (str_size);
 
     size_t cur = 0; // Position in target string.

+ 11 - 3
src/codec/nt_lexer.re

@@ -208,7 +208,7 @@ loop:
 
         log_debug ("URI data: %s", data);
 
-        *term = LSUP_uri_new ((char*)data);
+        *term = LSUP_iriref_new ((char*)data, NULL);
         free (data);
 
         return T_IRIREF;
@@ -240,7 +240,14 @@ loop:
             log_trace ("metadata: %s", metadata);
         }
 
-        *term = LSUP_term_new (type, (char *) data, (char *) metadata);
+        if (type == LSUP_TERM_LITERAL) {
+            LSUP_Term *dtype;
+            dtype = (
+                metadata ? LSUP_iriref_new ((char *) metadata, NULL) : NULL);
+
+            *term = LSUP_literal_new ((char *) data, dtype);
+
+        } else *term = LSUP_lt_literal_new ((char *) data, (char *) metadata);
 
         free (data);
         free (metadata);
@@ -330,7 +337,8 @@ LSUP_nt_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
 
     LSUP_rc rc;
 
-    LSUP_Graph *gr = LSUP_graph_new (LSUP_STORE_MEM);
+    LSUP_Graph *gr = LSUP_graph_new (
+            LSUP_iriref_new (NULL, NULL), LSUP_STORE_MEM);
     if (UNLIKELY (!gr)) return LSUP_MEM_ERR;
 
     LSUP_GraphIterator *it = LSUP_graph_add_init (gr);

+ 2 - 2
src/codec_nt.c

@@ -50,9 +50,9 @@ term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
 
             if (
                 term->datatype != 0
-                && term->datatype != LSUP_default_dtype_key
+                && term->datatype != LSUP_default_datatype
             ) {
-                metadata = LSUP_tcache_get (term->datatype)->data;
+                metadata = term->datatype->data;
                 buf_len += strlen (metadata) + 4; // Room for ^^<>
             }
 

+ 13 - 2
src/environment.c

@@ -24,7 +24,7 @@ LSUP_env_new (
     CALLOC_GUARD (env, NULL);
 
     // Default store context.
-    LSUP_Term *default_ctx_uri = LSUP_uri_new (default_ctx);
+    LSUP_Term *default_ctx_uri = LSUP_iriref_new (default_ctx, NULL);
     env->default_ctx = LSUP_term_serialize (default_ctx_uri);
     LSUP_term_free (default_ctx_uri);
     log_info ("Set up default context.");
@@ -59,13 +59,24 @@ LSUP_init (void)
 
         // URI validation pattern.
         MALLOC_GUARD (LSUP_uri_ptn, LSUP_MEM_ERR);
+        /* Uncomment in case a change in the URI regex results in an error.
+        int regex_rc = regcomp (LSUP_uri_ptn, LSUP_URI_REGEX_STR, REG_EXTENDED);
+        if (regex_rc != 0) {
+            char err_msg[128];
+            size_t err_msg_sz = regerror (regex_rc, LSUP_uri_ptn, err_msg, 128);
+            log_error (
+                    "Error compiling regular expression pattern: %s.",
+                    err_msg);
+            return LSUP_ERROR;
+        }
+        */
         if (regcomp (LSUP_uri_ptn, LSUP_URI_REGEX_STR, REG_EXTENDED) != 0) {
             log_error ("Error compiling regular expression pattern.");
             return LSUP_ERROR;
         }
 
         // Default literal datatype key.
-        LSUP_default_datatype = LSUP_uri_new (DEFAULT_DTYPE);
+        LSUP_default_datatype = LSUP_iriref_new (DEFAULT_DTYPE, NULL);
         LSUP_default_dtype_key  = LSUP_term_hash (LSUP_default_datatype);
         LSUP_tcache_add (LSUP_default_dtype_key, LSUP_default_datatype);
 

+ 29 - 19
src/graph.c

@@ -55,7 +55,8 @@ check_backend (LSUP_store_type be)
  */
 
 Graph *
-LSUP_graph_new_env (const LSUP_Env *env, const LSUP_store_type store_type)
+LSUP_graph_new_env (
+        const LSUP_Env *env, LSUP_Term *uri, const LSUP_store_type store_type)
 {
     if (UNLIKELY (!env)) {
         log_error ("No valid environment passed. Did you call LSUP_init()?");
@@ -66,7 +67,7 @@ LSUP_graph_new_env (const LSUP_Env *env, const LSUP_store_type store_type)
     LSUP_Graph *gr;
     MALLOC_GUARD (gr, NULL);
 
-    gr->uri = LSUP_uri_new (NULL);
+    gr->uri = uri;
     gr->store_type = store_type;
     gr->env = env;
     gr->nsm = env->nsm;
@@ -115,7 +116,8 @@ LSUP_graph_new_lookup_env (
     if (UNLIKELY (!gr_a)) return NULL;
 
     for (i = 0; ctx_a[i] != NULL; i++) {
-        gr_a[i] = LSUP_graph_new (LSUP_STORE_MDB);
+        gr_a[i] = LSUP_graph_new (
+                LSUP_iriref_new (NULL, NULL), LSUP_STORE_MDB);
         LSUP_Term *uri = LSUP_term_new_from_buffer (ctx_a[i]);
         gr_a[i]->uri = uri;
         LSUP_buffer_free (ctx_a[i]);
@@ -129,7 +131,8 @@ LSUP_graph_new_lookup_env (
 LSUP_Graph *
 LSUP_graph_copy (const Graph *src)
 {
-    LSUP_Graph *dest = LSUP_graph_new_env (src->env, src->store_type);
+    LSUP_Graph *dest = LSUP_graph_new_env (
+            src->env, LSUP_iriref_new (NULL, NULL), src->store_type);
     if (UNLIKELY (!dest)) return NULL;
 
     LSUP_rc rc = graph_copy_contents (src, dest);
@@ -147,16 +150,11 @@ LSUP_graph_store (
     if (src->store_type == LSUP_STORE_MDB && src->env == env)
         return LSUP_NOACTION;
 
-    LSUP_Graph *dest = LSUP_graph_new_env (env, LSUP_STORE_MDB);
+    LSUP_Graph *dest = LSUP_graph_new_env (
+            env, LSUP_iriref_new (src->uri->data, LSUP_iriref_nsm (src->uri)),
+            LSUP_STORE_MDB);
     if (UNLIKELY (!dest)) return LSUP_DB_ERR;
 
-    char *uri_str = strdup(src->uri->data);
-    if (UNLIKELY (!uri_str)) return LSUP_MEM_ERR;
-
-    LSUP_term_done (dest->uri);
-    LSUP_uri_init (dest->uri, uri_str);
-    free (uri_str);
-
     LSUP_rc rc;
     rc = graph_copy_contents (src, dest);
     if (UNLIKELY (rc < 0)) return LSUP_DB_ERR;
@@ -194,7 +192,8 @@ LSUP_graph_bool_op(
         return NULL;
     }
 
-    LSUP_Graph *res = LSUP_graph_new (LSUP_STORE_MEM);
+    LSUP_Graph *res = LSUP_graph_new (
+            LSUP_iriref_new (NULL, NULL), LSUP_STORE_MEM);
     res->ht_store = LSUP_htstore_bool_op (op, gr1->ht_store, gr2->ht_store);
 
     return res;
@@ -220,8 +219,17 @@ LSUP_graph_uri (const LSUP_Graph *gr) { return gr->uri; }
 
 
 LSUP_rc
-LSUP_graph_set_uri (LSUP_Graph *gr, const char *uri)
-{ return LSUP_uri_init (gr->uri, uri); }
+LSUP_graph_set_uri (LSUP_Graph *gr, LSUP_Term *uri)
+{
+    if (!LSUP_IS_IRI (uri)) {
+        log_error ("Term provided is not a IRI.");
+        return LSUP_VALUE_ERR;
+    }
+
+    gr->uri = uri;
+
+    return LSUP_OK;
+}
 
 
 LSUP_NSMap *
@@ -313,10 +321,9 @@ LSUP_graph_add_iter (LSUP_GraphIterator *it, const LSUP_Triple *spo)
             if (
                 term->type == LSUP_TERM_LITERAL
                 && !LSUP_mdbstore_tkey_exists (
-                        it->graph->mdb_store, term->datatype)
+                        it->graph->mdb_store, LSUP_term_hash (term->datatype))
             ) {
-                LSUP_Buffer *ser_dtype = LSUP_term_serialize (
-                        LSUP_tcache_get (term->datatype));
+                LSUP_Buffer *ser_dtype = LSUP_term_serialize (term->datatype);
                 LSUP_mdbstore_add_term (it->graph->mdb_store, ser_dtype);
                 LSUP_buffer_free (ser_dtype);
             }
@@ -543,7 +550,10 @@ graph_copy_contents (const LSUP_Graph *src, LSUP_Graph *dest)
         LSUP_rc add_rc = LSUP_graph_add_iter (add_it, &spo);
         LSUP_triple_done (&spo);
         if (LIKELY (add_rc == LSUP_OK)) rc = LSUP_OK;
-        else if (add_rc < 0) return add_rc;
+        else if (add_rc < 0) {
+            rc = add_rc;
+            break;
+        }
     }
 
     LSUP_graph_add_done (add_it);

+ 276 - 60
src/term.c

@@ -2,13 +2,28 @@
 
 #include "term.h"
 
-/*
- * tpl packing format for the term structure.
+/** @brief tpl packing format for a term.
+ *
+ * The pack elements are: 1. term type (char); 2. data (string); 3. void* type
+ * metadata, cast to 8-byte unsigned.
  */
-#define TERM_PACK_FMT "S(sUc)"
+#define TERM_PACK_FMT "csU"
 
 #define MAX_VALID_TERM_TYPE     LSUP_TERM_BNODE /* For type validation. */
 
+
+/*
+ * Data structures.
+ */
+
+struct iri_info_t {
+    LSUP_NSMap *        nsm;        // NSM handle for prefixed IRI.
+    regmatch_t          prefix;     // Matching group #1.
+    regmatch_t          path;       // Matching group #5.
+    regmatch_t          frag;       // Matching group #10.
+};
+
+
 /*
  * Extern variables.
  */
@@ -27,6 +42,15 @@ LSUP_Term *LSUP_default_datatype = NULL;
 static const char *invalid_uri_chars = "<>\" {}|\\^`";
 
 
+/*
+ * Static prototypes.
+ */
+
+LSUP_rc
+term_init(
+        LSUP_Term *term, LSUP_TermType type, const char *data, void *metadata);
+
+
 /*
  * Term API.
  */
@@ -41,7 +65,7 @@ LSUP_term_new (
     // If undefined, just set the type.
     if (type == LSUP_TERM_UNDEFINED) term->type = type;
 
-    else if (UNLIKELY (LSUP_term_init (
+    else if (UNLIKELY (term_init (
                     term, type, data, metadata) != LSUP_OK)) {
         free (term);
         return NULL;
@@ -56,43 +80,90 @@ LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
 {
     if (UNLIKELY (!sterm)) return NULL;
 
-    LSUP_Term *term;
-    MALLOC_GUARD (term, NULL);
+    LSUP_Term *term = NULL;
+    LSUP_TermType type;
+    char *data = NULL;
+    void *metadata;
 
     tpl_node *tn;
 
-    tn = tpl_map (TERM_PACK_FMT, term);
-    if (UNLIKELY (!tn)) goto fail;
+    tn = tpl_map (TERM_PACK_FMT, &type, &data, &metadata);
+    if (UNLIKELY (!tn)) goto finally;
 
-    if (UNLIKELY (tpl_load (tn, TPL_MEM, sterm->addr, sterm->size) < 0))
-        goto fail;
+    if (UNLIKELY (tpl_load (tn, TPL_MEM, sterm->addr, sterm->size) < 0)) {
+        log_error ("Error loading serialized term.");
+        goto finally;
+    }
+    if (UNLIKELY (tpl_unpack (tn, 0) < 0)) {
+        log_error ("Error unpacking serialized term.");
+        goto finally;
+    }
 
-    if (UNLIKELY (tpl_unpack (tn, 0) < 0)) goto fail;
+    if (type == LSUP_TERM_LT_LITERAL)
+        term = LSUP_lt_literal_new (data, (char *)&metadata);
+    else term = LSUP_term_new (type, data, metadata);
 
+finally:
     tpl_free (tn);
+    free (data);
 
     return term;
-
-fail:
-    tpl_free (tn);
-    free (term);
-
-    return NULL;
 }
 
 
 LSUP_Buffer *
 LSUP_term_serialize (const LSUP_Term *term)
 {
+    /*
+     * In serializing a term, the fact that two terms of different types may
+     * be semantically identical must be taken into account. Specifically, a
+     * namespace-prefixed IRI ref is identical to its fully qualified version,
+     * and a LSUP_TERM_LT_LITERAL with no language tag is identical to a
+     * LSUP_TERM_LITERAL of xsd:string type, made up of the same string. Such
+     * terms must have identical serializations.
+     */
+
     if (UNLIKELY (!term)) return NULL;
 
+    LSUP_Term *tmp_term;
+    void *metadata = NULL;
+
+    if (term->type == LSUP_TERM_NS_IRIREF) {
+        // For IRI refs, simply serialize the FQ version of the term.
+        char *fq_uri;
+
+        if (LSUP_nsmap_normalize_uri (
+            term->iri_info->nsm, term->data, &fq_uri
+        ) != LSUP_OK) return NULL;
+
+        tmp_term = LSUP_iriref_new (fq_uri, NULL);
+
+    } else if (term->type == LSUP_TERM_LT_LITERAL) {
+        // For LT literals with empty lang tag, convert to a normal xsd:string.
+        if (strlen (term->lang) == 0)
+            tmp_term = LSUP_literal_new (term->data, NULL);
+        else tmp_term = LSUP_lt_literal_new (term->data, (char *) term->lang);
+
+    } else tmp_term = LSUP_term_new (
+            term->type, term->data, (void *) term->datatype);
+    // "datatype" can be anything here since it's cast to void *.
+
+    // metadata field is ignored for IRI ref.
+    if (tmp_term->type == LSUP_TERM_LITERAL)
+        metadata = tmp_term->datatype;
+    else if (tmp_term->type == LSUP_TERM_LT_LITERAL)
+        memcpy (&metadata, tmp_term->lang, sizeof (metadata));
+
     LSUP_Buffer *sterm;
     MALLOC_GUARD (sterm, NULL);
 
     int rc = tpl_jot (
-            TPL_MEM, &sterm->addr, &sterm->size, TERM_PACK_FMT, term);
+            TPL_MEM, &sterm->addr, &sterm->size, TERM_PACK_FMT,
+            &tmp_term->type, &tmp_term->data, &metadata);
+    LSUP_term_free (tmp_term);
+
     if (rc != 0) {
-        free (sterm);
+        LSUP_buffer_free (sterm);
         return NULL;
     }
 
@@ -101,7 +172,7 @@ LSUP_term_serialize (const LSUP_Term *term)
 
 
 LSUP_rc
-LSUP_term_init(
+term_init (
         LSUP_Term *term, LSUP_TermType type,
         const char *data, void *metadata)
 {
@@ -117,29 +188,49 @@ LSUP_term_init(
 
     term->type = type;
 
-    char *data_tmp;
     if (data) {
-        // Validate URI.
-        if (term->type == LSUP_TERM_IRIREF) {
-            if (strpbrk (data, invalid_uri_chars) != NULL) {
+        // Validate IRI.
+        if (LSUP_IS_IRI (term)) {
+            char *fquri;
+
+            // Find fully qualified IRI to parse.
+            if (term->type == LSUP_TERM_NS_IRIREF) {
+                if (LSUP_nsmap_normalize_uri (
+                    metadata, data, &fquri) != LSUP_OK
+                ) {
+                    log_error ("Error normalizing IRI data.");
+
+                    return LSUP_VALUE_ERR;
+                }
+                log_debug ("Fully qualified IRI: %s", fquri);
+            } else fquri = (char *) data;
+
+            if (strpbrk (fquri, invalid_uri_chars) != NULL) {
                 log_error (
                         "Characters %s are not allowed. Got: %s\n",
-                        invalid_uri_chars, data);
+                        invalid_uri_chars, fquri);
 
                 return LSUP_VALUE_ERR;
             }
 
-            if (regexec (LSUP_uri_ptn, data, 0, NULL, 0) != 0) {
+            // Capture interesting IRI parts.
+            regmatch_t matches[11];
+            if (UNLIKELY (regexec (LSUP_uri_ptn, fquri, 11, matches, 0) != 0)) {
                 fprintf (stderr, "Error matching URI pattern.\n");
 
                 return LSUP_VALUE_ERR;
             }
-        }
+            if (term->type == LSUP_TERM_NS_IRIREF) free (fquri);
+
+            MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
 
-        data_tmp = realloc (term->data, strlen (data) + 1);
-        if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
+            term->iri_info->prefix = matches[1];
+            term->iri_info->path = matches[5];
+            term->iri_info->frag = matches[10];
+            term->iri_info->nsm = metadata;
+        }
 
-        strcpy (data_tmp, data);
+        term->data = strdup (data);
 
     } else {
         // No data. Make up a random UUID or URI if allowed.
@@ -151,41 +242,73 @@ LSUP_term_init(
             uuid_unparse_lower (uuid, uuid_str);
 
             if (type == LSUP_TERM_IRIREF) {
-                data_tmp = realloc (term->data, UUID4_URN_SIZE);
-                if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
-                snprintf (data_tmp, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
-                term->data = data_tmp;
-            } else {
-                data_tmp = realloc (term->data, sizeof(uuid_str));
-                strcpy(data_tmp, uuid_str);
-            }
+                term->data = malloc (UUID4_URN_SIZE);
+                snprintf (
+                        term->data, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
+
+                MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
+
+                // Allocate IRI match patterns manually.
+                term->iri_info->prefix.rm_so = 0;
+                term->iri_info->prefix.rm_eo = 4;
+                term->iri_info->path.rm_so = 4;
+                term->iri_info->path.rm_eo = UUIDSTR_SIZE + 6;
+                term->iri_info->frag.rm_so = -1;
+                term->iri_info->frag.rm_eo = -1;
+                term->iri_info->nsm = NULL;
+
+            } else term->data = strdup (uuid_str);
         } else {
             log_error ("No data provided for term.");
             return LSUP_VALUE_ERR;
         }
     }
-    term->data = data_tmp;
 
     if (term->type == LSUP_TERM_LT_LITERAL) {
-        // Lang tags longer than 7 characters will be truncated.
-        strncpy (term->lang, metadata, sizeof (term->lang) - 1);
-        term->lang[7] = '\0';
+        if (!metadata) {
+            log_warn ("Lang tag is NULL. Creating a non-tagged literal.");
+            term->type = LSUP_TERM_LITERAL;
+        } else {
+            char *lang_str = (char *) metadata;
+            log_trace("Lang string: %s", lang_str);
+            // Lang tags longer than 7 characters will be truncated.
+            strncpy(term->lang, lang_str, sizeof (term->lang) - 1);
+            if (strlen (term->lang) < 1) {
+                log_error ("Lang tag cannot be an empty string.");
+                return LSUP_VALUE_ERR;
+            }
+            term->lang[7] = '\0';
+        }
+    }
 
-    } else if (term->type == LSUP_TERM_LITERAL) {
-        log_trace ("Storing data type.");
-        if (metadata && strcmp (metadata, DEFAULT_DTYPE) != 0) {
-            LSUP_Term *dtype = LSUP_uri_new ((char *) metadata);
-            term->datatype = LSUP_term_hash (dtype);
+    if (term->type == LSUP_TERM_LITERAL) {
+        term->datatype = metadata;
+        if (! term->datatype) term->datatype = LSUP_default_datatype;
+        log_trace ("Storing data type: %s", term->datatype->data);
 
-            if (LSUP_tcache_get (term->datatype) == NULL)
-                LSUP_tcache_add (term->datatype, dtype);
+        if (! LSUP_IS_IRI (term->datatype )) {
+            log_error (
+                    "Literal data tpe is not a IRI: %s",
+                    term->datatype ->data);
 
-            else LSUP_term_free (dtype);
+            return LSUP_VALUE_ERR;
+        }
 
-        } else term->datatype = LSUP_default_dtype_key;
+        if (term->datatype != LSUP_default_datatype) {
+            uint32_t dtype_hash = LSUP_term_hash (term->datatype );
 
-    // Blank node.
-    } else {
+            LSUP_Term *tmp = (LSUP_Term *) LSUP_tcache_get (dtype_hash);
+            if (!tmp) LSUP_tcache_add (dtype_hash, term->datatype);
+            else if (term->datatype != tmp) {
+                free (term->datatype);
+                term->datatype = tmp;
+            }
+        }
+
+        log_trace ("Datatype address: %p", term->datatype);
+        log_trace ("Datatype hash: %lu", LSUP_term_hash (term->datatype));
+
+    } else if (term->type == LSUP_TERM_BNODE) {
         // TODO This is not usable for global skolemization.
         term->bnode_id = XXH64 (
                 term->data, strlen (term->data) + 1, HASH_SEED);
@@ -211,13 +334,40 @@ LSUP_term_hash (const LSUP_Term *term)
 }
 
 
+/* DEPRECATED
 bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
 {
-    if (term1->type != term2->type)
-        return false;
+    LSUP_TermType type1, type2;
+    char *data1, *data2;
+
+    // Normalize IRI data before comparing.
+    if (term1->type == LSUP_TERM_NS_IRIREF) {
+        type1 = LSUP_TERM_IRIREF;
+        if (UNLIKELY (LSUP_nsmap_normalize_uri (
+            term1->iri_info->nsm, term1->data, &data1
+        ) != LSUP_OK)) return LSUP_ERROR;
+    } else {
+        type1 = term1->type;
+        data1 = term1->data;
+    }
+
+    if (term2->type == LSUP_TERM_NS_IRIREF) {
+        type2 = LSUP_TERM_IRIREF;
+        if (UNLIKELY (LSUP_nsmap_normalize_uri (
+            term2->iri_info->nsm, term2->data, &data2
+        ) != LSUP_OK)) return LSUP_ERROR;
+    } else {
+        type2 = term2->type;
+        data2 = term2->data;
+    }
+
+    if (type1 != type2) return false;
 
-    if (strcmp (term1->data, term2->data) != 0)
-        return false;
+    int cmp = strcmp (data1, data2);
+    if (term1->type == LSUP_TERM_NS_IRIREF) free (data1);
+    if (term2->type == LSUP_TERM_NS_IRIREF) free (data2);
+
+    if (cmp != 0) return false;
 
     if (term1->type == LSUP_TERM_LITERAL)
         return term1->datatype == term2->datatype;
@@ -227,6 +377,7 @@ bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
 
     return true;
 }
+*/
 
 
 void LSUP_term_done (LSUP_Term *term)
@@ -245,6 +396,66 @@ void LSUP_term_free (LSUP_Term *term)
 }
 
 
+/*
+ * Get the namespace map stored with an IRI ref term.
+ *
+ * Return the `nsm` member of the term's IRI info (which may itself be NULL),
+ * or NULL after logging an error if the term is neither an IRIREF nor a
+ * NS_IRIREF.
+ */
+LSUP_NSMap *
+LSUP_iriref_nsm (LSUP_Term *iri)
+{
+    bool is_iri_type = (
+            iri->type == LSUP_TERM_IRIREF
+            || iri->type == LSUP_TERM_NS_IRIREF);
+
+    if (is_iri_type) return iri->iri_info->nsm;
+
+    log_error ("Term is not a IRI ref type.");
+
+    return NULL;
+}
+
+
+/*
+ * Copy one regex-matched part of an IRI ref term into a new string.
+ *
+ * The match offsets kept in a term's IRI info are computed on the fully
+ * qualified IRI, while a namespace-prefixed term keeps its prefixed form in
+ * `data`. For NS_IRIREF terms the IRI is therefore expanded again through the
+ * term's namespace map before the offsets are applied; slicing `data`
+ * directly would yield the wrong substring or read past its end.
+ *
+ * The caller must already have verified that `iri` is an IRI ref type.
+ *
+ * Return a newly allocated string that the caller must free, or NULL if the
+ * part was not matched, the IRI could not be expanded, or the offsets do not
+ * fit the IRI string.
+ */
+static char *
+iriref_substr (const LSUP_Term *iri, regmatch_t match)
+{
+    // Optional group that did not participate in the match.
+    if (match.rm_so == -1) return NULL;
+
+    const char *src = iri->data;
+    char *fquri = NULL;
+
+    if (iri->type == LSUP_TERM_NS_IRIREF) {
+        if (LSUP_nsmap_normalize_uri (
+            iri->iri_info->nsm, iri->data, &fquri) != LSUP_OK
+        ) {
+            log_error ("Error normalizing IRI data.");
+
+            return NULL;
+        }
+        src = fquri;
+    }
+
+    char *part = NULL;
+    // Refuse offsets that do not fit the string, e.g. if the namespace map
+    // was modified after the term was created.
+    if (match.rm_so <= match.rm_eo && (size_t) match.rm_eo <= strlen (src))
+        part = strndup (src + match.rm_so, match.rm_eo - match.rm_so);
+    else log_error ("IRI part offsets are out of range.");
+
+    free (fquri);
+
+    return part;
+}
+
+
+/*
+ * Get the prefix part of an IRI ref term, e.g. "http://example.org" for
+ * "http://example.org/term#12345".
+ *
+ * Return a newly allocated string that the caller must free, or NULL if the
+ * term is not an IRI ref type (an error is logged) or no prefix was matched.
+ */
+char *
+LSUP_iriref_prefix (LSUP_Term *iri)
+{
+    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
+        log_error ("Term is not a IRI ref type.");
+        return NULL;
+    }
+
+    return iriref_substr (iri, iri->iri_info->prefix);
+}
+
+
+/*
+ * Get the path part of an IRI ref term, e.g. "/term#12345" for
+ * "http://example.org/term#12345".
+ *
+ * Return a newly allocated string that the caller must free, or NULL if the
+ * term is not an IRI ref type (an error is logged) or no path was matched.
+ */
+char *
+LSUP_iriref_path (LSUP_Term *iri)
+{
+    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
+        log_error ("Term is not a IRI ref type.");
+        return NULL;
+    }
+
+    return iriref_substr (iri, iri->iri_info->path);
+}
+
+
+/*
+ * Get the fragment part of an IRI ref term, e.g. "12345" for
+ * "http://example.org/term#12345".
+ *
+ * Return a newly allocated string that the caller must free, or NULL if the
+ * term is not an IRI ref type (an error is logged) or the IRI has no
+ * fragment.
+ */
+char *
+LSUP_iriref_frag (LSUP_Term *iri)
+{
+    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
+        log_error ("Term is not a IRI ref type.");
+        return NULL;
+    }
+
+    return iriref_substr (iri, iri->iri_info->frag);
+}
+
+
 /*
  * Triple API.
  */
@@ -359,10 +570,15 @@ LSUP_tcache_get (const LSUP_Key key)
 }
 
 
-// Extern inline functions.
+/*
+ * Extern inline functions.
+ */
 
 LSUP_Key LSUP_term_hash (const LSUP_Term *term);
-LSUP_Term *LSUP_uri_new (const char *data);
-LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data);
+LSUP_Term *LSUP_iriref_new (const char *data, LSUP_NSMap *nsm);
+LSUP_Term *LSUP_literal_new (const char *data, LSUP_Term *datatype);
+LSUP_Term *LSUP_lt_literal_new (const char *data, char *lang);
+LSUP_Term *LSUP_bnode_new (const char *data);
+bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
 LSUP_Term *LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n);
 LSUP_Key LSUP_triple_hash (const LSUP_Triple *trp);

+ 2 - 3
test.c

@@ -14,9 +14,8 @@ int main(int argc, char **argv) {
     // Clear out database from previous test.
     rm_r (TEST_STORE_PATH);
 
-    LSUP_init();
-
-    int rc;
+    int rc = LSUP_init();
+    if (rc != LSUP_OK) return rc;
 
     if (
         term_tests() ||

+ 28 - 23
test/assets/triples.h

@@ -7,38 +7,43 @@
 
 LSUP_Triple *create_triples()
 {
+    LSUP_NSMap *nsm = LSUP_nsmap_new();
+    LSUP_nsmap_add (nsm, "ns1", "urn:s:");
+    LSUP_nsmap_add (nsm, "ns2", "urn:p:");
+
     LSUP_Triple *trp;
     // Leave 1 spare NULL as a sentinel
     trp = calloc (NUM_TRP + 1, sizeof (LSUP_Triple));
     if (!trp) abort();
 
-    trp[0].s = LSUP_uri_new ("urn:s:0");
-    trp[0].p = LSUP_uri_new ("urn:p:0");
-    trp[0].o = LSUP_uri_new ("urn:o:0");
+    trp[0].s = LSUP_iriref_new("urn:s:0", NULL);
+    trp[0].p = LSUP_iriref_new("urn:p:0", NULL);
+    trp[0].o = LSUP_iriref_new("urn:o:0", NULL);
 
-    trp[1].s = LSUP_uri_new ("urn:s:1");
-    trp[1].p = LSUP_uri_new ("urn:p:1");
-    trp[1].o = LSUP_uri_new ("urn:o:1");
+    trp[1].s = LSUP_iriref_new("urn:s:1", NULL);
+    trp[1].p = LSUP_iriref_new("urn:p:1", NULL);
+    trp[1].o = LSUP_iriref_new("urn:o:1", NULL);
 
-    trp[2].s = LSUP_uri_new ("urn:s:2");
-    trp[2].p = LSUP_uri_new ("urn:p:2");
-    trp[2].o = LSUP_uri_new ("urn:o:2");
+    trp[2].s = LSUP_iriref_new("urn:s:2", NULL);
+    trp[2].p = LSUP_iriref_new("urn:p:2", NULL);
+    trp[2].o = LSUP_iriref_new("urn:o:2", NULL);
 
-    trp[3].s = LSUP_uri_new ("urn:s:0");
-    trp[3].p = LSUP_uri_new ("urn:p:1");
-    trp[3].o = LSUP_uri_new ("urn:o:2");
+    trp[3].s = LSUP_iriref_new("urn:s:0", NULL);
+    trp[3].p = LSUP_iriref_new("urn:p:1", NULL);
+    trp[3].o = LSUP_iriref_new("urn:o:2", NULL);
 
-    trp[4].s = LSUP_uri_new ("urn:s:0");
-    trp[4].p = LSUP_uri_new ("urn:p:2");
-    trp[4].o = LSUP_term_new (LSUP_TERM_LITERAL, "String 1", NULL);
+    trp[4].s = LSUP_iriref_new("urn:s:0", NULL);
+    trp[4].p = LSUP_iriref_new("ns2:2", nsm);
+    trp[4].o = LSUP_literal_new ("String 1", NULL);
 
-    trp[5].s = LSUP_uri_new ("urn:s:0");
-    trp[5].p = LSUP_uri_new ("urn:p:5");
-    trp[5].o = LSUP_term_new(LSUP_TERM_LITERAL, "String 1", "xsd:string");
+    trp[5].s = LSUP_iriref_new("ns1:0", nsm);
+    trp[5].p = LSUP_iriref_new("urn:p:5", NULL);
+    trp[5].o = LSUP_literal_new(
+            "String 1", LSUP_iriref_new ("urn:mydatatype:string", NULL));
 
-    trp[6].s = LSUP_uri_new ("urn:s:1");
-    trp[6].p = LSUP_uri_new ("urn:p:6");
-    trp[6].o = LSUP_term_new(LSUP_TERM_LT_LITERAL, "String 1", "es-ES");
+    trp[6].s = LSUP_iriref_new("urn:s:1", NULL);
+    trp[6].p = LSUP_iriref_new("urn:p:6", NULL);
+    trp[6].o = LSUP_lt_literal_new("String 1", "es-ES");
 
     // Unique triple from reused pointers. Do not double-free.
     trp[7].s = trp[0].s; // <urn:s:0>
@@ -50,7 +55,7 @@ LSUP_Triple *create_triples()
     trp[8].p = trp[2].p;
     trp[8].o = trp[5].o;
 
-    // Duplicate of trp[7] from different pointers with same value.
+    // Duplicate of trp[7] from different term type but semantically identical.
     // Do not double-free.
     trp[9].s = trp[5].s;
     trp[9].p = trp[4].p;
@@ -71,5 +76,5 @@ void free_triples (LSUP_Triple *trp)
 
     free (trp);
 }
-#endif
+#endif  /* _TEST_ASSETS_H */
 

+ 0 - 0
test/keyset.c


+ 4 - 4
test/test.h

@@ -1,11 +1,11 @@
+#ifndef _LSUP_TEST_H
+#define _LSUP_TEST_H
+
 #include <stdio.h>
 #include <stdlib.h>
 
 #include "lsup_rdf.h"
 
-#ifndef _LSUP_TEST_H
-#define _LSUP_TEST_H
-
 /**
  * Minimal unit testing framework.
  * Inspired from http://www.jera.com/techinfo/jtns/jtn002.html
@@ -66,4 +66,4 @@
 
 int tests_run;
 
-#endif
+#endif  /* _LSUP_TEST_H */

+ 13 - 13
test/test_codec_nt.c

@@ -9,18 +9,17 @@ static LSUP_Term **
 init_terms (void)
 {
     LSUP_Term **terms = malloc (TERM_CT * sizeof (*terms));
-    terms[0] = LSUP_uri_new ("urn:local:s1");
-    terms[1] = LSUP_uri_new ("http://example.org/p1");
-    terms[2] = LSUP_term_new (LSUP_TERM_LITERAL, "hello", NULL);
-    terms[3] = LSUP_term_new (LSUP_TERM_LT_LITERAL, "hello", "en-US");
-    terms[4] = LSUP_term_new (LSUP_TERM_LT_LITERAL, "hello", "es-ES");
-    terms[5] = LSUP_term_new (
-            LSUP_TERM_LITERAL, "25",
-            "http://www.w3.org/2001/XMLSchema#integer");
-    terms[6] = LSUP_term_new (
-            LSUP_TERM_LITERAL, "This \\is\\ a \"multi-line\"\n'literal'\t.",
-            NULL);
-    terms[7] = LSUP_term_new (LSUP_TERM_BNODE, "bn1", NULL);
+    terms[0] = LSUP_iriref_new("urn:local:s1", NULL);
+    terms[1] = LSUP_iriref_new("http://example.org/p1", NULL);
+    terms[2] = LSUP_literal_new ("hello", NULL);
+    terms[3] = LSUP_lt_literal_new ("hello", "en-US");
+    terms[4] = LSUP_lt_literal_new ("hello", "es-ES");
+    terms[5] = LSUP_literal_new (
+            "25",
+            LSUP_iriref_new ("http://www.w3.org/2001/XMLSchema#integer", NULL));
+    terms[6] = LSUP_literal_new (
+            "This \\is\\ a \"multi-line\"\n'literal'\t.", NULL);
+    terms[7] = LSUP_bnode_new ("bn1");
     terms[8] = LSUP_term_new (LSUP_TERM_UNDEFINED, "bogus", NULL);
     terms[9] = TERM_DUMMY;
 
@@ -161,7 +160,8 @@ test_encode_nt_term()
 
 static int test_encode_nt_graph()
 {
-    LSUP_Graph *gr = LSUP_graph_new (LSUP_STORE_MEM);
+    LSUP_Graph *gr = LSUP_graph_new (
+            LSUP_iriref_new (NULL, NULL), LSUP_STORE_MEM);
     if (!gr) return LSUP_MEM_ERR;
 
     size_t ins;

+ 7 - 6
test/test_graph.c

@@ -8,10 +8,10 @@ static int
 _graph_new (LSUP_store_type type)
 {
     LSUP_Graph *gr;
-    gr = LSUP_graph_new (type);
+    gr = LSUP_graph_new (LSUP_iriref_new (NULL, NULL), type);
     ASSERT (gr != NULL, "Error creating graph!");
 
-    EXPECT_PASS (LSUP_graph_set_uri (gr, "urn:gr:1"));
+    EXPECT_PASS (LSUP_graph_set_uri (gr, LSUP_iriref_new ("urn:gr:1", NULL)));
     EXPECT_STR_EQ (LSUP_graph_uri (gr)->data, "urn:gr:1");
 
     ASSERT (
@@ -30,7 +30,7 @@ _graph_add (LSUP_store_type type)
 {
     LSUP_Triple *trp = create_triples();
 
-    LSUP_Graph *gr = LSUP_graph_new (type);
+    LSUP_Graph *gr = LSUP_graph_new (LSUP_iriref_new (NULL, NULL), type);
     ASSERT (gr != NULL, "Error creating graph!");
 
     size_t ct;
@@ -107,7 +107,7 @@ _graph_lookup (LSUP_store_type type)
     };
     */
 
-    LSUP_Graph *gr = LSUP_graph_new (type);
+    LSUP_Graph *gr = LSUP_graph_new (LSUP_iriref_new (NULL, NULL), type);
 
     size_t ct;
     LSUP_graph_add (gr, trp, &ct);
@@ -150,7 +150,7 @@ _graph_remove (LSUP_store_type type)
 {
     LSUP_Triple *trp = create_triples();
 
-    LSUP_Graph *gr = LSUP_graph_new (type);
+    LSUP_Graph *gr = LSUP_graph_new (LSUP_iriref_new (NULL, NULL), type);
 
     size_t ct;
     LSUP_graph_add (gr, trp, &ct);
@@ -217,7 +217,8 @@ static int test_graph_copy()
 {
     LSUP_Triple *trp = create_triples();
 
-    LSUP_Graph *gr1 = LSUP_graph_new (LSUP_STORE_MEM);
+    LSUP_Graph *gr1 = LSUP_graph_new (
+            LSUP_iriref_new (NULL, NULL), LSUP_STORE_MEM);
     ASSERT (gr1 != NULL, "Error creating graph!");
 
     LSUP_graph_add (gr1, trp, NULL);

+ 3 - 3
test/test_store_mdb.c

@@ -107,7 +107,7 @@ static int test_quad_store()
 {
     EXPECT_PASS (LSUP_mdbstore_setup (path, true));
 
-    LSUP_Term *ctx1 = LSUP_uri_new ("urn:c:1");
+    LSUP_Term *ctx1 = LSUP_iriref_new("urn:c:1", NULL);
     LSUP_Buffer *sc1 = LSUP_term_serialize (ctx1);
 
     LSUP_MDBStore *store = LSUP_mdbstore_new (path, sc1); // quad store.
@@ -127,7 +127,7 @@ static int test_quad_store()
     EXPECT_PASS (LSUP_mdbstore_add (store, NULL, ser_trp, 6, &ct));
     EXPECT_INT_EQ (ct, 6);
 
-    LSUP_Term *ctx2 = LSUP_uri_new ("urn:c:2");
+    LSUP_Term *ctx2 = LSUP_iriref_new("urn:c:2", NULL);
     LSUP_Buffer *sc2 = LSUP_term_serialize (ctx2);
 
     // Only triples 4÷9 in context 2 (effectively 4 non-duplicates).
@@ -138,7 +138,7 @@ static int test_quad_store()
     EXPECT_INT_EQ (LSUP_mdbstore_size (store), 10);
 
     // This context has no triples.
-    LSUP_Term *ctx3 = LSUP_uri_new ("urn:c:3");
+    LSUP_Term *ctx3 = LSUP_iriref_new("urn:c:3", NULL);
     LSUP_Buffer *sc3 = LSUP_term_serialize (ctx3);
 
     // Test lookups.

+ 121 - 36
test/test_term.c

@@ -1,35 +1,109 @@
 #include "test.h"
 
 
-static int test_term_new()
+/*
+ * Test IRI ref terms: creation from fully qualified and namespace-prefixed
+ * strings, auto-generated IRIs, substring accessors and equality.
+ */
+static int test_iriref()
+{
+    char *uri1_data = "http://example.org/term#12345";
+    char *uri2_data = "ns1:12345";
+    char *uri3_data = "ns2:12345";
+
+    // Two separate maps binding different prefixes to the same namespace.
+    LSUP_NSMap *nsm1 = LSUP_nsmap_new();
+    LSUP_nsmap_add (nsm1, "ns1", "http://example.org/term#");
+
+    LSUP_NSMap *nsm2 = LSUP_nsmap_new();
+    LSUP_nsmap_add (nsm2, "ns2", "http://example.org/term#");
+
+    LSUP_Term *uri1 = LSUP_iriref_new (uri1_data, NULL);
+    ASSERT (uri1, "IRI is NULL!");
+    ASSERT (LSUP_iriref_nsm (uri1) == NULL, "Wrong NSMap!");
+
+    // The substring accessors return newly allocated strings: keep the
+    // pointers so that they can be released after the comparison.
+    char *prefix_str = LSUP_iriref_prefix (uri1);
+    char *path_str = LSUP_iriref_path (uri1);
+    char *frag_str = LSUP_iriref_frag (uri1);
+    EXPECT_STR_EQ (prefix_str, "http://example.org");
+    EXPECT_STR_EQ (path_str, "/term#12345");
+    EXPECT_STR_EQ (frag_str, "12345");
+    free (prefix_str);
+    free (path_str);
+    free (frag_str);
+
+    LSUP_Term *uri2 = LSUP_iriref_new (uri2_data, nsm1);
+    ASSERT (uri2, "IRI is NULL!");
+    ASSERT (LSUP_iriref_nsm (uri2) == nsm1, "Wrong NSMap!");
+
+    LSUP_Term *uri3 = LSUP_iriref_new (uri3_data, nsm2);
+    ASSERT (uri3, "IRI is NULL!");
+    ASSERT (LSUP_iriref_nsm (uri3) == nsm2, "Wrong NSMap!");
+
+    // NULL data: an IRI is generated for each term.
+    LSUP_Term *uri4 = LSUP_iriref_new (NULL, NULL);
+    ASSERT (uri4, "IRI is NULL!");
+
+    LSUP_Term *uri5 = LSUP_iriref_new (NULL, NULL);
+    ASSERT (uri5, "IRI is NULL!");
+
+    EXPECT_INT_EQ (uri1->type, LSUP_TERM_IRIREF);
+    EXPECT_INT_EQ (uri2->type, LSUP_TERM_NS_IRIREF);
+    EXPECT_INT_EQ (uri3->type, LSUP_TERM_NS_IRIREF);
+    EXPECT_INT_EQ (uri4->type, LSUP_TERM_IRIREF);
+    EXPECT_INT_EQ (uri5->type, LSUP_TERM_IRIREF);
+
+    // Prefixed and fully qualified forms of the same IRI must be equal;
+    // generated IRIs must differ from everything else and from each other.
+    ASSERT (LSUP_term_equals (uri1, uri2), "IRIs don't match!");
+    ASSERT (LSUP_term_equals (uri2, uri3), "IRIs don't match!");
+    ASSERT (!LSUP_term_equals (uri3, uri4), "IRIs shouldn't match!");
+    ASSERT (!LSUP_term_equals (uri4, uri5), "IRIs shouldn't match!");
+
+    // NOTE(review): nsm1 and nsm2 are not released here; no namespace map
+    // destructor is visible from this test. Confirm and free them if one
+    // exists.
+    LSUP_term_free (uri1);
+    LSUP_term_free (uri2);
+    LSUP_term_free (uri3);
+    LSUP_term_free (uri4);
+    LSUP_term_free (uri5);
+
+    return 0;
+}
+
+/*
+ * Test creation and comparison of literal terms: custom data type, explicit
+ * and implicit default data type, and language-tagged literals.
+ */
+static int test_literal()
 {
     char *data = "hello";
     char *datatype = "urn:my:datatype";
+    char *lang = "en-US";
 
-    LSUP_Term *term = LSUP_term_new (LSUP_TERM_LITERAL, data, datatype);
-    EXPECT_STR_EQ (term->data, data);
-    EXPECT_STR_EQ (LSUP_tcache_get (term->datatype)->data, datatype);
+    // Literal with a custom data type, passed as an IRI ref term.
+    LSUP_Term *lit1 = LSUP_literal_new (data, LSUP_iriref_new (datatype, NULL));
+    EXPECT_INT_EQ (lit1->type, LSUP_TERM_LITERAL);
+    EXPECT_STR_EQ (lit1->data, data);
+    EXPECT_STR_EQ (lit1->datatype->data, datatype);
 
-    char *lang = "en-US";
-    LSUP_term_init (term, LSUP_TERM_LT_LITERAL, data, lang);
-    EXPECT_STR_EQ (term->data, data);
-    EXPECT_STR_EQ (term->lang, lang);
+    // Literal with the default data type passed explicitly.
+    LSUP_Term *lit2 = LSUP_literal_new (data, LSUP_default_datatype);
+    EXPECT_INT_EQ (lit2->type, LSUP_TERM_LITERAL);
+    EXPECT_STR_EQ (lit2->data, data);
+    EXPECT_STR_EQ (lit2->datatype->data, DEFAULT_DTYPE);
+
+    // Literal with a NULL data type: expected to get the default one.
+    LSUP_Term *lit3 = LSUP_literal_new (data, NULL);
+    EXPECT_INT_EQ (lit3->type, LSUP_TERM_LITERAL);
+    EXPECT_STR_EQ (lit3->data, data);
+    EXPECT_STR_EQ (lit3->datatype->data, DEFAULT_DTYPE);
+
+    // Data types are compared by pointer here: both default-typed literals
+    // must reference the same data type term.
+    ASSERT (lit1->datatype != lit2->datatype, "Wrong data type match!");
+    ASSERT (lit2->datatype == lit3->datatype, "Data type mismatch!");
 
-    char *uri_data = "urn:id:2144564356";
-    LSUP_uri_init (term, uri_data);
-    EXPECT_STR_EQ (term->data, uri_data);
+    ASSERT (!LSUP_term_equals (lit1, lit2), "Wrong term match!");
+    ASSERT (LSUP_term_equals (lit2, lit3), "Term mismatch!");
 
-    LSUP_term_free (term);
+    // Language-tagged literal.
+    LSUP_Term *lt_lit1 = LSUP_lt_literal_new (data, lang);
+    EXPECT_INT_EQ (lt_lit1->type, LSUP_TERM_LT_LITERAL);
+    EXPECT_STR_EQ (lt_lit1->data, data);
+    EXPECT_STR_EQ (lt_lit1->lang, lang);
+
+    // LT-literal without lang is a normal string literal.
+    LSUP_Term *lt_lit2 = LSUP_lt_literal_new (data, NULL);
+    EXPECT_INT_EQ (lt_lit2->type, LSUP_TERM_LITERAL);
+    EXPECT_STR_EQ (lt_lit2->data, data);
+
+    ASSERT (LSUP_term_equals (lt_lit2, lit2), "Term mismatch!");
+    EXPECT_STR_EQ (lt_lit2->datatype->data, DEFAULT_DTYPE);
+
+    LSUP_term_free (lit1);
+    LSUP_term_free (lit2);
+    LSUP_term_free (lit3);
+    LSUP_term_free (lt_lit1);
+    LSUP_term_free (lt_lit2);
 
     return 0;
 }
 
 static int test_term_serialize_deserialize()
 {
-    LSUP_Term *uri = LSUP_uri_new ("http://hello.org");
-    LSUP_Term *lit = LSUP_term_new (LSUP_TERM_LITERAL, "hello", NULL);
-    LSUP_Term *tlit = LSUP_term_new (LSUP_TERM_LITERAL, "hello", "xsd:string");
-    LSUP_Term *tllit = LSUP_term_new (LSUP_TERM_LT_LITERAL, "hello", "en-US");
+    LSUP_Term *uri = LSUP_iriref_new ("http://hello.org", NULL);
+    LSUP_Term *lit = LSUP_literal_new ("hello", NULL);
+    LSUP_Term *tlit = LSUP_literal_new (
+            "hello", LSUP_iriref_new ("urn:mydatatype:string", NULL));
+    LSUP_Term *llit = LSUP_lt_literal_new ("hello", "en-US");
 
     LSUP_Buffer *sterm;
     LSUP_Term *dsterm;
@@ -70,15 +144,15 @@ static int test_term_serialize_deserialize()
     LSUP_buffer_free (sterm);
     LSUP_term_free (dsterm);
 
-    sterm = LSUP_term_serialize (tllit);
+    sterm = LSUP_term_serialize (llit);
     ASSERT (sterm != NULL, "Error serializing term!");
     //log_info ("%s", "Serialized typed and language-tagged URI: ");
     //LSUP_buffer_print (sterm);
     //log_info ("%s", "\n");
     dsterm = LSUP_term_new_from_buffer (sterm);
     ASSERT (dsterm != NULL, "Error deserializing term!");
-    ASSERT (LSUP_term_equals (dsterm, tllit), "URI serialization error!");
-    LSUP_term_free (tllit);
+    ASSERT (LSUP_term_equals (dsterm, llit), "URI serialization error!");
+    LSUP_term_free (llit);
     LSUP_buffer_free (sterm);
     LSUP_term_free (dsterm);
 
@@ -88,36 +162,47 @@ static int test_term_serialize_deserialize()
 
 static int test_term_to_key()
 {
-    LSUP_Term *uri = LSUP_uri_new ("http://hello.org");
-    LSUP_Term *lit = LSUP_term_new (LSUP_TERM_LITERAL, "hello", NULL);
-    LSUP_Term *tlit = LSUP_term_new(LSUP_TERM_LITERAL, "hello", DEFAULT_DTYPE);
-    LSUP_Term *tllit1 = LSUP_term_new(LSUP_TERM_LT_LITERAL, "hello", "en-US");
-    LSUP_Term *tllit2 = LSUP_term_new(LSUP_TERM_LT_LITERAL, "hello", "en-GB");
-
-    LSUP_Key uri_key = LSUP_term_hash (uri);
+    LSUP_NSMap *nsm = LSUP_nsmap_new();
+    LSUP_nsmap_add (nsm, "ns1", "http://hello.org/term#");
+
+    LSUP_Term *uri1 = LSUP_iriref_new ("http://hello.org/term#bye", NULL);
+    LSUP_Term *uri2 = LSUP_iriref_new ("ns1:bye", nsm);
+    LSUP_Term *lit = LSUP_literal_new ("hello", NULL);
+    LSUP_Term *tlit = LSUP_literal_new ("hello", LSUP_default_datatype);
+    LSUP_Term *llit1 = LSUP_lt_literal_new ("hello", "en-US");
+    LSUP_Term *llit2 = LSUP_lt_literal_new ("hello", "en-GB");
+    LSUP_Term *llit3 = LSUP_lt_literal_new ("hello", NULL);
+
+    LSUP_Key uri1_key = LSUP_term_hash (uri1);
+    LSUP_Key uri2_key = LSUP_term_hash (uri2);
     LSUP_Key lit_key = LSUP_term_hash (lit);
     LSUP_Key tlit_key = LSUP_term_hash (tlit);
-    LSUP_Key tllit1_key = LSUP_term_hash (tllit1);
-    LSUP_Key tllit2_key = LSUP_term_hash (tllit2);
+    LSUP_Key llit1_key = LSUP_term_hash (llit1);
+    LSUP_Key llit2_key = LSUP_term_hash (llit2);
+    LSUP_Key llit3_key = LSUP_term_hash (llit3);
 
-    ASSERT (uri_key != lit_key, "URI key conflict!");
+    ASSERT (uri1_key == uri2_key, "URI keys differ!");
+    ASSERT (uri1_key != lit_key, "URI key conflict!");
     ASSERT (lit_key == tlit_key, "URI keys differ!");
-    ASSERT (lit_key != tllit1_key, "URI key conflict!");
-    ASSERT (tlit_key != tllit1_key, "URI key conflict!");
-    ASSERT (tllit1_key != tllit2_key, "URI key conflict!");
+    ASSERT (lit_key != llit1_key, "URI key conflict!");
+    ASSERT (tlit_key != llit1_key, "URI key conflict!");
+    ASSERT (llit1_key != llit2_key, "URI key conflict!");
+    ASSERT (tlit_key == llit3_key, "URI keys differ!");
 
-    LSUP_term_free (uri);
+    LSUP_term_free (uri1);
+    LSUP_term_free (uri2);
     LSUP_term_free (lit);
     LSUP_term_free (tlit);
-    LSUP_term_free (tllit1);
-    LSUP_term_free (tllit2);
+    LSUP_term_free (llit1);
+    LSUP_term_free (llit2);
 
     return 0;
 }
 
 
 int term_tests() {
-    RUN (test_term_new);
+    RUN (test_iriref);
+    RUN (test_literal);
     RUN (test_term_serialize_deserialize);
     RUN (test_term_to_key);