1 year ago · dded6705b6
--- a/Makefile
+++ b/Makefile
@@ -26,7 +26,7 @@ DBG_CFLAGS = $(_CFLAGS) -Itest -O0 -g3 -DDEBUG
 
				 #$(info DBG_CFLAGS: $(DBG_CFLAGS))
			
 
				 # NOTE: -luuid is a Linux system library. Other OS's might need a different
			
 
				 # link or a non-system library built.
			
 
				-LDFLAGS := -L. -L$(outdir) -L$(libdir) -llmdb -lxxhash -luuid
			
 
				+LDFLAGS := -L$(libdir) -L$(outdir) -L. -llmdb -lxxhash -luuid
			
 
				 
			
 
				 PARSER = bin/lemon
			
 
				 LEMON_SRC = ext/sqlite/tool/lemon.c
			
@@ -66,10 +66,10 @@ LIBS = $(STATIC_LIB) $(DYN_LIB)
 
				 DBG_LIBS = $(STATIC_DBG_LIB) $(DYN_DBG_LIB)
			
 
				 
			
 
				 # LDD for Linux, otool -L for OSX.
			
 
				-ifdef $(shell which ldd 2> /dev/null)
			
 
				-LDD := ldd
			
 
				-else
			
 
				+# Probe with the POSIX `command -v` builtin and discard stderr, so that a
			
 
				+# missing ldd (e.g. on OSX) does not print noise every time make is run.
			
 
				+ifeq (, $(shell command -v ldd 2> /dev/null))
			
 
				 LDD := otool -L
			
 
				+else
			
 
				+LDD := ldd
			
 
				 endif
			
 
				 
			
 
				 # For visual dep graph.
			
--- a/docs/dev/deps.dot
+++ b/docs/dev/deps.dot
@@ -5,48 +5,45 @@ digraph "source tree" {
 
				     fontsize="16";
			
 
				     fontname="Helvetica";
			
 
				 	clusterrank="local";
			
 
				+	"py_lsup_rdf" -> "py_graph"
			
 
				 	"term" -> "namespace"
			
 
				-	"profile" -> "lsup_rdf"
			
 
				+	"store" -> "store_mdb"
			
 
				 	"store_htable" -> "buffer"
			
 
				-	"grammar_ttl" -> "codec"
			
 
				-	"graph" -> "environment"
			
 
				-	"lsup_rdf" -> "codec_ttl"
			
 
				-	"store_interface" -> "environment"
			
 
				-	"lsup_rdf" -> "codec_nt"
			
 
				 	"namespace" -> "hashmap"
			
 
				-	"core" -> "lmdb"
			
 
				-	"environment" -> "term"
			
 
				-	"py_triple" -> "py_term"
			
 
				-	"core" -> "xxhash"
			
 
				-	"store_mdb" -> "buffer"
			
 
				+	"profile" -> "lsup_rdf"
			
 
				+	"py_graph" -> "codec_ttl"
			
 
				+	"store_mdb" -> "store_interface"
			
 
				+	"py_term" -> "py_namespace"
			
 
				+	"grammar_nt" -> "codec"
			
 
				 	"py_namespace" -> "namespace"
			
 
				-	"store_htable" -> "store_interface"
			
 
				 	"term" -> "buffer"
			
 
				+	"parser_nt" -> "codec"
			
 
				+	"term" -> "tpl"
			
 
				+	"store_mdb" -> "buffer"
			
 
				+	"environment" -> "bootstrap"
			
 
				+	"py_graph" -> "graph"
			
 
				+	"lsup_rdf" -> "codec_ttl"
			
 
				 	"graph" -> "store"
			
 
				-	"py_lsup_rdf" -> "py_graph"
			
 
				-	"store_htable" -> "hashmap"
			
 
				+	"parser_nt" -> "tokens_nt"
			
 
				+	"graph" -> "environment"
			
 
				 	"py_term" -> "term"
			
 
				-	"core" -> "log"
			
 
				-	"parser_ttl" -> "codec"
			
 
				-	"py_graph" -> "codec_ttl"
			
 
				-	"store" -> "store_htable"
			
 
				-	"codec_nt" -> "parser_nt"
			
 
				+	"codec_ttl" -> "parser_ttl"
			
 
				+	"store_htable" -> "hashmap"
			
 
				+	"grammar_ttl" -> "codec"
			
 
				 	"buffer" -> "core"
			
 
				-	"py_term" -> "py_namespace"
			
 
				-	"store_mdb" -> "store_interface"
			
 
				-	"parser_nt" -> "tokens_nt"
			
 
				-	"store" -> "store_mdb"
			
 
				-	"parser_nt" -> "codec"
			
 
				-	"parser_ttl" -> "tokens_ttl"
			
 
				 	"graph" -> "term"
			
 
				-	"environment" -> "bootstrap"
			
 
				-	"store_mdb" -> "lmdb"
			
 
				-	"term" -> "tpl"
			
 
				-	"codec" -> "graph"
			
 
				-	"codec_ttl" -> "parser_ttl"
			
 
				-	"py_graph" -> "graph"
			
 
				-	"py_graph" -> "py_triple"
			
 
				-	"grammar_nt" -> "codec"
			
 
				+	"lsup_rdf" -> "codec_nt"
			
 
				 	"py_graph" -> "codec_nt"
			
 
				+	"core" -> "log"
			
 
				+	"py_triple" -> "py_term"
			
 
				+	"py_graph" -> "py_triple"
			
 
				+	"codec_nt" -> "parser_nt"
			
 
				+	"codec" -> "graph"
			
 
				+	"store" -> "store_htable"
			
 
				+	"store_interface" -> "environment"
			
 
				+	"parser_ttl" -> "codec"
			
 
				+	"store_htable" -> "store_interface"
			
 
				 	"namespace" -> "core"
			
 
				+	"environment" -> "term"
			
 
				+	"parser_ttl" -> "tokens_ttl"
			
 
				 }
			
--- a/docs/dev/deps.pdf
+++ b/docs/dev/deps.pdf
--- a/include/term.h
+++ b/include/term.h
@@ -15,29 +15,6 @@
 
				 #define DEFAULT_DTYPE       "http://www.w3.org/2001/XMLSchema#string"
			
 
				 #define DEFAULT_DTYPE_NS    "xsd:string"
			
 
				 
			
 
				-/** @brief URI parsing regular expression.
			
 
				- *
			
 
				- * Based on RFC3986 (see https://tools.ietf.org/html/rfc3986#appendix-B) and
			
 
				- * modified for use in this application. Relevant matching groups are the
			
 
				- * following, for a sample URI `http://example.org/123/456/?query=blah#frag`:
			
 
				- *
			
 
				- * #0:  Full parsed URI (http://example.org/123/456/?query=blah#frag)
			
 
				- * #1:  Domain prefix (http://example.org)
			
 
				- * #2:  Protocol (http:)
			
 
				- * #4:  Authority (example.org)
			
 
				- * #5:  Path relative to domain (/123/456/?query=blah#frag)
			
 
				- * #6:  Path, excluding query and fragment (/123/456/)
			
 
				- * #8:  Query (query=blah)
			
 
				- * #10: Fragment (frag)
			
 
				- *
			
 
				- * For URN-like URIs, such as `urn:s:0`, the prefix part (#1) is `urn:` and
			
 
				- * the path (#4) is `s:0`.
			
 
				- *
			
 
				- * TODO Remove. Superseded by ad-hoc scanning (see static parse_ini in term.c.)
			
 
				- */
			
 
				-#define LSUP_URI_REGEX_STR \
			
 
				-    "^(([^:/?#]+:)?(//([^/?#]*))?)?(([^?#]*)(\\?([^#]*))?(#(.*))?)"
			
 
				-
			
 
				 
			
 
				 /*
			
 
				  * Data types.
			
@@ -92,11 +69,8 @@ typedef struct term_t {
 
				 
			
 
				 
			
 
				 /** @brief Whether the environment is already initialized.
			
 
				- *
			
 
				- * @TODO Check if the default NS was inserted; this would be slower but more
			
 
				- * accurate.
			
 
				  */
			
 
				-#define LSUP_IS_INIT (LSUP_term_cache != NULL)
			
 
				+#define LSUP_IS_INIT (LSUP_default_datatype != NULL)
			
 
				 
			
 
				 
			
 
				 /** @brief RDF triple.
			
--- a/src/environment.c
+++ b/src/environment.c
@@ -54,18 +54,21 @@ LSUP_init (void)
 
				     if (UNLIKELY (!LSUP_default_ctx_buf)) return LSUP_ERROR;
			
 
				 
			
 
				     // Initialize term cache.
			
 
				-    // This is the indicator for LSUP_IS_INIT.
			
 
				     LSUP_term_cache = LSUP_term_set_new();
			
 
				     if (UNLIKELY (!LSUP_term_cache)) return LSUP_MEM_ERR;
			
 
				 
			
 
				     // Create and cache default literal datatype key.
			
 
				     // This will be done only once in the program, so no need to check for
			
 
				     // duplicates.
			
 
				+    // This is the last operation that can fail in this function, and it is
			
 
				+    // the indicator for LSUP_IS_INIT.
			
 
				     LSUP_default_datatype = LSUP_iriref_new (DEFAULT_DTYPE, NULL);
			
 
				     LSUP_rc rc = LSUP_term_set_add (
			
 
				             LSUP_term_cache, LSUP_default_datatype, NULL);
			
 
				     PRCCK (rc);
			
 
				 
			
 
				+    log_info ("LSUP environment initialized.");
			
 
				+
			
 
				     // Set automatic teardown TODO Is this a good idea?
			
 
				     atexit (LSUP_done);
			
 
				 
			
@@ -85,7 +88,9 @@ LSUP_done (void)
 
				 
			
 
				     // Free ID cache, including default literal datatype.
			
 
				     hashmap_free (LSUP_term_cache);
			
 
				-    LSUP_term_cache = NULL; // This causes LSUP_IS_INIT to return false.
			
 
				+    LSUP_term_cache = NULL; // Do not keep a dangling pointer to the freed cache.
			
 
				+    LSUP_default_datatype = NULL; // This causes LSUP_IS_INIT to return false.
			
 
				+
			
 
				+    log_info ("LSUP environment torn down.");
			
 
				 }
			
 
				 
			
 
				 
			
--- a/src/term.c
+++ b/src/term.c
@@ -90,6 +90,10 @@ LSUP_TermSet *LSUP_term_cache = NULL;
 
				 // Characters not allowed in a URI string.
			
 
				 static const char *invalid_uri_chars = "<>\" {}|\\^`";
			
 
				 
			
 
				+/// Minimum valid type code.
			
 
				+static const LSUP_TermType MIN_VALID_TYPE = LSUP_TERM_IRIREF;
			
 
				+/// Maximum valid type code. Change this if adding to enum LSUP_TermType.
			
 
				+static const LSUP_TermType MAX_VALID_TYPE = LSUP_TERM_BNODE;
			
 
				 
			
 
				 /*
			
 
				  * Static prototypes.
			
@@ -245,9 +249,11 @@ LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri)
 
				         return NULL;
			
 
				     }
			
 
				 
			
 
				-    char *data, *pfx = LSUP_iriref_prefix (iri);
			
 
				+    char
			
 
				+        *data,
			
 
				+        *pfx = LSUP_iriref_prefix (iri);
			
 
				 
			
 
				-    if (pfx) data = iri->data;
			
 
				+    // pfx is NULL if LSUP_iriref_prefix failed; never pass that to strlen().
			
 
				+    if (pfx && pfx[0] != '\0') data = iri->data;
			
 
				 
			
 
				     else if (iri->data[0] == '/') {
			
 
				         free (pfx);
			
@@ -402,7 +408,7 @@ LSUP_iriref_prefix (const LSUP_Term *iri)
 
				         return NULL;
			
 
				     }
			
 
				 
			
 
				-    if (iri->iri_info->prefix.size == 0) return NULL;
			
 
				+    // if (iri->iri_info->prefix.size == 0) return NULL;
			
 
				 
			
 
				     return strndup (
			
 
				             iri->data + iri->iri_info->prefix.offset,
			
@@ -418,7 +424,7 @@ LSUP_iriref_path (const LSUP_Term *iri)
 
				         return NULL;
			
 
				     }
			
 
				 
			
 
				-    if (iri->iri_info->path.size == 0) return NULL;
			
 
				+    // if (iri->iri_info->path.size == 0) return NULL;
			
 
				 
			
 
				     return strndup (
			
 
				             iri->data + iri->iri_info->path.offset,
			
@@ -434,7 +440,7 @@ LSUP_iriref_frag (const LSUP_Term *iri)
 
				         return NULL;
			
 
				     }
			
 
				 
			
 
				-    if (iri->iri_info->frag.size == 0) return NULL;
			
 
				+    // if (iri->iri_info->frag.size == 0) return NULL;
			
 
				 
			
 
				     return strndup (
			
 
				             iri->data + iri->iri_info->frag.offset,
			
@@ -748,12 +754,8 @@ term_init (
 
				         LSUP_Term *term, LSUP_TermType type,
			
 
				         const char *data, void *metadata)
			
 
				 {
			
 
				-    if (UNLIKELY (!LSUP_IS_INIT)) {
			
 
				-        log_error ("Environment not initialized. Did you call LSUP_init()?");
			
 
				-        return LSUP_ERROR;
			
 
				-    }
			
 
				     // This can never be LSUP_TERM_UNDEFINED.
			
 
				-    if (type == LSUP_TERM_UNDEFINED) {
			
 
				+    if (type < MIN_VALID_TYPE || type > MAX_VALID_TYPE) {
			
 
				         log_error ("%d is not a valid term type.", type);
			
 
				         return LSUP_VALUE_ERR;
			
 
				     }
			
@@ -928,6 +930,10 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
 
				     size_t iri_len = strlen (iri_str);
			
 
				     MatchCoord tmp = {};  // Temporary storage for capture groups
			
 
				 
			
 
				+    // Redundant if only called by term_init.
			
 
				+    // memset (coord, 0, sizeof(*coord));
			
 
				+
			
 
				+    log_debug ("Parsing IRI: %s", iri_str);
			
 
				     // #2: ([^:/?#]+)
			
 
				     while (
			
 
				             *cur != ':' && *cur != '/' && *cur != '?'
			
@@ -937,51 +943,50 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
 
				     }
			
 
				 
			
 
				     // Non-capturing: (?([^:/?#]+):)?
			
 
				-    if (tmp.size > 0 && *(++cur) == ':') {
			
 
				+    if (tmp.size > 0 && *cur == ':') {
			
 
				         // Got capture groups #2 and #3. Store them.
			
 
				-        tmp.size++;
			
 
				-        coord[3].offset = tmp.offset;
			
 
				-        coord[3].size = tmp.size - 1;
			
 
				-    }
			
 
				+        coord[2].offset = 0;
			
 
				+        coord[2].size = tmp.size;
			
 
				+        cur++;
			
 
				+        log_debug ("Group #2: %lu, %lu", coord[2].offset, coord[2].size);
			
 
				+    } else cur = iri_str;  // Backtrack if no match.
			
 
				 
			
 
				     // Non-capturing: (?//([^/?#]*))?
			
 
				-    if (*(cur + 1) == '/' && *(cur + 2) == '/') {
			
 
				-        cur++;
			
 
				-        tmp.offset = cur - iri_str;
			
 
				-        tmp.size = 2;
			
 
				+    if (*cur == '/' && *(cur + 1) == '/') {
			
 
				         cur += 2;
			
 
				+        tmp.offset = cur - iri_str;
			
 
				+        tmp.size = 0;
			
 
				 
			
 
				         // #3: ([^/?#]*)
			
 
				         while (*cur != '/' && *cur != '?' && *cur != '#' && *cur != '\0') {
			
 
				             tmp.size++;
			
 
				             cur++;
			
 
				         }
			
 
				-
			
 
				-        // Maybe got capture group #5.
			
 
				-        coord[3].offset = tmp.offset + 2;
			
 
				-        coord[3].size = tmp.size -2;
			
 
				+        coord[3].offset = tmp.offset;
			
 
				+        coord[3].size = tmp.size;
			
 
				+        log_debug ("Group #3: %lu, %lu", coord[3].offset, coord[3].size);
			
 
				     }
			
 
				 
			
 
				-    // Capture group 1 and advance cursor.
			
 
				+    // Capture group 1.
			
 
				     coord[1].offset = 0;
			
 
				-    coord[1].size = cur++ - iri_str;
			
 
				+    coord[1].size = cur - iri_str;
			
 
				+    log_debug ("Group #1: %lu, %lu", coord[1].offset, coord[1].size);
			
 
				 
			
 
				-    // Non-capturing: (?[^?#]*)
			
 
				     tmp.offset = cur - iri_str;
			
 
				     tmp.size = 0;
			
 
				+
			
 
				+    coord[4].offset = tmp.offset;
			
 
				+    coord[4].size = iri_len - tmp.offset;
			
 
				+    log_debug ("Group #4: %lu, %lu", coord[4].offset, coord[4].size);
			
 
				+
			
 
				+    // Non-capturing: (?[^?#]*)
			
 
				     while (*cur != '?' && *cur != '#' && *cur != '\0') {
			
 
				         tmp.size++;
			
 
				         cur++;
			
 
				     }
			
 
				 
			
 
				-    if (tmp.size > 0) {
			
 
				-        coord[4].offset = tmp.offset;
			
 
				-        coord[4].size = iri_str + iri_len - cur;
			
 
				-
			
 
				-    } else return LSUP_NORESULT;  // This group is the only mandatory match.
			
 
				-
			
 
				     // Non-capturing: (?\?([^#]*))
			
 
				-    if (*(++cur) == '?') {
			
 
				+    if (*cur == '?') {
			
 
				         // 5: ([^#]*)
			
 
				         tmp.offset = ++cur - iri_str;
			
 
				         tmp.size = 0;
			
@@ -994,18 +999,21 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
 
				             // Got capture group #5.
			
 
				             coord[5].offset = tmp.offset;
			
 
				             coord[5].size = tmp.size;
			
 
				+            log_debug ("Group #5: %lu, %lu", coord[5].offset, coord[5].size);
			
 
				         }
			
 
				     }
			
 
				 
			
 
				     // Non-capturing: (?#(.*))?
			
 
				-    if (*(++cur) == '#') {
			
 
				+    if (*cur == '#') {
			
 
				         // #6: (.*)
			
 
				         coord[6].offset = ++cur - iri_str;
			
 
				         coord[6].size = iri_str + iri_len - cur;
			
 
				+        log_debug ("Group #6: %lu, %lu", coord[6].offset, coord[6].size);
			
 
				     }
			
 
				 
			
 
				     coord[0].offset = 0;
			
 
				     coord[0].size = iri_len;
			
 
				+    log_debug ("Full match: %lu, %lu", coord[0].offset, coord[0].size);
			
 
				 
			
 
				     return LSUP_OK;
			
 
				 }
			
--- a/test/assets/triples.h
+++ b/test/assets/triples.h
@@ -1,5 +1,5 @@
 
				-#ifndef _TEST_ASSETS_H
			
 
				-#define _TEST_ASSETS_H
			
 
				+#ifndef _TEST_ASSETS_TRIPLES_H
			
 
				+#define _TEST_ASSETS_TRIPLES_H
			
 
				 
			
 
				 #include "term.h"
			
 
				 
			
@@ -71,5 +71,5 @@ void free_triples (LSUP_Triple **trp)
 
				 
			
 
				     free(trp);
			
 
				 }
			
 
				-#endif  /* _TEST_ASSETS_H */
			
 
				+#endif  /* _TEST_ASSETS_TRIPLES_H */
			
 
				 
			
--- a/test/test_term.c
+++ b/test/test_term.c
@@ -63,6 +63,60 @@ static int test_iriref()
 
				 }
			
 
				 
			
 
				 
			
 
				+/** @brief Check the prefix, path and fragment accessors of IRI references.
			
 
				+ *
			
 
				+ * Each row of the table holds, in order: the IRI string to parse, and the
			
 
				+ * strings expected from LSUP_iriref_prefix, LSUP_iriref_path and
			
 
				+ * LSUP_iriref_frag. Note that the expected "path" column includes any query
			
 
				+ * and fragment, and that missing parts are expected as empty strings.
			
 
				+ */
			
 
				+static int test_iriref_parts()
			
 
				+{
			
 
				+    const char *data[][4] = {
			
 
				+        {"http://example.org", "http://example.org", "", ""},
			
 
				+        {"http://example.org/", "http://example.org", "/", ""},
			
 
				+        {"http://example.org?option", "http://example.org", "?option", ""},
			
 
				+        {"http://example.org/?option", "http://example.org", "/?option", ""},
			
 
				+        {
			
 
				+                "http://example.org#anchor",
			
 
				+                "http://example.org", "#anchor", "anchor"},
			
 
				+        {
			
 
				+                "http://example.org/#anchor",
			
 
				+                "http://example.org", "/#anchor", "anchor"},
			
 
				+        {
			
 
				+                "http://example.org/?option#anchor",
			
 
				+                "http://example.org", "/?option#anchor", "anchor"},
			
 
				+        {
			
 
				+                "http://example.org?option#anchor",
			
 
				+                "http://example.org", "?option#anchor", "anchor"},
			
 
				+        {"ftp:///", "ftp://", "/", ""},
			
 
				+        {
			
 
				+                "file:///usr/local/lib/liblsuprdf.so",
			
 
				+                "file://", "/usr/local/lib/liblsuprdf.so", ""},
			
 
				+        {"/", "", "/", ""},
			
 
				+        {"/tmp", "", "/tmp", ""},
			
 
				+        {"./tmp", "", "./tmp", ""},
			
 
				+        {"tmp/test.nt", "", "tmp/test.nt", ""},
			
 
				+        {"", "", "", ""},
			
 
				+        {"#hello", "", "#hello", "hello"},
			
 
				+        {
			
 
				+                "urn:uuid:950404b6-0e4f-4e21-8267-c8c00e83563b",
			
 
				+                "urn:", "uuid:950404b6-0e4f-4e21-8267-c8c00e83563b", ""}
			
 
				+    };
			
 
				+    // Derive the row count from the table so the loop bound cannot drift
			
 
				+    // out of sync when rows are added or removed.
			
 
				+    const size_t data_ct = sizeof (data) / sizeof (data[0]);
			
 
				+
			
 
				+    for (size_t i = 0; i < data_ct; i++) {
			
 
				+        LSUP_Term *iri = LSUP_iriref_new(data[i][0], NULL);
			
 
				+        char
			
 
				+            *pfx = LSUP_iriref_prefix (iri),
			
 
				+            *path = LSUP_iriref_path (iri),
			
 
				+            *frag = LSUP_iriref_frag (iri);
			
 
				+
			
 
				+        EXPECT_STR_EQ (pfx, data[i][1]);
			
 
				+        EXPECT_STR_EQ (path, data[i][2]);
			
 
				+        EXPECT_STR_EQ (frag, data[i][3]);
			
 
				+
			
 
				+        // The accessors return newly allocated strings (strndup).
			
 
				+        free (pfx);
			
 
				+        free (path);
			
 
				+        free (frag);
			
 
				+        LSUP_term_free (iri);
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
 
				+
			
 
				 static int test_iriref_abs_rel()
			
 
				 {
			
 
				     LSUP_NSMap *nsm1 = LSUP_nsmap_new();
			
@@ -294,6 +348,7 @@ static int test_term_to_key()
 
				 
			
 
				 int term_tests() {
			
 
				     RUN (test_iriref);
			
 
				+    RUN (test_iriref_parts);
			
 
				     RUN (test_iriref_abs_rel);
			
 
				     RUN (test_literal);
			
 
				     RUN (test_term_copy);