4 years ago · 5f38184512
--- a/src/codec/nt_lexer.re
+++ b/src/codec/nt_lexer.re
@@ -42,6 +42,19 @@ typedef struct {
 
				 } ParseIterator;
			
 
				 
			
 
				 
			
 
				+// TODO The opposite of this is in codec_nt.c. Find a better place for both.
			
 
				+static inline char unescape_char(const char c) {  // Translate the character following a backslash into the character the escape denotes.
			
 
				+    switch (c) {
			
 
				+        case 't': return '\t';  // horizontal tab
			
 
				+        case 'b': return '\b';  // backspace
			
 
				+        case 'n': return '\n';  // line feed
			
 
				+        case 'r': return '\r';  // carriage return
			
 
				+        case 'f': return '\f';  // form feed
			
 
				+        default: return c;  // Any other character is returned unchanged.
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+
			
 
				 static int fill(ParseIterator *it)
			
 
				 {
			
 
				     if (it->eof) {
			
@@ -107,31 +120,40 @@ static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
 
				     size_t j = 0;
			
 
				     YYCTYPE tmp_chr[5];
			
 
				     for (size_t i = 0; i < size;) {
			
 
				-        if (memcmp (esc_str + i, "\\u", 2) == 0) {
			
 
				-            i += 2; // backslash + 'u'
			
 
				+        if (esc_str[i] == '\\') {
			
 
				+            i++; // Skip over '\\'
			
 
				+
			
 
				+            // 4-hex sequence.
			
 
				+            if (esc_str[i] == 'u') {
			
 
				+                i ++; // Skip over 'u'
			
 
				+
			
 
				+                // Use tmp_chr to hold the hex string for the code point.
			
 
				+                memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
			
 
				+                tmp_chr[4] = '\0';
			
 
				 
			
 
				-            // Use tmp_chr to hold the hex string representing the code point.
			
 
				-            memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
			
 
				-            tmp_chr[4] = '\0';
			
 
				+                uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
			
 
				+                TRACE ("tmp_val: %d\n", tmp_val);
			
 
				 
			
 
				-            uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
			
 
				-            TRACE ("tmp_val: %d\n", tmp_val);
			
 
				+                // Reuse tmp_chr to hold the byte values for the code point.
			
 
				+                int nbytes = utf8_encode (tmp_val, tmp_chr);
			
 
				 
			
 
				-            // Reuse tmp_chr to hold the byte values for the code point.
			
 
				-            int nbytes = utf8_encode (tmp_val, tmp_chr);
			
 
				+                // Copy bytes into destination.
			
 
				+                memcpy (uc_str + j, tmp_chr, nbytes);
			
 
				+                TRACE ("UC byte value: %x %x\n", uc_str[j], uc_str[j + 1]);
			
 
				 
			
 
				-            // Copy bytes into destination.
			
 
				-            memcpy (uc_str + j, tmp_chr, nbytes);
			
 
				-            TRACE ("UC byte value: %x %x\n", uc_str[j], uc_str[j + 1]);
			
 
				+                j += nbytes;
			
 
				+                i += 4;
			
 
				 
			
 
				-            j += nbytes;
			
 
				-            i += 4;
			
 
				+            // 8-hex sequence.
			
 
				+            } else if (esc_str[i] == 'U') {
			
 
				+                i ++; // Skip over 'U'
			
 
				+                fprintf (
			
 
				+                        stderr,
			
 
				+                        "UTF-16 sequence unescaping not yet implemented.\n");
			
 
				+                return NULL; // TODO encode UTF-16
			
 
				 
			
 
				-        } else if (memcmp (esc_str + i, "\\U", 2) == 0) {
			
 
				-            fprintf (
			
 
				-                    stderr,
			
 
				-                    "UTF-16 sequence unescaping not yet implemented.\n");
			
 
				-            return NULL; // TODO encode UTF-16
			
 
				+            // Unescape other escaped characters.
			
 
				+            } else uc_str[j++] = unescape_char(esc_str[i++]);
			
 
				         } else {
			
 
				             // Copy ASCII char verbatim.
			
 
				             uc_str[j++] = esc_str[i++];
			
@@ -254,7 +276,7 @@ loop:
 
				     }
			
 
				 
			
 
				     BNODE {
			
 
				-        YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 1);
			
 
				+        YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
			
 
				 
			
 
				         TRACE ("BNode data: %s\n", data);
			
 
				 
			
--- a/src/codec_nt.c
+++ b/src/codec_nt.c
@@ -204,7 +204,6 @@ escape_lit (const char *in, char **out_p)
 
				     size_t out_size = strlen (in) + 1;
			
 
				 
			
 
				     // Expand output string size to accommodate escape characters.
			
 
				-    //size_t i = strcspn (in, LIT_ECHAR);
			
 
				     for (
			
 
				             size_t i = strcspn (in, LIT_ECHAR);
			
 
				             i < strlen (in);
			
@@ -213,6 +212,7 @@ escape_lit (const char *in, char **out_p)
 
				     }
			
 
				 
			
 
				     char *out = calloc (1, out_size);
			
 
				+    if (UNLIKELY (!out)) return LSUP_MEM_ERR;
			
 
				 
			
 
				     size_t boundary;
			
 
				     boundary = strcspn (in, LIT_ECHAR);
			
--- a/src/graph.c
+++ b/src/graph.c
@@ -256,7 +256,6 @@ LSUP_graph_set_namespace (Graph *gr, LSUP_NSMap *nsm)
 
				 size_t
			
 
				 LSUP_graph_size (const Graph *gr)
			
 
				 {
			
 
				-    TRACE ("Store type: %d\n", gr->store_type);
			
 
				     if (gr->store_type == LSUP_STORE_MEM)
			
 
				         return LSUP_htstore_size (gr->ht_store);
			
 
				 
			
@@ -404,13 +403,12 @@ LSUP_graph_lookup (const Graph *gr, const LSUP_Triple *spo, size_t *ct)
 
				     LSUP_SerTriple *sspo = LSUP_striple_new_from_triple (spo);
			
 
				     LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
			
 
				 
			
 
				-    if (gr->store_type == LSUP_STORE_MEM) {
			
 
				-        it->ht_iter = LSUP_htstore_lookup (gr->ht_store, sspo);
			
 
				+    if (it->graph->store_type == LSUP_STORE_MEM) {
			
 
				+        it->ht_iter = LSUP_htstore_lookup (it->graph->ht_store, sspo);
			
 
				         if (ct) *ct = it->ct;
			
 
				 
			
 
				-    } else {
			
 
				-        it->mdb_iter = LSUP_mdbstore_lookup (gr->mdb_store, sspo, sc, ct);
			
 
				-    }
			
 
				+    } else it->mdb_iter = LSUP_mdbstore_lookup (
			
 
				+            it->graph->mdb_store, sspo, sc, ct);
			
 
				 
			
 
				     LSUP_striple_free (sspo);
			
 
				     LSUP_buffer_free (sc);
			
@@ -498,7 +496,7 @@ LSUP_graph_contains (const LSUP_Graph *gr, const LSUP_Triple *spo)
 
				 {
			
 
				     GraphIterator *it = LSUP_graph_lookup (gr, spo, NULL);
			
 
				     LSUP_Triple *tmp_spo = LSUP_triple_new (TERM_DUMMY, TERM_DUMMY, TERM_DUMMY);
			
 
				-    bool rc = LSUP_graph_iter_next (it, tmp_spo) != LSUP_NORESULT;
			
 
				+    bool rc = LSUP_graph_iter_next (it, tmp_spo) != LSUP_END;
			
 
				 
			
 
				     LSUP_triple_free (tmp_spo);
			
 
				     LSUP_graph_iter_free (it);
			
--- a/test/test_codec_nt.c
+++ b/test/test_codec_nt.c
@@ -32,7 +32,7 @@ init_terms (void)
 
				 static nt_str start_nt[TERM_CT] = {
			
 
				     "<urn:local:s1>",
			
 
				     "<http://example.org/p1> ",
			
 
				-    "\"hello\"^^http://www.w3.org/2001/XMLSchema#string",
			
 
				+    "\"hello\"^^<http://www.w3.org/2001/XMLSchema#string>",
			
 
				     "\"hello\" @en-US",
			
 
				     "\"hello\"@es-ES   # Does \"Hello\" in Spanish mean \"hola\"?",
			
 
				     "\"25\"   ^^<http://www.w3.org/2001/XMLSchema#integer>  ",
			
@@ -59,6 +59,9 @@ static nt_str end_nt[TERM_CT] = {
 
				 // Start NT document. It may have comments and junk whitespace.
			
 
				 static char *start_doc = (
			
 
				     "<urn:local:s1> <http://example.org/p1> \"hello\" . #  Comment here.\n"
			
 
				+    "<urn:local:s1> <http://example.org/p1> "
			
 
				+        "\"hello\"^^<http://www.w3.org/2001/XMLSchema#string> .\n"
			
 
				+    "<urn:local:s1> <http://example.org/p1>\"hello\"@en-US .\n"
			
 
				     "<urn:local:s1> <http://example.org/p1>\"hello\"@es-ES .\n"
			
 
				     "# Some comments\n# To make it a bit \n   #less boring.\n"
			
 
				     "<urn:local:s1> <http://example.org/p1> _:bn1 .\n"
			
@@ -81,8 +84,9 @@ static char *bad_doc = (
 
				 
			
 
				 // End result NT document as it should be produced by the NT codec.
			
 
				 // Lines should not be checked in strict order.
			
 
				-static char *end_doc[6] = {
			
 
				+static char *end_doc[7] = {
			
 
				     "<urn:local:s1> <http://example.org/p1> \"hello\" .\n",
			
 
				+    "<urn:local:s1> <http://example.org/p1> \"hello\"@en-US .\n",
			
 
				     "<urn:local:s1> <http://example.org/p1> \"hello\"@es-ES .\n",
			
 
				     "<urn:local:s1> <http://example.org/p1> _:bn1 .\n",
			
 
				     "_:bn1 <http://example.org/p1> \"hello\"@es-ES .\n",
			
@@ -179,7 +183,7 @@ static int test_encode_nt_graph()
 
				     LSUP_graph_free (gr);
			
 
				     //printf("Serialized graph: %s\n", out);
			
 
				 
			
 
				-    for (int i = 0; i < 6; i++)
			
 
				+    for (int i = 0; i < 7; i++)
			
 
				         ASSERT (strstr (out, end_doc[i]) != NULL, end_doc[i]);
			
 
				 
			
 
				     free (out);
			
@@ -214,11 +218,14 @@ test_decode_nt_graph()
 
				     fclose (input);
			
 
				 
			
 
				     ASSERT (err == NULL, "Error string is not NULL!");
			
 
				-    EXPECT_INT_EQ (ct, 6);
			
 
				-    EXPECT_INT_EQ (LSUP_graph_size (gr), 6);
			
 
				+    EXPECT_INT_EQ (ct, 8);
			
 
				+    EXPECT_INT_EQ (LSUP_graph_size (gr), 7);
			
 
				 
			
 
				-    for (int i = 0; i < 7; i++)
			
 
				+    for (int i = 0; i < 7; i++) {
			
 
				+        printf("Checking triple #%d... ", i);
			
 
				         EXPECT_INT_EQ (LSUP_graph_contains (gr, trp + i), 1);
			
 
				+        printf("OK.\n");
			
 
				+    }
			
 
				 
			
 
				     LSUP_graph_free (gr);
			
 
				 
			
--- a/test/test_graph.c
+++ b/test/test_graph.c
@@ -40,6 +40,10 @@ test_graph_mem_add()
 
				         printf ("OK.\n");
			
 
				     }
			
 
				 
			
 
				+    LSUP_Triple *missing_trp = LSUP_triple_new (trp[1].s, trp[6].p, trp[4].o);
			
 
				+    ASSERT (! LSUP_graph_contains (gr, missing_trp), "Triple in graph!");
			
 
				+    free (missing_trp);
			
 
				+
			
 
				     free_triples (trp); // gr copied data.
			
 
				 
			
 
				     LSUP_graph_free (gr);
			
@@ -89,6 +93,10 @@ test_graph_mdb_add()
 
				         printf ("OK.\n");
			
 
				     }
			
 
				 
			
 
				+    LSUP_Triple *missing_trp = LSUP_triple_new (trp[1].s, trp[6].p, trp[4].o);
			
 
				+    ASSERT (! LSUP_graph_contains (gr, missing_trp), "Triple in graph!");
			
 
				+    free (missing_trp);
			
 
				+
			
 
				     free_triples (trp); // gr copied data.
			
 
				 
			
 
				     LSUP_graph_free (gr);
			
--- a/test/test_term.c
+++ b/test/test_term.c
@@ -80,11 +80,12 @@ static int test_term_to_key()
 
				     LSUP_Term *uri = LSUP_uri_new ("http://hello.org");
			
 
				     LSUP_Term *lit = LSUP_term_new (LSUP_TERM_LITERAL, "hello", NULL, NULL);
			
 
				     LSUP_Term *tlit = LSUP_term_new(
			
 
				-            LSUP_TERM_LITERAL, "hello", "xsd:string", NULL);
			
 
				+            LSUP_TERM_LITERAL, "hello", DEFAULT_DTYPE, NULL);
			
 
				     LSUP_Term *tllit1 = LSUP_term_new(
			
 
				-            LSUP_TERM_LITERAL, "hello", "xsd:string", "en-US");
			
 
				+            LSUP_TERM_LITERAL, "hello", NULL, "en-US");
			
 
				     LSUP_Term *tllit2 = LSUP_term_new(
			
 
				-            LSUP_TERM_LITERAL, "hello", "xsd:string", "en-GB");
			
 
				+            LSUP_TERM_LITERAL, "hello",
			
 
				+            "http://www.w3.org/2001/XMLSchema#string", "en-GB");
			
 
				 
			
 
				     LSUP_Key uri_key = LSUP_term_hash (uri);
			
 
				     LSUP_Key lit_key = LSUP_term_hash (lit);
			
@@ -93,7 +94,7 @@ static int test_term_to_key()
 
				     LSUP_Key tllit2_key = LSUP_term_hash (tllit2);
			
 
				 
			
 
				     ASSERT (uri_key != lit_key, "URI key conflict!");
			
 
				-    ASSERT (lit_key != tlit_key, "URI key conflict!");
			
 
				+    ASSERT (lit_key == tlit_key, "URI keys differ!");
			
 
				     ASSERT (lit_key != tllit1_key, "URI key conflict!");
			
 
				     ASSERT (tlit_key != tllit1_key, "URI key conflict!");
			
 
				     ASSERT (tllit1_key != tllit2_key, "URI key conflict!");