Browse Source

Fix graph_contains function; fix wrongly passing tests.

Stefano Cossu 4 years ago
parent
commit
5f38184512
6 changed files with 74 additions and 38 deletions
  1. 42 20
      src/codec/nt_lexer.re
  2. 1 1
      src/codec_nt.c
  3. 5 7
      src/graph.c
  4. 13 6
      test/test_codec_nt.c
  5. 8 0
      test/test_graph.c
  6. 5 4
      test/test_term.c

+ 42 - 20
src/codec/nt_lexer.re

@@ -42,6 +42,19 @@ typedef struct {
 } ParseIterator;
 
 
+// Escape letter -> character. TODO The opposite of this is in codec_nt.c. Find a better place for both.
+static inline char unescape_char(const char c) {
+    switch (c) {
+        case 't': return '\t';  // horizontal tab
+        case 'b': return '\b';  // backspace
+        case 'n': return '\n';  // line feed
+        case 'r': return '\r';  // carriage return
+        case 'f': return '\f';  // form feed
+        default: return c;      // any other character is returned unchanged
+    }
+}
+
+
 static int fill(ParseIterator *it)
 {
     if (it->eof) {
@@ -107,31 +120,40 @@ static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
     size_t j = 0;
     YYCTYPE tmp_chr[5];
     for (size_t i = 0; i < size;) {
-        if (memcmp (esc_str + i, "\\u", 2) == 0) {
-            i += 2; // backslash + 'u'
+        if (esc_str[i] == '\\') {
+            i++; // Skip over '\\'
+
+            // 4-hex sequence.
+            if (esc_str[i] == 'u') {
+                i ++; // Skip over 'u'
+
+                // Use tmp_chr to hold the hex string for the code point.
+                memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
+                tmp_chr[4] = '\0';
 
-            // Use tmp_chr to hold the hex string representing the code point.
-            memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
-            tmp_chr[4] = '\0';
+                uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
+                TRACE ("tmp_val: %d\n", tmp_val);
 
-            uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
-            TRACE ("tmp_val: %d\n", tmp_val);
+                // Reuse tmp_chr to hold the byte values for the code point.
+                int nbytes = utf8_encode (tmp_val, tmp_chr);
 
-            // Reuse tmp_chr to hold the byte values for the code point.
-            int nbytes = utf8_encode (tmp_val, tmp_chr);
+                // Copy bytes into destination.
+                memcpy (uc_str + j, tmp_chr, nbytes);
+                TRACE ("UC byte value: %x %x\n", uc_str[j], uc_str[j + 1]);
 
-            // Copy bytes into destination.
-            memcpy (uc_str + j, tmp_chr, nbytes);
-            TRACE ("UC byte value: %x %x\n", uc_str[j], uc_str[j + 1]);
+                j += nbytes;
+                i += 4;
 
-            j += nbytes;
-            i += 4;
+            // 8-hex sequence.
+            } else if (esc_str[i] == 'U') {
+                i ++; // Skip over 'U'
+                fprintf (
+                        stderr,
+                        "UTF-16 sequence unescaping not yet implemented.\n");
+                return NULL; // TODO encode UTF-16
 
-        } else if (memcmp (esc_str + i, "\\U", 2) == 0) {
-            fprintf (
-                    stderr,
-                    "UTF-16 sequence unescaping not yet implemented.\n");
-            return NULL; // TODO encode UTF-16
+            // Unescape other escaped characters.
+            } else uc_str[j++] = unescape_char(esc_str[i++]);
         } else {
             // Copy ASCII char verbatim.
             uc_str[j++] = esc_str[i++];
@@ -254,7 +276,7 @@ loop:
     }
 
     BNODE {
-        YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 1);
+        YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
 
         TRACE ("BNode data: %s\n", data);
 

+ 1 - 1
src/codec_nt.c

@@ -204,7 +204,6 @@ escape_lit (const char *in, char **out_p)
     size_t out_size = strlen (in) + 1;
 
     // Expand output string size to accommodate escape characters.
-    //size_t i = strcspn (in, LIT_ECHAR);
     for (
             size_t i = strcspn (in, LIT_ECHAR);
             i < strlen (in);
@@ -213,6 +212,7 @@ escape_lit (const char *in, char **out_p)
     }
 
     char *out = calloc (1, out_size);
+    if (UNLIKELY (!out)) return LSUP_MEM_ERR;
 
     size_t boundary;
     boundary = strcspn (in, LIT_ECHAR);

+ 5 - 7
src/graph.c

@@ -256,7 +256,6 @@ LSUP_graph_set_namespace (Graph *gr, LSUP_NSMap *nsm)
 size_t
 LSUP_graph_size (const Graph *gr)
 {
-    TRACE ("Store type: %d\n", gr->store_type);
     if (gr->store_type == LSUP_STORE_MEM)
         return LSUP_htstore_size (gr->ht_store);
 
@@ -404,13 +403,12 @@ LSUP_graph_lookup (const Graph *gr, const LSUP_Triple *spo, size_t *ct)
     LSUP_SerTriple *sspo = LSUP_striple_new_from_triple (spo);
     LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
 
-    if (gr->store_type == LSUP_STORE_MEM) {
-        it->ht_iter = LSUP_htstore_lookup (gr->ht_store, sspo);
+    if (it->graph->store_type == LSUP_STORE_MEM) {
+        it->ht_iter = LSUP_htstore_lookup (it->graph->ht_store, sspo);
         if (ct) *ct = it->ct;
 
-    } else {
-        it->mdb_iter = LSUP_mdbstore_lookup (gr->mdb_store, sspo, sc, ct);
-    }
+    } else it->mdb_iter = LSUP_mdbstore_lookup (
+            it->graph->mdb_store, sspo, sc, ct);
 
     LSUP_striple_free (sspo);
     LSUP_buffer_free (sc);
@@ -498,7 +496,7 @@ LSUP_graph_contains (const LSUP_Graph *gr, const LSUP_Triple *spo)
 {
     GraphIterator *it = LSUP_graph_lookup (gr, spo, NULL);
     LSUP_Triple *tmp_spo = LSUP_triple_new (TERM_DUMMY, TERM_DUMMY, TERM_DUMMY);
-    bool rc = LSUP_graph_iter_next (it, tmp_spo) != LSUP_NORESULT;
+    bool rc = LSUP_graph_iter_next (it, tmp_spo) != LSUP_END;
 
     LSUP_triple_free (tmp_spo);
     LSUP_graph_iter_free (it);

+ 13 - 6
test/test_codec_nt.c

@@ -32,7 +32,7 @@ init_terms (void)
 static nt_str start_nt[TERM_CT] = {
     "<urn:local:s1>",
     "<http://example.org/p1> ",
-    "\"hello\"^^http://www.w3.org/2001/XMLSchema#string",
+    "\"hello\"^^<http://www.w3.org/2001/XMLSchema#string>",
     "\"hello\" @en-US",
     "\"hello\"@es-ES   # Does \"Hello\" in Spanish mean \"hola\"?",
     "\"25\"   ^^<http://www.w3.org/2001/XMLSchema#integer>  ",
@@ -59,6 +59,9 @@ static nt_str end_nt[TERM_CT] = {
 // Start NT document. It may have comments and junk whitespace.
 static char *start_doc = (
     "<urn:local:s1> <http://example.org/p1> \"hello\" . #  Comment here.\n"
+    "<urn:local:s1> <http://example.org/p1> "
+        "\"hello\"^^<http://www.w3.org/2001/XMLSchema#string> .\n"
+    "<urn:local:s1> <http://example.org/p1>\"hello\"@en-US .\n"
     "<urn:local:s1> <http://example.org/p1>\"hello\"@es-ES .\n"
     "# Some comments\n# To make it a bit \n   #less boring.\n"
     "<urn:local:s1> <http://example.org/p1> _:bn1 .\n"
@@ -81,8 +84,9 @@ static char *bad_doc = (
 
 // End result NT document as it should be produced by the NT codec.
 // Lines should not be checked in strict order.
-static char *end_doc[6] = {
+static char *end_doc[7] = {
     "<urn:local:s1> <http://example.org/p1> \"hello\" .\n",
+    "<urn:local:s1> <http://example.org/p1> \"hello\"@en-US .\n",
     "<urn:local:s1> <http://example.org/p1> \"hello\"@es-ES .\n",
     "<urn:local:s1> <http://example.org/p1> _:bn1 .\n",
     "_:bn1 <http://example.org/p1> \"hello\"@es-ES .\n",
@@ -179,7 +183,7 @@ static int test_encode_nt_graph()
     LSUP_graph_free (gr);
     //printf("Serialized graph: %s\n", out);
 
-    for (int i = 0; i < 6; i++)
+    for (int i = 0; i < 7; i++)
         ASSERT (strstr (out, end_doc[i]) != NULL, end_doc[i]);
 
     free (out);
@@ -214,11 +218,14 @@ test_decode_nt_graph()
     fclose (input);
 
     ASSERT (err == NULL, "Error string is not NULL!");
-    EXPECT_INT_EQ (ct, 6);
-    EXPECT_INT_EQ (LSUP_graph_size (gr), 6);
+    EXPECT_INT_EQ (ct, 8);
+    EXPECT_INT_EQ (LSUP_graph_size (gr), 7);
 
-    for (int i = 0; i < 7; i++)
+    for (int i = 0; i < 7; i++) {
+        printf("Checking triple #%d... ", i);
         EXPECT_INT_EQ (LSUP_graph_contains (gr, trp + i), 1);
+        printf("OK.\n");
+    }
 
     LSUP_graph_free (gr);
 

+ 8 - 0
test/test_graph.c

@@ -40,6 +40,10 @@ test_graph_mem_add()
         printf ("OK.\n");
     }
 
+    LSUP_Triple *missing_trp = LSUP_triple_new (trp[1].s, trp[6].p, trp[4].o);
+    ASSERT (! LSUP_graph_contains (gr, missing_trp), "Triple in graph!");
+    free (missing_trp);
+
     free_triples (trp); // gr copied data.
 
     LSUP_graph_free (gr);
@@ -89,6 +93,10 @@ test_graph_mdb_add()
         printf ("OK.\n");
     }
 
+    LSUP_Triple *missing_trp = LSUP_triple_new (trp[1].s, trp[6].p, trp[4].o);
+    ASSERT (! LSUP_graph_contains (gr, missing_trp), "Triple in graph!");
+    free (missing_trp);
+
     free_triples (trp); // gr copied data.
 
     LSUP_graph_free (gr);

+ 5 - 4
test/test_term.c

@@ -80,11 +80,12 @@ static int test_term_to_key()
     LSUP_Term *uri = LSUP_uri_new ("http://hello.org");
     LSUP_Term *lit = LSUP_term_new (LSUP_TERM_LITERAL, "hello", NULL, NULL);
     LSUP_Term *tlit = LSUP_term_new(
-            LSUP_TERM_LITERAL, "hello", "xsd:string", NULL);
+            LSUP_TERM_LITERAL, "hello", DEFAULT_DTYPE, NULL);
     LSUP_Term *tllit1 = LSUP_term_new(
-            LSUP_TERM_LITERAL, "hello", "xsd:string", "en-US");
+            LSUP_TERM_LITERAL, "hello", NULL, "en-US");
     LSUP_Term *tllit2 = LSUP_term_new(
-            LSUP_TERM_LITERAL, "hello", "xsd:string", "en-GB");
+            LSUP_TERM_LITERAL, "hello",
+            "http://www.w3.org/2001/XMLSchema#string", "en-GB");
 
     LSUP_Key uri_key = LSUP_term_hash (uri);
     LSUP_Key lit_key = LSUP_term_hash (lit);
@@ -93,7 +94,7 @@ static int test_term_to_key()
     LSUP_Key tllit2_key = LSUP_term_hash (tllit2);
 
     ASSERT (uri_key != lit_key, "URI key conflict!");
-    ASSERT (lit_key != tlit_key, "URI key conflict!");
+    ASSERT (lit_key == tlit_key, "URI keys differ!");
     ASSERT (lit_key != tllit1_key, "URI key conflict!");
     ASSERT (tlit_key != tllit1_key, "URI key conflict!");
     ASSERT (tllit1_key != tllit2_key, "URI key conflict!");