Browse Source

Add UTF-8 test; remove debug statements from IRI parser.

scossu 1 week ago
parent
commit
fb9fb05794
2 changed files with 14 additions and 10 deletions
  1. 8 8
      src/term.c
  2. 6 2
      test/test_term.c

+ 8 - 8
src/term.c

@@ -933,7 +933,7 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
     // Redundant if only called by term_init.
     // memset (coord, 0, sizeof(*coord));
 
-    log_debug ("Parsing IRI: %s", iri_str);
+    //log_debug ("Parsing IRI: %s", iri_str);
     // #2: ([^:/?#]+)
     while (
             *cur != ':' && *cur != '/' && *cur != '?'
@@ -948,7 +948,7 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
         coord[2].offset = 0;
         coord[2].size = tmp.size;
         cur++;
-        log_debug ("Group #2: %lu, %lu", coord[2].offset, coord[2].size);
+        //log_debug ("Group #2: %lu, %lu", coord[2].offset, coord[2].size);
     } else cur = iri_str;  // Backtrack if no match.
 
     // Non-capturing: (?://([^/?#]*))?
@@ -964,20 +964,20 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
         }
         coord[3].offset = tmp.offset;
         coord[3].size = tmp.size;
-        log_debug ("Group #3: %lu, %lu", coord[3].offset, coord[3].size);
+        //log_debug ("Group #3: %lu, %lu", coord[3].offset, coord[3].size);
     }
 
     // Capture group 1.
     coord[1].offset = 0;
     coord[1].size = cur - iri_str;
-    log_debug ("Group #1: %lu, %lu", coord[1].offset, coord[1].size);
+    //log_debug ("Group #1: %lu, %lu", coord[1].offset, coord[1].size);
 
     tmp.offset = cur - iri_str;
     tmp.size = 0;
 
     coord[4].offset = tmp.offset;
     coord[4].size = iri_len - tmp.offset;
-    log_debug ("Group #4: %lu, %lu", coord[4].offset, coord[4].size);
+    //log_debug ("Group #4: %lu, %lu", coord[4].offset, coord[4].size);
 
     // Non-capturing: (?:[^?#]*)
     while (*cur != '?' && *cur != '#' && *cur != '\0') {
@@ -999,7 +999,7 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
             // Got capture group #5.
             coord[5].offset = tmp.offset;
             coord[5].size = tmp.size;
-            log_debug ("Group #5: %lu, %lu", coord[5].offset, coord[5].size);
+            //log_debug ("Group #5: %lu, %lu", coord[5].offset, coord[5].size);
         }
     }
 
@@ -1008,12 +1008,12 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
         // #6: (.*)
         coord[6].offset = ++cur - iri_str;
         coord[6].size = iri_str + iri_len - cur;
-        log_debug ("Group #6: %lu, %lu", coord[6].offset, coord[6].size);
+        //log_debug ("Group #6: %lu, %lu", coord[6].offset, coord[6].size);
     }
 
     coord[0].offset = 0;
     coord[0].size = iri_len;
-    log_debug ("Full match: %lu, %lu", coord[0].offset, coord[0].size);
+    //log_debug ("Full match: %lu, %lu", coord[0].offset, coord[0].size);
 
     return LSUP_OK;
 }

+ 6 - 2
test/test_term.c

@@ -65,7 +65,7 @@ static int test_iriref()
 
 static int test_iriref_parts()
 {
-    char *data[17][4] = {
+    char *data[18][4] = {
         {"http://example.org", "http://example.org", "", ""},
         {"http://example.org/", "http://example.org", "/", ""},
         {"http://example.org?option", "http://example.org", "?option", ""},
@@ -82,6 +82,10 @@ static int test_iriref_parts()
         {
                 "http://example.org?option#anchor",
                 "http://example.org", "?option#anchor", "anchor"},
+        {
+                "http://hanzi.edu/漢魏六朝隋碑誌索引/53?option#anchor",
+                "http://hanzi.edu", "/漢魏六朝隋碑誌索引/53?option#anchor",
+                "anchor"},
         {"ftp:///", "ftp://", "/", ""},
         {
                 "file:///usr/local/lib/liblsuprdf.so",
@@ -97,7 +101,7 @@ static int test_iriref_parts()
                 "urn:", "uuid:950404b6-0e4f-4e21-8267-c8c00e83563b", ""}
     };
 
-    for (size_t i = 0; i < 17; i++) {
+    for (size_t i = 0; i < 18; i++) {
         LSUP_Term *iri = LSUP_iriref_new(data[i][0], NULL);
         char
             *pfx = LSUP_iriref_prefix (iri),