Browse Source

Compat @prefix token.

Stefano Cossu 2 years ago
parent
commit
c4ab528a26
2 changed files with 43 additions and 63 deletions
  1. 17 29
      src/codec/grammar_ttl.y
  2. 26 34
      src/codec/lexer_ttl.re

+ 17 - 29
src/codec/grammar_ttl.y

@@ -54,11 +54,11 @@
 
 %extra_argument         { LSUP_TTLParserState *state }
 
-%left WS .
-%left EOS .
+%left PERIOD .
 %left SEMICOLON .
 %left COMMA .
-%nonassoc PFX .
+%left STRING INTEGER DOUBLE DECIMAL BOOLEAN QNAME BNODE_ID IRIREF .
+%nonassoc LANGTAG PREFIX .
 %nonassoc COLON .
 
 /*
@@ -66,28 +66,23 @@
  */
 
 turtleDoc   ::= statements EOF .
-
 statements  ::= statements statement .
 statements  ::= .
 
-statement   ::= directive .
+statement 	::= prefixID .
+statement   ::= base .
 statement   ::= triples .
 
-directive 	::= prefixID EOS .
-directive   ::= base EOS .
 
-prefixID    ::= PREFIX PFX(P) IRIREF(N) . {
+prefixID    ::= PREFIX(P) WS IRIREF(N) PERIOD . {
                 LSUP_nsmap_add (state->nsm, P, N);
             }
-prefixID    ::= PREFIX COLON IRIREF(N) . {
-                LSUP_nsmap_add (state->nsm, "", N);
-            }
 
-base        ::= BASE IRIREF(D) . {
+base        ::= BASE IRIREF(D) PERIOD . {
                 state->base = LSUP_iriref_new (D, NULL);
             }
 
-triples 	::= subject(S) predObjList(L) EOS . {
+triples 	::= subject(S) predObjList(L) PERIOD . {
                 size_t ct = LSUP_spo_list_add_triples (state->it, S, L);
                 state->ct += ct;
                 state->rc = LSUP_OK;
@@ -112,10 +107,10 @@ predObjList(A) ::= predObjList(L) SEMICOLON predicate(P) objectList(O) . {
             }
 
 %type objectList { LSUP_Term ** }
-objectList(A) ::= objectList(L) COMMA object(O) . [COMMA] {
+objectList(A) ::= objectList(L) COMMA object(O) . {
                 A = LSUP_obj_list_add (L, O);
             }
-objectList(A) ::= object(O) . [COMMA] {
+objectList(A) ::= object(O) . [IRIREF] {
                 A = calloc (sizeof (*A), 2);
                 A[0] = O;
             }
@@ -169,17 +164,17 @@ literal(A)  ::= BOOLEAN(D) . {
 blank(A)    ::= BNODE_ID(D) . {
                 A = LSUP_term_new (LSUP_TERM_BNODE, D, NULL);
             }
-blank(A)    ::= LBRACKET RBRACKET . {
+blank(A)    ::= LBRACKET RBRACKET . [BNODE_ID] {
                 A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
             }
-blank(A)    ::= LBRACKET predObjList(L) RBRACKET . {
+blank(A)    ::= LBRACKET ows predObjList(L) ows RBRACKET . [BNODE_ID] {
                 A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
                 state->ct += LSUP_spo_list_add_triples (state->it, A, L);
 
                 LSUP_pred_obj_list_free (L);
             }
-blank       ::= collection .
-blank(A)    ::= LPAREN RPAREN . {
+blank       ::= collection . [BNODE_ID]
+blank(A)    ::= LPAREN RPAREN . [BNODE_ID] {
                 A = LSUP_iriref_new ("rdf:nil", state->nsm);
             }
 
@@ -210,17 +205,10 @@ resource(A) ::= IRIREF(D) . {
                     A = rel_iri;
                 }
             }
-resource(A) ::= qname(D) . { A = LSUP_iriref_new (D, state->nsm); }
+resource(A) ::= QNAME(D) . { A = LSUP_iriref_new (D, state->nsm); }
 
-qname(A)    ::= PFX(P) IDNAME(D) . {
-                A = malloc (strlen (P) + strlen (D) + 2);
-                sprintf (A, "%s:%s", P, D);
-            }
-qname(A)    ::= COLON IDNAME(D) . {
-                A = malloc (strlen (D) + 2);
-                sprintf (A, ":%s", D);
-            }
-qname(A)    ::= COLON . { A = strndup (":", 2); }
+ows         ::= WS .
+ows         ::= .
 
 /*
  * This has been adapted from

+ 26 - 34
src/codec/lexer_ttl.re

@@ -85,6 +85,8 @@ void TTLParseTrace();
 
 static int lex (ParseIterator *it, YYCTYPE **token_p)
 {
+    const YYCTYPE *pfx;
+
     /*!re2c
 
     re2c:eof = 0;
@@ -168,41 +170,43 @@ cont: // Continue token parsing. Do not move token start pointer.
     }
 
     '<' UCHAR* '>' {
-        *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
+        *token_p = uint8_ndup (it->tok + 1, YYCURSOR - it->tok - 2);
         log_trace ("URI data: %s", *token_p);
 
         return T_IRIREF;
     }
 
-    NSTART_CHAR NAME_CHAR* {
-        *token_p = unescape_unicode (it->tok, YYCURSOR - it->tok);
-        log_trace ("ID name: %s", *token_p);
+    '@prefix' WS @pfx (PSTART_CHAR NAME_CHAR*)? ":" {
+        *token_p = uint8_ndup (pfx, YYCURSOR - pfx - 1);
+        log_trace ("Prefix declaration: '%s'", *token_p);
 
-        return T_IDNAME;
+        return T_PREFIX;
     }
 
-    PSTART_CHAR NAME_CHAR* ':' {
-        *token_p = uint8_ndup (it->tok, YYCURSOR - it->tok - 1);
-        log_trace ("Prefix name: '%s'", *token_p);
+    '@base' {
+        log_trace ("'@base' keyword.");
 
-        return T_PFX;
+        return T_BASE;
     }
 
-    WS {
-        log_trace ("Whitespace.");
-        return T_WS;
+    (PSTART_CHAR NAME_CHAR*)? ":" (NSTART_CHAR NAME_CHAR*)? {
+        *token_p = uint8_ndup (it->tok, YYCURSOR - it->tok);
+        log_trace ("ID name: %s", *token_p);
+
+        return T_QNAME;
     }
 
-    '@prefix' {
-        log_trace ("'@prefix' keyword.");
+    '_:'  NSTART_CHAR NAME_CHAR* {
+        *token_p = uint8_ndup (it->tok + 2, YYCURSOR - it->tok - 2);
+        log_trace ("BNode name: %s", *token_p);
 
-        return T_PREFIX;
+        return T_BNODE_ID;
     }
 
-    '@base' {
-        log_trace ("'@base' keyword.");
 
-        return T_BASE;
+    WS {
+        log_trace ("Whitespace.");
+        return T_WS;
     }
 
     '@' [a-z]+ ('-' [a-z0-9]+)* {
@@ -264,11 +268,9 @@ cont: // Continue token parsing. Do not move token start pointer.
     '.' {
         log_trace ("End of statement #%u.", it->stmt);
         it->stmt++;
-        return T_EOS;
+        return T_PERIOD;
     }
 
-    '_:' { goto bnode_id; }
-
     '^^' { return T_DTYPE_MARKER; }
 
     COMMENT {
@@ -276,7 +278,7 @@ cont: // Continue token parsing. Do not move token start pointer.
         goto loop;
     }
 
-    WS "a" / WS {
+    "a" / WS {
         log_trace ("RDF type shorthand 'a'.");
         return T_RDF_TYPE;
     }
@@ -300,13 +302,7 @@ schar:
         return -1;
     }
 
-    SCHAR {
-        log_trace (
-                "Continue string token at position %lu: %c",
-                YYCURSOR - it->tok, *YYCURSOR);
-
-        goto schar;
-    }
+    SCHAR { goto schar; }
 
     [\x22] {
         *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
@@ -326,11 +322,7 @@ lchar:
         return -1;
     }
 
-    LCHAR {
-        log_trace ("Continue long string token: %s", it->tok);
-
-        goto lchar;
-    }
+    LCHAR { goto lchar; }
 
     [\x22]{3} {
         *token_p = unescape_unicode (it->tok + 3, YYCURSOR - it->tok - 6);