|
@@ -133,9 +133,17 @@ loop: // Start new token.
|
|
|
|
|
|
*token_p = NULL;
|
|
|
|
|
|
-cont: // Continue token parsing.
|
|
|
+cont: // Continue token parsing. Do not move token start pointer.
|
|
|
/*!re2c
|
|
|
|
|
|
+ * {
|
|
|
+ log_warn (
|
|
|
+ "Invalid token @ %lu: %s (\\x%x)",
|
|
|
+ YYCURSOR - it->buf - 1, it->tok, *it->tok);
|
|
|
+
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+
|
|
|
$ {
|
|
|
log_trace ("End of document.");
|
|
|
return T_EOF;
|
|
@@ -148,10 +156,9 @@ cont: // Continue token parsing.
|
|
|
goto loop;
|
|
|
}
|
|
|
|
|
|
- '@prefix' {
|
|
|
- log_trace ("'@prefix' keyword.");
|
|
|
- return T_PREFIX;
|
|
|
- }
|
|
|
+ [\x22]{3} { goto lchar; }
|
|
|
+
|
|
|
+ [\x22] { goto schar; }
|
|
|
|
|
|
"true" | "false" {
|
|
|
*token_p = uint8_ndup (it->tok, YYCURSOR - it->tok);
|
|
@@ -167,11 +174,6 @@ cont: // Continue token parsing.
|
|
|
return T_IRIREF;
|
|
|
}
|
|
|
|
|
|
- NCWS 'a' / WS {
|
|
|
- log_trace ("RDF type shorthand 'a'.");
|
|
|
- return T_RDF_TYPE;
|
|
|
- }
|
|
|
-
|
|
|
NSTART_CHAR NAME_CHAR* {
|
|
|
*token_p = unescape_unicode (it->tok, YYCURSOR - it->tok);
|
|
|
log_trace ("ID name: %s", *token_p);
|
|
@@ -186,18 +188,21 @@ cont: // Continue token parsing.
|
|
|
return T_PFX;
|
|
|
}
|
|
|
|
|
|
- [\x22]{3} { goto lchar; }
|
|
|
+ WS {
|
|
|
+ log_trace ("Whitespace.");
|
|
|
+ return T_WS;
|
|
|
+ }
|
|
|
|
|
|
- [\x22] SCHAR* [\x22] {
|
|
|
- *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
|
|
|
- log_trace ("Long string: %s", *token_p);
|
|
|
+ '@prefix' {
|
|
|
+ log_trace ("'@prefix' keyword.");
|
|
|
|
|
|
- return T_STRING;
|
|
|
+ return T_PREFIX;
|
|
|
}
|
|
|
|
|
|
- WS {
|
|
|
- log_trace ("Whitespace.");
|
|
|
- return T_WS;
|
|
|
+ '@base' {
|
|
|
+ log_trace ("'@base' keyword.");
|
|
|
+
|
|
|
+ return T_BASE;
|
|
|
}
|
|
|
|
|
|
'@' [a-z]+ ('-' [a-z0-9]+)* {
|
|
@@ -242,13 +247,13 @@ cont: // Continue token parsing.
|
|
|
return T_DECIMAL;
|
|
|
}
|
|
|
|
|
|
- '(' { return T_LPAREN; }
|
|
|
+ '(' WS? { return T_LPAREN; }
|
|
|
|
|
|
- ')' { return T_RPAREN; }
|
|
|
+ WS? ')' { return T_RPAREN; }
|
|
|
|
|
|
- '[' { return T_LBRACKET; }
|
|
|
+ '[' WS? { return T_LBRACKET; }
|
|
|
|
|
|
- ']' { return T_RBRACKET; }
|
|
|
+ WS? ']' { return T_RBRACKET; }
|
|
|
|
|
|
':' { return T_COLON; }
|
|
|
|
|
@@ -262,28 +267,23 @@ cont: // Continue token parsing.
|
|
|
return T_EOS;
|
|
|
}
|
|
|
|
|
|
- '_:' { return T_BNODE_PFX; }
|
|
|
+ '_:' { goto bnode_id; }
|
|
|
|
|
|
'^^' { return T_DTYPE_MARKER; }
|
|
|
|
|
|
- '@base' {return T_BASE; }
|
|
|
-
|
|
|
COMMENT {
|
|
|
log_trace ("Comment: `%s`", it->tok);
|
|
|
goto loop;
|
|
|
}
|
|
|
|
|
|
- * {
|
|
|
- log_warn (
|
|
|
- "Invalid token @ %lu: %s (\\x%x)",
|
|
|
- YYCURSOR - it->buf - 1, it->tok, *it->tok);
|
|
|
-
|
|
|
- return -1;
|
|
|
+ WS "a" / WS {
|
|
|
+ log_trace ("RDF type shorthand 'a'.");
|
|
|
+ return T_RDF_TYPE;
|
|
|
}
|
|
|
|
|
|
*/
|
|
|
|
|
|
-lchar:
|
|
|
+schar:
|
|
|
/*!re2c
|
|
|
|
|
|
* {
|
|
@@ -294,6 +294,32 @@ lchar:
|
|
|
return -1;
|
|
|
}
|
|
|
|
|
|
+ $ {
|
|
|
+ log_warn ("Unterminated string!");
|
|
|
+
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+
|
|
|
+ SCHAR {
|
|
|
+ log_trace (
|
|
|
+ "Continue string token at position %lu: %c",
|
|
|
+ YYCURSOR - it->tok, *YYCURSOR);
|
|
|
+
|
|
|
+ goto schar;
|
|
|
+ }
|
|
|
+
|
|
|
+ [\x22] {
|
|
|
+ *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
|
|
|
+ log_trace ("String: %s", *token_p);
|
|
|
+
|
|
|
+ return T_STRING;
|
|
|
+ }
|
|
|
+
|
|
|
+ */
|
|
|
+
|
|
|
+lchar:
|
|
|
+ /*!re2c
|
|
|
+
|
|
|
$ {
|
|
|
log_warn ("Unterminated long string!");
|
|
|
|
|
@@ -312,7 +338,43 @@ lchar:
|
|
|
|
|
|
return T_STRING;
|
|
|
}
|
|
|
- */
|
|
|
+
|
|
|
+ * {
|
|
|
+ log_warn (
|
|
|
+ "Invalid token @ %lu: %s (\\x%x)",
|
|
|
+ YYCURSOR - it->buf - 1, it->tok, *it->tok);
|
|
|
+
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+
|
|
|
+ */
|
|
|
+
|
|
|
+bnode_id:
|
|
|
+ /*!re2c
|
|
|
+
|
|
|
+ NSTART_CHAR NAME_CHAR* {
|
|
|
+ *token_p = unescape_unicode (it->tok, YYCURSOR - it->tok);
|
|
|
+ log_trace ("ID name: %s", *token_p);
|
|
|
+
|
|
|
+ return T_BNODE_ID;
|
|
|
+ }
|
|
|
+
|
|
|
+ * {
|
|
|
+ log_warn (
|
|
|
+ "Invalid token @ %lu: %s (\\x%x)",
|
|
|
+ YYCURSOR - it->buf - 1, it->tok, *it->tok);
|
|
|
+
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+
|
|
|
+ $ {
|
|
|
+ log_trace ("End of document.");
|
|
|
+
|
|
|
+ return T_EOF;
|
|
|
+ }
|
|
|
+
|
|
|
+ */
|
|
|
+
|
|
|
}
|
|
|
|
|
|
|