|
@@ -27,15 +27,17 @@
|
|
typedef struct {
|
|
typedef struct {
|
|
FILE * file; // Input file handle.
|
|
FILE * file; // Input file handle.
|
|
YYCTYPE * buf, // Start of buffer.
|
|
YYCTYPE * buf, // Start of buffer.
|
|
- * lim, // Position after the last
|
|
|
|
- // available input character
|
|
|
|
- // (YYLIMIT)
|
|
|
|
|
|
+ * lim, // Position after the last available input
|
|
|
|
+ // character (YYLIMIT).
|
|
* cur, // Next input character to be read
|
|
* cur, // Next input character to be read
|
|
// (YYCURSOR)
|
|
// (YYCURSOR)
|
|
* mar, // Most recent match (YYMARKER)
|
|
* mar, // Most recent match (YYMARKER)
|
|
- * tok; // Start of current token.
|
|
|
|
- size_t ct; // Number of parsed triples.
|
|
|
|
- int eof; // if we have reached EOF (T|F)
|
|
|
|
|
|
+ * tok, // Start of current token.
|
|
|
|
+ * bol; // Address of the beginning of the current
|
|
|
|
+ // line (for debugging).
|
|
|
|
+ unsigned line; // Current line no. (for debugging).
|
|
|
|
+ unsigned ct; // Number of parsed triples.
|
|
|
|
+ bool eof; // if we have reached EOF.
|
|
/*!stags:re2c format = "YYCTYPE *@@;"; */
|
|
/*!stags:re2c format = "YYCTYPE *@@;"; */
|
|
} ParseIterator;
|
|
} ParseIterator;
|
|
|
|
|
|
@@ -49,7 +51,7 @@ static int fill(ParseIterator *it)
|
|
if (shift < 1) {
|
|
if (shift < 1) {
|
|
return 2;
|
|
return 2;
|
|
}
|
|
}
|
|
- printf ("Shifting bytes: %lu\n", shift);
|
|
|
|
|
|
+ TRACE ("Shifting bytes: %lu\n", shift);
|
|
memmove(it->buf, it->tok, it->lim - it->tok);
|
|
memmove(it->buf, it->tok, it->lim - it->tok);
|
|
it->lim -= shift;
|
|
it->lim -= shift;
|
|
it->cur -= shift;
|
|
it->cur -= shift;
|
|
@@ -68,6 +70,8 @@ static void parse_init(ParseIterator *it, FILE *file)
|
|
it->file = file;
|
|
it->file = file;
|
|
it->buf = malloc (CHUNK_SIZE + 1);
|
|
it->buf = malloc (CHUNK_SIZE + 1);
|
|
it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE;
|
|
it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE;
|
|
|
|
+ it->line = 1;
|
|
|
|
+ it->bol = it->buf;
|
|
it->ct = 0;
|
|
it->ct = 0;
|
|
it->eof = 0;
|
|
it->eof = 0;
|
|
/*!stags:re2c format = "it->@@ = NULL; "; */
|
|
/*!stags:re2c format = "it->@@ = NULL; "; */
|
|
@@ -173,8 +177,9 @@ loop:
|
|
|
|
|
|
// For unresolved and partially resolved inconsistencies of the spec, see
|
|
// For unresolved and partially resolved inconsistencies of the spec, see
|
|
// https://lists.w3.org/Archives/Public/public-rdf-comments/2017Jun/0000.html
|
|
// https://lists.w3.org/Archives/Public/public-rdf-comments/2017Jun/0000.html
|
|
- WS = [\x09\x20]+;
|
|
|
|
- EOL = WS? [\x0D\x0A]+;
|
|
|
|
|
|
+ _WS = [\x09\x20];
|
|
|
|
+ WS = _WS+;
|
|
|
|
+ EOL = [\x0D\x0A] (_WS | [\x0D\x0A])*;
|
|
DOT = [.];
|
|
DOT = [.];
|
|
HEX = [0-9A-Fa-f];
|
|
HEX = [0-9A-Fa-f];
|
|
ECHAR = [\\] [tbnrf"'\\];
|
|
ECHAR = [\\] [tbnrf"'\\];
|
|
@@ -187,25 +192,27 @@ loop:
|
|
LANGTAG = [@] [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*;
|
|
LANGTAG = [@] [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*;
|
|
|
|
|
|
IRIREF = [<] IRI_CHARS [>];
|
|
IRIREF = [<] IRI_CHARS [>];
|
|
- LITERAL = LITERAL_QUOTE @lit_data_e WS? ("^^" WS? @dtype_s IRIREF | @lang_s LANGTAG)?;
|
|
|
|
|
|
+ LITERAL = LITERAL_QUOTE @lit_data_e _WS* ("^^" _WS* @dtype_s IRIREF | @lang_s LANGTAG)?;
|
|
BNODE = "_:" ((PN_CHARS_U | [0-9]) ((PN_CHARS | ".")* PN_CHARS)?);
|
|
BNODE = "_:" ((PN_CHARS_U | [0-9]) ((PN_CHARS | ".")* PN_CHARS)?);
|
|
COMMENT = "#" .*;
|
|
COMMENT = "#" .*;
|
|
|
|
|
|
|
|
|
|
EOL {
|
|
EOL {
|
|
- printf("End of line.\n");
|
|
|
|
|
|
+ it->line ++;
|
|
|
|
+ it->bol = YYCURSOR;
|
|
|
|
+ TRACE("New line: #%u.\n", it->line);
|
|
return T_EOL;
|
|
return T_EOL;
|
|
}
|
|
}
|
|
|
|
|
|
$ {
|
|
$ {
|
|
- printf("End of buffer.\n");
|
|
|
|
|
|
+ TRACE(STR, "End of buffer.\n");
|
|
return T_EOF;
|
|
return T_EOF;
|
|
}
|
|
}
|
|
|
|
|
|
IRIREF {
|
|
IRIREF {
|
|
YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
|
|
YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
|
|
|
|
|
|
- printf ("URI data: %s\n", data);
|
|
|
|
|
|
+ TRACE ("URI data: %s\n", data);
|
|
|
|
|
|
*term = LSUP_uri_new ((char*)data);
|
|
*term = LSUP_uri_new ((char*)data);
|
|
free (data);
|
|
free (data);
|
|
@@ -217,7 +224,7 @@ loop:
|
|
// Only unescape Unicode from data.
|
|
// Only unescape Unicode from data.
|
|
size_t size = lit_data_e - it->tok - 2;
|
|
size_t size = lit_data_e - it->tok - 2;
|
|
YYCTYPE *data = unescape_unicode (it->tok + 1, size);
|
|
YYCTYPE *data = unescape_unicode (it->tok + 1, size);
|
|
- printf ("Literal data: %s\n", data);
|
|
|
|
|
|
+ TRACE ("Literal data: %s\n", data);
|
|
|
|
|
|
YYCTYPE *datatype = NULL, *lang = NULL;
|
|
YYCTYPE *datatype = NULL, *lang = NULL;
|
|
|
|
|
|
@@ -226,7 +233,7 @@ loop:
|
|
datatype = malloc (size);
|
|
datatype = malloc (size);
|
|
memcpy (datatype, dtype_s + 1, size);
|
|
memcpy (datatype, dtype_s + 1, size);
|
|
datatype [size - 1] = '\0';
|
|
datatype [size - 1] = '\0';
|
|
- printf ("datatype: %s\n", datatype);
|
|
|
|
|
|
+ TRACE ("datatype: %s\n", datatype);
|
|
}
|
|
}
|
|
|
|
|
|
if (lang_s) {
|
|
if (lang_s) {
|
|
@@ -234,7 +241,7 @@ loop:
|
|
lang = malloc (size);
|
|
lang = malloc (size);
|
|
memcpy (lang, lang_s + 1, size);
|
|
memcpy (lang, lang_s + 1, size);
|
|
lang [size - 1] = '\0';
|
|
lang [size - 1] = '\0';
|
|
- printf ("lang: %s\n", lang);
|
|
|
|
|
|
+ TRACE ("lang: %s\n", lang);
|
|
}
|
|
}
|
|
|
|
|
|
*term = LSUP_term_new (LSUP_TERM_LITERAL, (char*)data, (char*)datatype, (char*)lang);
|
|
*term = LSUP_term_new (LSUP_TERM_LITERAL, (char*)data, (char*)datatype, (char*)lang);
|
|
@@ -249,7 +256,7 @@ loop:
|
|
BNODE {
|
|
BNODE {
|
|
YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 1);
|
|
YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 1);
|
|
|
|
|
|
- printf ("BNode data: %s\n", data);
|
|
|
|
|
|
+ TRACE ("BNode data: %s\n", data);
|
|
|
|
|
|
*term = LSUP_term_new (LSUP_TERM_BNODE, (char*)data, NULL, NULL);
|
|
*term = LSUP_term_new (LSUP_TERM_BNODE, (char*)data, NULL, NULL);
|
|
free (data);
|
|
free (data);
|
|
@@ -258,14 +265,14 @@ loop:
|
|
}
|
|
}
|
|
|
|
|
|
DOT {
|
|
DOT {
|
|
- printf ("End of triple.\n");
|
|
|
|
|
|
+ TRACE (STR, "End of triple.\n");
|
|
it->ct ++;
|
|
it->ct ++;
|
|
|
|
|
|
return T_DOT;
|
|
return T_DOT;
|
|
}
|
|
}
|
|
|
|
|
|
WS {
|
|
WS {
|
|
- printf("Separator.\n");
|
|
|
|
|
|
+ TRACE (STR, "Separator.\n");
|
|
|
|
|
|
return T_WS;
|
|
return T_WS;
|
|
}
|
|
}
|
|
@@ -275,14 +282,14 @@ loop:
|
|
YYCTYPE *data = malloc (size);
|
|
YYCTYPE *data = malloc (size);
|
|
memcpy (data, it->tok, size);
|
|
memcpy (data, it->tok, size);
|
|
data [size - 1] = '\0';
|
|
data [size - 1] = '\0';
|
|
- printf ("Comment: `%s`\n", data);
|
|
|
|
|
|
+ TRACE ("Comment: `%s`\n", data);
|
|
free (data);
|
|
free (data);
|
|
|
|
|
|
goto loop;
|
|
goto loop;
|
|
}
|
|
}
|
|
|
|
|
|
* {
|
|
* {
|
|
- printf (
|
|
|
|
|
|
+ TRACE (
|
|
"Invalid token @ %lu: %s (\\x%x)\n",
|
|
"Invalid token @ %lu: %s (\\x%x)\n",
|
|
YYCURSOR - it->buf - 1, it->tok, *it->tok);
|
|
YYCURSOR - it->buf - 1, it->tok, *it->tok);
|
|
|
|
|
|
@@ -317,8 +324,11 @@ LSUP_nt_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
|
|
}
|
|
}
|
|
|
|
|
|
LSUP_rc
|
|
LSUP_rc
|
|
-LSUP_nt_parse_doc (FILE *stream, LSUP_Graph **gr_p, size_t *ct)
|
|
|
|
|
|
+LSUP_nt_parse_doc (FILE *stream, LSUP_Graph **gr_p, size_t *ct, char **err_p)
|
|
{
|
|
{
|
|
|
|
+ *err_p = NULL;
|
|
|
|
+ *gr_p = NULL;
|
|
|
|
+
|
|
ParseIterator parse_it;
|
|
ParseIterator parse_it;
|
|
parse_init (&parse_it, stream);
|
|
parse_init (&parse_it, stream);
|
|
|
|
|
|
@@ -332,7 +342,23 @@ LSUP_nt_parse_doc (FILE *stream, LSUP_Graph **gr_p, size_t *ct)
|
|
int ttype = lex (&parse_it, &term);
|
|
int ttype = lex (&parse_it, &term);
|
|
|
|
|
|
if (ttype == -1) {
|
|
if (ttype == -1) {
|
|
- fprintf(stderr, "Parse error.\n");
|
|
|
|
|
|
+ char token[16];
|
|
|
|
+ strncpy (token, (const char *)parse_it.tok, 15);
|
|
|
|
+
|
|
|
|
+ char *err_start = "Parse error near token `";
|
|
|
|
+
|
|
|
|
+ char err_info [64];
|
|
|
|
+ sprintf(
|
|
|
|
+ err_info, "[...]' at line %u, character %ld.\n",
|
|
|
|
+ parse_it.line, parse_it.cur - parse_it.bol);
|
|
|
|
+
|
|
|
|
+ size_t err_size = strlen (err_start) + strlen (token)
|
|
|
|
+ + strlen (err_info) + 1;
|
|
|
|
+ char *err_str = malloc (err_size);
|
|
|
|
+ sprintf (err_str, "%s%s%s", err_start, token, err_info);
|
|
|
|
+
|
|
|
|
+ *err_p = err_str;
|
|
|
|
+
|
|
goto fail;
|
|
goto fail;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -347,7 +373,7 @@ LSUP_nt_parse_doc (FILE *stream, LSUP_Graph **gr_p, size_t *ct)
|
|
|
|
|
|
if (ct) *ct = parse_it.ct;
|
|
if (ct) *ct = parse_it.ct;
|
|
|
|
|
|
- TRACE ("Parsed %lu triples.\n", parse_it.ct);
|
|
|
|
|
|
+ TRACE ("Parsed %u triples.\n", parse_it.ct);
|
|
TRACE ("Graph size: %lu\n", LSUP_graph_size (gr));
|
|
TRACE ("Graph size: %lu\n", LSUP_graph_size (gr));
|
|
|
|
|
|
LSUP_term_free (term);
|
|
LSUP_term_free (term);
|