|
@@ -1,18 +1,13 @@
|
|
|
-#include <errno.h>
|
|
|
-#include <stdint.h>
|
|
|
-#include <stdio.h>
|
|
|
-#include <stdlib.h>
|
|
|
-#include <string.h>
|
|
|
-
|
|
|
#include "graph.h"
|
|
|
#include "src/codec/nt_grammar.h"
|
|
|
+#include "nt_parser.h"
|
|
|
|
|
|
|
|
|
#define YYCTYPE unsigned char
|
|
|
-#define YYCURSOR in->cur
|
|
|
-#define YYMARKER in->mar
|
|
|
-#define YYLIMIT in->lim
|
|
|
-#define YYFILL fill(in) == 0
|
|
|
+#define YYCURSOR it->cur
|
|
|
+#define YYMARKER it->mar
|
|
|
+#define YYLIMIT it->lim
|
|
|
+#define YYFILL fill(it) == 0
|
|
|
|
|
|
/**
|
|
|
* Max chunk size passed to scanner at each iteration.
|
|
@@ -42,61 +37,61 @@ typedef struct {
|
|
|
size_t ct; // Number of parsed triples.
|
|
|
int eof; // if we have reached EOF (T|F)
|
|
|
/*!stags:re2c format = "YYCTYPE *@@;"; */
|
|
|
-} Input;
|
|
|
+} ParseIterator;
|
|
|
|
|
|
|
|
|
-static int fill(Input *in)
|
|
|
+/** @brief Refill the scan buffer from it->file.
+ *
+ * Everything before it->tok is discarded: the bytes from it->tok up to
+ * it->lim are moved to the start of the buffer, the lim, cur, mar and tok
+ * pointers are moved back by the same distance, and up to that many new
+ * bytes are read from the file at the new limit. A NUL byte is then written
+ * at the limit, and the eof flag is raised once the buffer ends before
+ * buf + CHUNK_SIZE. A "Shifting bytes" trace line is printed to stdout.
+ *
+ * NOTE(review): shift is a size_t but is printed with %lu; %zu is the
+ * matching conversion.
+ *
+ * @param it Scanner state to refill.
+ *
+ * @return 0 after a refill; 1 if the eof flag was already set; 2 if it->tok
+ *  is still at the start of the buffer, so there is nothing to discard.
+ */
+static int fill(ParseIterator *it)
|
|
|
{
|
|
|
- if (in->eof) {
|
|
|
+ if (it->eof) {
|
|
|
return 1;
|
|
|
}
|
|
|
- const size_t shift = in->tok - in->buf;
|
|
|
+ const size_t shift = it->tok - it->buf;
|
|
|
if (shift < 1) {
|
|
|
return 2;
|
|
|
}
|
|
|
printf ("Shifting bytes: %lu\n", shift);
|
|
|
- memmove(in->buf, in->tok, in->lim - in->tok);
|
|
|
- in->lim -= shift;
|
|
|
- in->cur -= shift;
|
|
|
- in->mar -= shift;
|
|
|
- in->tok -= shift;
|
|
|
- in->lim += fread(in->lim, 1, shift, in->file);
|
|
|
- /*!stags:re2c format = "if (in->@@) in->@@ -= shift; "; */
|
|
|
- in->lim[0] = 0;
|
|
|
- in->eof |= in->lim < in->buf + CHUNK_SIZE;
|
|
|
+ memmove(it->buf, it->tok, it->lim - it->tok);
|
|
|
+ it->lim -= shift;
|
|
|
+ it->cur -= shift;
|
|
|
+ it->mar -= shift;
|
|
|
+ it->tok -= shift;
|
|
|
+ it->lim += fread(it->lim, 1, shift, it->file);
|
|
|
+ /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
|
|
|
+ it->lim[0] = 0;
|
|
|
+ it->eof |= it->lim < it->buf + CHUNK_SIZE;
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
|
|
|
-static void init(Input *in, FILE *file)
|
|
|
+/** @brief Set up a parse iterator to read from a stream.
+ *
+ * Allocates a buffer of CHUNK_SIZE + 1 bytes, resets the triple counter and
+ * the EOF flag, and calls fill() to load the first chunk. All scan pointers
+ * start at buf + CHUNK_SIZE, so that first fill() call sees the whole buffer
+ * as consumed and requests a full CHUNK_SIZE bytes from the stream.
+ *
+ * NOTE(review): the malloc() result is not checked before it is used.
+ *
+ * @param it Iterator to initialize.
+ * @param file Open stream to read from; it is stored in the iterator.
+ */
+static void parse_init(ParseIterator *it, FILE *file)
|
|
|
{
|
|
|
- in->file = file;
|
|
|
- in->buf = malloc (CHUNK_SIZE + 1);
|
|
|
- in->cur = in->mar = in->tok = in->lim = in->buf + CHUNK_SIZE;
|
|
|
- in->ct = 0;
|
|
|
- in->eof = 0;
|
|
|
- /*!stags:re2c format = "in->@@ = NULL; "; */
|
|
|
- fill (in);
|
|
|
+ it->file = file;
|
|
|
+ it->buf = malloc (CHUNK_SIZE + 1);
|
|
|
+ it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE;
|
|
|
+ it->ct = 0;
|
|
|
+ it->eof = 0;
|
|
|
+ /*!stags:re2c format = "it->@@ = NULL; "; */
|
|
|
+ fill (it);
|
|
|
}
|
|
|
|
|
|
|
|
|
// TODO Make buffer extensible if a token is larger than the current buf size.
|
|
|
-static int __attribute__((unused)) extend (Input *in)
|
|
|
+/** @brief Grow the scan buffer (marked as unused).
+ *
+ * Reallocates it->buf to (YYLIMIT - it->buf + CHUNK_SIZE) bytes.
+ *
+ * NOTE(review): after realloc() the limit pointer is advanced by the full
+ * new size rather than being rebased onto the new buffer, and cur, mar and
+ * tok are not updated at all. Confirm the pointer handling before this
+ * function is put to use.
+ *
+ * @param it Iterator whose buffer is to be grown.
+ *
+ * @return 0 on success; ENOMEM if realloc() fails, in which case it->buf and
+ *  it->lim are left unchanged.
+ */
+static int __attribute__((unused)) extend (ParseIterator *it)
|
|
|
{
|
|
|
- size_t delta = YYLIMIT - in->buf + CHUNK_SIZE;
|
|
|
- YYCTYPE *tmp = realloc (in->buf, delta);
|
|
|
+ size_t delta = YYLIMIT - it->buf + CHUNK_SIZE;
|
|
|
+ YYCTYPE *tmp = realloc (it->buf, delta);
|
|
|
if (!tmp) return ENOMEM;
|
|
|
|
|
|
- in->lim += delta;
|
|
|
+ it->lim += delta;
|
|
|
|
|
|
- in->buf = tmp;
|
|
|
+ it->buf = tmp;
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
|
|
|
-static void done (Input *in)
|
|
|
-{ free (in->buf); }
|
|
|
+/** @brief Release the buffer owned by a parse iterator.
+ *
+ * Only it->buf is freed; the stream in it->file is left open.
+ *
+ * @param it Iterator whose buffer is to be freed.
+ */
+static void parse_done (ParseIterator *it)
|
|
|
+{ free (it->buf); }
|
|
|
|
|
|
|
|
|
/** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
|
|
@@ -157,13 +152,13 @@ void ParseFree();
|
|
|
|
|
|
// Lexer.
|
|
|
|
|
|
-static int lex (Input *in, LSUP_Term **term)
|
|
|
+static int lex (ParseIterator *it, LSUP_Term **term)
|
|
|
{
|
|
|
const YYCTYPE *lit_data_e, *dtype_s, *lang_s;
|
|
|
|
|
|
loop:
|
|
|
|
|
|
- in->tok = in->cur;
|
|
|
+ it->tok = it->cur;
|
|
|
|
|
|
*term = NULL;
|
|
|
|
|
@@ -171,7 +166,7 @@ loop:
|
|
|
re2c:eof = 0;
|
|
|
re2c:flags:8 = 1;
|
|
|
re2c:flags:tags = 1;
|
|
|
- re2c:tags:expression = "in->@@";
|
|
|
+ re2c:tags:expression = "it->@@";
|
|
|
re2c:api:style = functions;
|
|
|
re2c:define:YYFILL:naked = 1;
|
|
|
|
|
@@ -208,7 +203,7 @@ loop:
|
|
|
}
|
|
|
|
|
|
IRIREF {
|
|
|
- YYCTYPE *data = unescape_unicode (in->tok + 1, YYCURSOR - in->tok - 2);
|
|
|
+ YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
|
|
|
|
|
|
printf ("URI data: %s\n", data);
|
|
|
|
|
@@ -220,8 +215,8 @@ loop:
|
|
|
|
|
|
LITERAL {
|
|
|
// Only unescape Unicode from data.
|
|
|
- size_t size = lit_data_e - in->tok - 2;
|
|
|
- YYCTYPE *data = unescape_unicode (in->tok + 1, size);
|
|
|
+ size_t size = lit_data_e - it->tok - 2;
|
|
|
+ YYCTYPE *data = unescape_unicode (it->tok + 1, size);
|
|
|
printf ("Literal data: %s\n", data);
|
|
|
|
|
|
YYCTYPE *datatype = NULL, *lang = NULL;
|
|
@@ -252,7 +247,7 @@ loop:
|
|
|
}
|
|
|
|
|
|
BNODE {
|
|
|
- YYCTYPE *data = unescape_unicode (in->tok + 2, YYCURSOR - in->tok - 1);
|
|
|
+ YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 1);
|
|
|
|
|
|
printf ("BNode data: %s\n", data);
|
|
|
|
|
@@ -264,7 +259,7 @@ loop:
|
|
|
|
|
|
DOT {
|
|
|
printf ("End of triple.\n");
|
|
|
- in->ct ++;
|
|
|
+ it->ct ++;
|
|
|
|
|
|
return T_DOT;
|
|
|
}
|
|
@@ -273,13 +268,12 @@ loop:
|
|
|
printf("Separator.\n");
|
|
|
|
|
|
return T_WS;
|
|
|
- //goto loop;
|
|
|
}
|
|
|
|
|
|
COMMENT {
|
|
|
- size_t size = YYCURSOR - in->tok + 1;
|
|
|
+ size_t size = YYCURSOR - it->tok + 1;
|
|
|
YYCTYPE *data = malloc (size);
|
|
|
- memcpy (data, in->tok, size);
|
|
|
+ memcpy (data, it->tok, size);
|
|
|
data [size - 1] = '\0';
|
|
|
printf ("Comment: `%s`\n", data);
|
|
|
free (data);
|
|
@@ -290,7 +284,7 @@ loop:
|
|
|
* {
|
|
|
printf (
|
|
|
"Invalid token @ %lu: %s (\\x%x)\n",
|
|
|
- YYCURSOR - in->buf - 1, in->tok, *in->tok);
|
|
|
+ YYCURSOR - it->buf - 1, it->tok, *it->tok);
|
|
|
|
|
|
return -1;
|
|
|
}
|
|
@@ -299,22 +293,33 @@ loop:
|
|
|
}
|
|
|
|
|
|
|
|
|
-int main(int argc, char *argv[])
|
|
|
+/** @brief Parse one term from its N-Triples string representation.
+ *
+ * The string is wrapped in a read-only memory stream, a single token is
+ * scanned from it, and both the scan buffer and the stream are released
+ * before returning.
+ *
+ * @param[in] rep NUL-terminated string to parse.
+ * @param[in] map Not used by this function at present.
+ * @param[out] term Handle passed to lex(), which resets it to NULL before
+ *  scanning; presumably set to the new term on success (TODO confirm
+ *  against the lexer rules).
+ *
+ * @return LSUP_OK if the token is an IRI reference, a literal or a blank
+ *  node; LSUP_VALUE_ERR for any other token, or if the memory stream
+ *  cannot be opened.
+ */
+LSUP_rc
|
|
|
+LSUP_nt_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
|
|
|
{
|
|
|
- Input input;
|
|
|
+ FILE *stream = fmemopen ((void *)rep, strlen (rep), "r");
+ // fmemopen() may fail (e.g. on a zero-length buffer); do not hand a NULL
+ // stream to the scanner.
+ if (!stream) return LSUP_VALUE_ERR;
|
|
|
|
|
|
- if (argc != 2) {
|
|
|
- fprintf (stderr, "One argument required.\n");
|
|
|
- return -1;
|
|
|
- }
|
|
|
+ ParseIterator it;
|
|
|
+ parse_init (&it, stream);
|
|
|
|
|
|
- FILE *fh = fopen (argv[1], "r");
|
|
|
- if (!fh) {
|
|
|
- fprintf (stderr, "Error opening file.\n");
|
|
|
- return -1;
|
|
|
+ int ttype = lex (&it, term);
|
|
|
+
|
|
|
+ parse_done (&it);
+ // parse_done() only frees the scan buffer; the stream opened above is
+ // owned by this function and must be closed here.
+ fclose (stream);
|
|
|
+
|
|
|
+ switch (ttype) {
|
|
|
+ case T_IRIREF:
|
|
|
+ case T_LITERAL:
|
|
|
+ case T_BNODE:
|
|
|
+ return LSUP_OK;
|
|
|
+ default:
|
|
|
+ return LSUP_VALUE_ERR;
|
|
|
}
|
|
|
+}
|
|
|
|
|
|
- init (&input, fh);
|
|
|
+LSUP_rc
|
|
|
+LSUP_nt_parse_doc (FILE *stream, LSUP_Graph **gr_p, size_t *ct)
|
|
|
+{
|
|
|
+ ParseIterator parse_it;
|
|
|
+ parse_init (&parse_it, stream);
|
|
|
|
|
|
void *parser = ParseAlloc (malloc);
|
|
|
|
|
@@ -323,17 +328,14 @@ int main(int argc, char *argv[])
|
|
|
LSUP_Term *term = NULL;
|
|
|
|
|
|
for (;;) {
|
|
|
- int ttype = lex (&input, &term);
|
|
|
+ int ttype = lex (&parse_it, &term);
|
|
|
|
|
|
if (ttype == -1) {
|
|
|
fprintf(stderr, "Parse error.\n");
|
|
|
- break;
|
|
|
+ goto fail;
|
|
|
}
|
|
|
|
|
|
- printf ("Token #%d\n", ttype);
|
|
|
-
|
|
|
Parse (parser, ttype, term, it);
|
|
|
- //if (term) LSUP_term_free (term);
|
|
|
|
|
|
if (ttype == T_EOF) break;
|
|
|
};
|
|
@@ -342,17 +344,23 @@ int main(int argc, char *argv[])
|
|
|
|
|
|
LSUP_graph_add_done (it);
|
|
|
|
|
|
- if (term) LSUP_term_free (term);
|
|
|
+ if (ct) *ct = parse_it.ct;
|
|
|
+
|
|
|
+ TRACE ("Parsed %lu triples.\n", parse_it.ct);
|
|
|
+ TRACE ("Graph size: %lu\n", LSUP_graph_size (gr));
|
|
|
+
|
|
|
+ LSUP_term_free (term);
|
|
|
|
|
|
ParseFree (parser, free);
|
|
|
- fclose (fh);
|
|
|
- done (&input);
|
|
|
+ parse_done (&parse_it);
|
|
|
+
|
|
|
+ *gr_p = gr;
|
|
|
|
|
|
- printf ("Parsed %lu triples.\n", input.ct);
|
|
|
+ return LSUP_OK;
|
|
|
|
|
|
- printf ("Graph size: %lu\n", LSUP_graph_size (gr));
|
|
|
+fail:
|
|
|
LSUP_graph_free (gr);
|
|
|
|
|
|
- return 0;
|
|
|
+ return LSUP_VALUE_ERR;
|
|
|
}
|
|
|
|