|
@@ -12,32 +12,27 @@
|
|
|
/**
|
|
|
* Max chunk size passed to scanner at each iteration.
|
|
|
*/
|
|
|
-#define CHUNK_SIZE 256
|
|
|
-
|
|
|
-/* Max possible token size. If a matching patten is not found, the scanner
|
|
|
- * keeps pulling data from input until a) a match is unambiguously found, or
|
|
|
- * not found; or b) EOF is reached; or c) the size of the buffer being searched
|
|
|
- * exceeds this size. Setting this to 0 disables any limit, which means that a
|
|
|
- * bad token might consume the whole input and, possibly, exhaust the available
|
|
|
- * memory and throw an error.
|
|
|
- */
|
|
|
-#define MAX_TOKEN_SIZE 8192
|
|
|
+#ifdef LSUP_RDF_STREAM_CHUNK_SIZE
|
|
|
+#define CHUNK_SIZE LSUP_RDF_STREAM_CHUNK_SIZE
|
|
|
+#else
|
|
|
+#define CHUNK_SIZE 8192
|
|
|
+#endif
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
- FILE * file; // Input file handle.
|
|
|
- YYCTYPE * buf, // Start of buffer.
|
|
|
- * lim, // Position after the last available input
|
|
|
- // character (YYLIMIT).
|
|
|
- * cur, // Next input character to be read
|
|
|
- // (YYCURSOR)
|
|
|
- * mar, // Most recent match (YYMARKER)
|
|
|
- * tok, // Start of current token.
|
|
|
- * bol; // Address of the beginning of the current
|
|
|
- // line (for debugging).
|
|
|
- unsigned line; // Current line no. (for debugging).
|
|
|
- unsigned ct; // Number of parsed triples.
|
|
|
- bool eof; // if we have reached EOF.
|
|
|
+ FILE * fh; // Input file handle.
|
|
|
+ YYCTYPE buf[CHUNK_SIZE + 1],// Input buffer + 1 byte for NUL sentinel.
|
|
|
+ * lim, // Position after the last available
|
|
|
+ // input character (YYLIMIT).
|
|
|
+ * cur, // Next input character to be read
|
|
|
+ // (YYCURSOR)
|
|
|
+ * mar, // Most recent match (YYMARKER)
|
|
|
+ * tok, // Start of current token.
|
|
|
+ * bol; // Address of the beginning of the
|
|
|
+ // current line (for debugging).
|
|
|
+ unsigned line; // Current line no. (for debugging).
|
|
|
+ unsigned ct; // Number of parsed triples.
|
|
|
+ bool eof; // Whether EOF has been reached.
|
|
|
/*!stags:re2c format = "YYCTYPE *@@;"; */
|
|
|
} ParseIterator;
|
|
|
|
|
@@ -70,7 +65,7 @@ static int fill(ParseIterator *it)
|
|
|
it->cur -= shift;
|
|
|
it->mar -= shift;
|
|
|
it->tok -= shift;
|
|
|
- it->lim += fread(it->lim, 1, shift, it->file);
|
|
|
+ it->lim += fread(it->lim, 1, shift, it->fh);
|
|
|
/*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
|
|
|
it->lim[0] = 0;
|
|
|
it->eof |= it->lim < it->buf + CHUNK_SIZE;
|
|
@@ -78,10 +73,9 @@ static int fill(ParseIterator *it)
|
|
|
}
|
|
|
|
|
|
|
|
|
-static void parse_init(ParseIterator *it, FILE *file)
|
|
|
+static void parse_init(ParseIterator *it, FILE *fh)
|
|
|
{
|
|
|
- it->file = file;
|
|
|
- it->buf = malloc (CHUNK_SIZE + 1);
|
|
|
+ it->fh = fh;
|
|
|
it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE;
|
|
|
it->line = 1;
|
|
|
it->bol = it->buf;
|
|
@@ -92,25 +86,6 @@ static void parse_init(ParseIterator *it, FILE *file)
|
|
|
}
|
|
|
|
|
|
|
|
|
-// TODO Make buffer extensible if a token is larger than the current buf size.
|
|
|
-static int __attribute__((unused)) extend (ParseIterator *it)
|
|
|
-{
|
|
|
- size_t delta = YYLIMIT - it->buf + CHUNK_SIZE;
|
|
|
- YYCTYPE *tmp = realloc (it->buf, delta);
|
|
|
- if (!tmp) return ENOMEM;
|
|
|
-
|
|
|
- it->lim += delta;
|
|
|
-
|
|
|
- it->buf = tmp;
|
|
|
-
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-static void parse_done (ParseIterator *it)
|
|
|
-{ free (it->buf); }
|
|
|
-
|
|
|
-
|
|
|
/** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
|
|
|
*/
|
|
|
static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
|
|
@@ -323,15 +298,14 @@ loop:
|
|
|
LSUP_rc
|
|
|
LSUP_nt_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
|
|
|
{
|
|
|
- FILE *stream = fmemopen ((void *)rep, strlen (rep), "r");
|
|
|
+ FILE *fh = fmemopen ((void *)rep, strlen (rep), "r");
|
|
|
|
|
|
ParseIterator it;
|
|
|
- parse_init (&it, stream);
|
|
|
+ parse_init (&it, fh);
|
|
|
|
|
|
int ttype = lex (&it, term);
|
|
|
|
|
|
- parse_done (&it);
|
|
|
- fclose (stream);
|
|
|
+ fclose (fh);
|
|
|
|
|
|
switch (ttype) {
|
|
|
case T_IRIREF:
|
|
@@ -344,13 +318,13 @@ LSUP_nt_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
|
|
|
}
|
|
|
|
|
|
LSUP_rc
|
|
|
-LSUP_nt_parse_doc (FILE *stream, LSUP_Graph **gr_p, size_t *ct, char **err_p)
|
|
|
+LSUP_nt_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
|
|
|
{
|
|
|
*err_p = NULL;
|
|
|
*gr_p = NULL;
|
|
|
|
|
|
ParseIterator parse_it;
|
|
|
- parse_init (&parse_it, stream);
|
|
|
+ parse_init (&parse_it, fh);
|
|
|
|
|
|
void *parser = ParseAlloc (malloc);
|
|
|
|
|
@@ -407,7 +381,6 @@ LSUP_nt_parse_doc (FILE *stream, LSUP_Graph **gr_p, size_t *ct, char **err_p)
|
|
|
finally:
|
|
|
Parse (parser, 0, NULL, it);
|
|
|
ParseFree (parser, free);
|
|
|
- parse_done (&parse_it);
|
|
|
|
|
|
LSUP_graph_add_done (it);
|
|
|
LSUP_term_free (term);
|