|
@@ -1,7 +1,21 @@
|
|
#include "volksdata/codec/parser_nt.h"
|
|
#include "volksdata/codec/parser_nt.h"
|
|
#include "volksdata/codec/tokens_nt.h"
|
|
#include "volksdata/codec/tokens_nt.h"
|
|
|
|
+//#include "volksdata/codec/parser_common.h"
|
|
|
|
|
|
|
|
|
|
|
|
+/** BEGIN duplicate section
|
|
|
|
+ * This section is bit-by-bit identical in NT and TTL lexers. The copy in
|
|
|
|
+ * include/volksdata/codec/parser_common.h should be used, but some re2c tags
|
|
|
|
+ * are not being parsed in that location.
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+/** @brief TTL is UTF-8 encoded.
|
|
|
|
+ *
|
|
|
|
+ * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar
|
|
|
|
+ *
|
|
|
|
+ * `char` should be considered to be UTF-8 throughout this library, however,
|
|
|
|
+ * setting YYCTYPE to char generates case labels outside of the char range.
|
|
|
|
+ */
|
|
#define YYCTYPE uint8_t
|
|
#define YYCTYPE uint8_t
|
|
#define YYCURSOR it->cur
|
|
#define YYCURSOR it->cur
|
|
#define YYMARKER it->mar
|
|
#define YYMARKER it->mar
|
|
@@ -10,20 +24,20 @@
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
typedef struct {
|
|
- FILE *fh; ///< Input file handle.
|
|
|
|
- const char *sh; ///< Input string. Exclusive with fh.
|
|
|
|
- YYCTYPE buf[CHUNK_SIZE], ///< Start of buffer.
|
|
|
|
- *lim, ///< Position after the last available
|
|
|
|
- ///< input character (YYLIMIT).
|
|
|
|
- *cur, ///< Next input character to be read
|
|
|
|
- ///< (YYCURSOR)
|
|
|
|
- *mar, ///< Most recent match (YYMARKER)
|
|
|
|
- *tok, ///< Start of current token.
|
|
|
|
- *bol; ///< Address of the beginning of the
|
|
|
|
- ///< current line (for debugging).
|
|
|
|
- unsigned line; ///< Current line no. (for debugging).
|
|
|
|
- unsigned ct; ///< Number of parsed triples.
|
|
|
|
- bool eof; ///< if we have reached EOF.
|
|
|
|
|
|
+ FILE * fh; ///< Input file handle.
|
|
|
|
+ const char * sh; ///< Input string. Exclusive with fh.
|
|
|
|
+ size_t buf_size; ///< Initial allocation for buffer.
|
|
|
|
+ YYCTYPE * buf, ///< Start of buffer.
|
|
|
|
+ * lim, ///< Position after the last available
|
|
|
|
+ ///< input character (YYLIMIT).
|
|
|
|
+ * cur, ///< Next input character to be read (YYCURSOR)
|
|
|
|
+ * mar, ///< Most recent match (YYMARKER)
|
|
|
|
+ * tok, ///< Start of current token.
|
|
|
|
+ * bol; ///< Address of the beginning of the
|
|
|
|
+ ///< current line (for debugging).
|
|
|
|
+ unsigned line; ///< Current line no. (for debugging).
|
|
|
|
+ unsigned ct; ///< Number of statements parsed.
|
|
|
|
+ bool eof; ///< if we have reached EOF.
|
|
/*!stags:re2c format = "YYCTYPE *@@;"; */
|
|
/*!stags:re2c format = "YYCTYPE *@@;"; */
|
|
} ParseIterator;
|
|
} ParseIterator;
|
|
|
|
|
|
@@ -33,9 +47,17 @@ static int fill(ParseIterator *it)
|
|
if (it->eof) {
|
|
if (it->eof) {
|
|
return 1;
|
|
return 1;
|
|
}
|
|
}
|
|
- const size_t shift = it->tok - it->buf;
|
|
|
|
- if (shift < 1) {
|
|
|
|
- return 2;
|
|
|
|
|
|
+ size_t shift = it->tok - it->buf;
|
|
|
|
+
|
|
|
|
+ // If buffer is too small for the lexeme, double the capacity.
|
|
|
|
+ while (shift < 1) {
|
|
|
|
+ it->buf_size = 2 * it->buf_size;
|
|
|
|
+ it->buf = realloc (it->buf, it->buf_size);
|
|
|
|
+ if (!it->buf) {
|
|
|
|
+ log_error ("Memory allocation error.");
|
|
|
|
+ return -1;
|
|
|
|
+ }
|
|
|
|
+ shift = it->tok - it->buf;
|
|
}
|
|
}
|
|
LOG_DEBUG("Shifting bytes: %lu", shift);
|
|
LOG_DEBUG("Shifting bytes: %lu", shift);
|
|
memmove (it->buf, it->tok, it->lim - it->tok);
|
|
memmove (it->buf, it->tok, it->lim - it->tok);
|
|
@@ -43,9 +65,7 @@ static int fill(ParseIterator *it)
|
|
it->cur -= shift;
|
|
it->cur -= shift;
|
|
it->mar -= shift;
|
|
it->mar -= shift;
|
|
it->tok -= shift;
|
|
it->tok -= shift;
|
|
- if (it->fh) it->lim += fread (it->lim, 1, shift, it->fh);
|
|
|
|
- // With a string handle, assume the whole input fits in CHUNK_SIZE.
|
|
|
|
- else it->lim = memcpy (it->lim, it->sh, sizeof(it->buf));
|
|
|
|
|
|
+ it->lim += fread (it->lim, 1, shift, it->fh);
|
|
/*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
|
|
/*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
|
|
it->lim[0] = 0;
|
|
it->lim[0] = 0;
|
|
it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
|
|
it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
|
|
@@ -62,19 +82,44 @@ static int fill(ParseIterator *it)
|
|
*
|
|
*
|
|
* @param[in] sh String to read from. This is exclusive with fh.
|
|
* @param[in] sh String to read from. This is exclusive with fh.
|
|
*/
|
|
*/
|
|
-static void parse_init(ParseIterator *it, FILE *fh, const char *sh)
|
|
|
|
|
|
+static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
|
|
{
|
|
{
|
|
- it->fh = fh;
|
|
|
|
- it->sh = sh;
|
|
|
|
- it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE - 1;
|
|
|
|
|
|
+ if(fh) {
|
|
|
|
+ // Stream handling. It engages YYFILL and reads by chunks.
|
|
|
|
+ /*!re2c
|
|
|
|
+ re2c:yyfill:enable = 1;
|
|
|
|
+ */
|
|
|
|
+ it->fh = fh;
|
|
|
|
+ it->sh = NULL;
|
|
|
|
+ it->buf_size = CHUNK_SIZE;
|
|
|
|
+ it->buf = malloc(it->buf_size);
|
|
|
|
+ if (!it->buf) log_error ("Error allocating lexer buffer.");
|
|
|
|
+ it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1;
|
|
|
|
+ it->bol = it->buf;
|
|
|
|
+ it->eof = 0;
|
|
|
|
+ } else {
|
|
|
|
+ // String handling. Uses the provided string as the buffer.
|
|
|
|
+ /*!re2c
|
|
|
|
+ re2c:yyfill:enable = 0;
|
|
|
|
+ */
|
|
|
|
+ it->fh = NULL;
|
|
|
|
+ it->sh = sh;
|
|
|
|
+ it->buf_size = strlen(sh) + 1;
|
|
|
|
+ it->buf = NULL;
|
|
|
|
+ it->cur = it->tok = (YYCTYPE*)it->sh;
|
|
|
|
+ it->lim = it->mar = it->cur + it->buf_size - 1;
|
|
|
|
+ it->bol = it->cur;
|
|
|
|
+ it->eof = 1;
|
|
|
|
+ }
|
|
it->line = 1;
|
|
it->line = 1;
|
|
- it->bol = it->buf;
|
|
|
|
it->ct = 0;
|
|
it->ct = 0;
|
|
- it->eof = 0;
|
|
|
|
/*!stags:re2c format = "it->@@ = NULL; "; */
|
|
/*!stags:re2c format = "it->@@ = NULL; "; */
|
|
- fill (it);
|
|
|
|
|
|
+
|
|
|
|
+ if (it->fh) fill (it);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/** END duplicate section */
|
|
|
|
+
|
|
|
|
|
|
// Parser interface. Required here to silence linters.
|
|
// Parser interface. Required here to silence linters.
|
|
void *NTParseAlloc();
|
|
void *NTParseAlloc();
|
|
@@ -232,7 +277,7 @@ loop:
|
|
}
|
|
}
|
|
|
|
|
|
* {
|
|
* {
|
|
- LOG_DEBUG(
|
|
|
|
|
|
+ log_error (
|
|
"Invalid token @ %lu: %s (\\x%x)",
|
|
"Invalid token @ %lu: %s (\\x%x)",
|
|
YYCURSOR - it->buf - 1, it->tok, *it->tok);
|
|
YYCURSOR - it->buf - 1, it->tok, *it->tok);
|
|
|
|
|
|
@@ -251,6 +296,8 @@ VOLK_nt_parse_term (const char *rep, VOLK_Term **term)
|
|
|
|
|
|
int ttype = lex (&it, term);
|
|
int ttype = lex (&it, term);
|
|
|
|
|
|
|
|
+ free (it.buf);
|
|
|
|
+
|
|
switch (ttype) {
|
|
switch (ttype) {
|
|
case T_IRIREF:
|
|
case T_IRIREF:
|
|
case T_LITERAL:
|
|
case T_LITERAL:
|
|
@@ -268,6 +315,11 @@ VOLK_nt_parse_doc (
|
|
*err_p = NULL;
|
|
*err_p = NULL;
|
|
*gr_p = NULL;
|
|
*gr_p = NULL;
|
|
|
|
|
|
|
|
+ if (!fh && !sh) {
|
|
|
|
+ log_error ("Neither file handle nor string input provided.");
|
|
|
|
+ return VOLK_VALUE_ERR;
|
|
|
|
+ }
|
|
|
|
+
|
|
ParseIterator parse_it;
|
|
ParseIterator parse_it;
|
|
parse_init (&parse_it, fh, sh);
|
|
parse_init (&parse_it, fh, sh);
|
|
|
|
|
|
@@ -332,6 +384,8 @@ finally: ;
|
|
NTParse (parser, 0, NULL, it);
|
|
NTParse (parser, 0, NULL, it);
|
|
NTParseFree (parser, free);
|
|
NTParseFree (parser, free);
|
|
|
|
|
|
|
|
+ free (parse_it.buf);
|
|
|
|
+
|
|
VOLK_graph_add_done (it);
|
|
VOLK_graph_add_done (it);
|
|
VOLK_term_free (term);
|
|
VOLK_term_free (term);
|
|
|
|
|