4 次代码提交 828c297075 ... 568d6ec5c1

作者 SHA1 备注 提交日期
  scossu 568d6ec5c1 Fix issue with large token. 1 周之前
  scossu 6347cf5add Fix ttl tests with BNodes. 1 周之前
  scossu 9df20a2594 WIP fixing W3C TTL tests. 1 周之前
  scossu 656e53c4a5 WIP ISO C; partly fixed codec; tests still failing. 1 周之前

+ 7 - 4
README.md

@@ -132,10 +132,13 @@ Builds and installs the library with debug symbols in `~/.local`.
 
 `VOLK_RDF_STREAM_CHUNK_SIZE`: Size of RDF decoding buffer, i.e., maximum size
 of a chunk of RDF data fed to the parser when decoding a RDF file into a graph.
-This should be larger than the maximum expected size of a single term in your
-RDF source. The default value is 8192, which is mildly conservative. If you
-experience parsing errors on decoding, and they happen to be on a term such a
-very long string literal, try recompiling the library with a larger value.
+This defaults to 4 KiB. The buffer is heap-allocated, so if an RDF term is
+larger than it, the buffer is grown automatically for the duration of the
+decoding session. Systems with more memory may benefit from a larger buffer.
+
+`VOLK_TEST_LARGE`: Used with `make test`, `make memtest`, etc. to thoroughly
+test codecs on very large triple sets. Normally these tests are skipped to
+speed up development when no codec changes are involved.
 
 ## Embedding & linking
 

+ 1 - 1
include/volksdata/codec.h

@@ -13,7 +13,7 @@
 #ifdef VOLK_RDF_STREAM_CHUNK_SIZE
 #define CHUNK_SIZE VOLK_RDF_STREAM_CHUNK_SIZE
 #else
-#define CHUNK_SIZE 8192
+#define CHUNK_SIZE 4096
 #endif
 
 

+ 127 - 0
include/volksdata/codec/parser_common.h

@@ -0,0 +1,127 @@
+#ifndef _VOLK_PARSER_COMMON_H
+#define _VOLK_PARSER_COMMON_H
+
+#include "volksdata/codec.h"
+
+
+/** @brief TTL is UTF-8 encoded.
+ *
+ * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar
+ *
+ * `char` should be considered to be UTF-8 throughout this library, however,
+ * setting YYCTYPE to char generates case labels outside of the char range.
+ * For that reason YYCTYPE is defined as `uint8_t` here.
+ *
+ * YYCURSOR, YYMARKER and YYLIMIT expand to the `cur`, `mar` and `lim` members
+ * reached through a pointer named `it`, so a variable with that name (e.g. a
+ * `ParseIterator *it`) must be in scope wherever these macros are expanded.
+ *
+ * YYFILL expands to `fill(it) == 0`, i.e. it evaluates to true when fill()
+ * returns 0.
+ */
+#define YYCTYPE     uint8_t
+#define YYCURSOR    it->cur
+#define YYMARKER    it->mar
+#define YYLIMIT     it->lim
+#define YYFILL      fill(it) == 0
+
+
+/** @brief State shared between the lexer and the buffer refill routine.
+ *
+ * The input is read either from a file handle or from a string; the two are
+ * mutually exclusive.
+ */
+typedef struct {
+    /// Input file handle.
+    FILE *fh;
+    /// Input string. Exclusive with fh.
+    const char *sh;
+    /// Initial allocation for buffer.
+    size_t buf_size;
+    /// Start of buffer.
+    YYCTYPE *buf;
+    /// Position after the last available input character (YYLIMIT).
+    YYCTYPE *lim;
+    /// Next input character to be read (YYCURSOR).
+    YYCTYPE *cur;
+    /// Most recent match (YYMARKER).
+    YYCTYPE *mar;
+    /// Start of current token.
+    YYCTYPE *tok;
+    /// Address of the beginning of the current line (for debugging).
+    YYCTYPE *bol;
+    /// Current line no. (for debugging).
+    unsigned line;
+    /// Number of statements parsed.
+    unsigned ct;
+    /// Whether EOF has been reached.
+    bool eof;
+    /*!stags:re2c format = "YYCTYPE *@@;"; */
+} ParseIterator;
+
+
+static int fill(ParseIterator *it);
+
+
+/** @brief Prepare a parse iterator for reading from a stream or a string.
+ *
+ * In stream mode a buffer of CHUNK_SIZE bytes is allocated and all the
+ * position pointers are placed on its last byte, which is set to 0. In
+ * string mode no buffer is allocated: the pointers refer directly to the
+ * caller's string and the iterator is marked as being at EOF.
+ *
+ * @param[in] it iterator handle to be initialized.
+ *
+ * @param[in] fh Open file handle to read from. This is exclusive with sh. If
+ *  both fh and sh are provided, fh has precedence.
+ *
+ * @param[in] sh String to read from. This is exclusive with fh.
+ */
+static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
+{
+    it->line = 1;
+    it->ct = 0;
+
+    if (!fh) {
+        // String mode: the provided string doubles as the lexer buffer.
+        // NOTE(review): sh is passed to strlen() unchecked, so a NULL sh
+        // together with a NULL fh is not handled here.
+        size_t len = strlen(sh);
+        YYCTYPE *start = (YYCTYPE*)sh;
+
+        it->fh = NULL;
+        it->sh = sh;
+        it->buf = NULL;
+        it->buf_size = len + 1;
+        it->bol = start;
+        it->tok = start;
+        it->cur = start;
+        // Limit and marker both sit on the string terminator.
+        it->mar = start + len;
+        it->lim = start + len;
+        it->eof = true;
+    } else {
+        // Stream mode: data are pulled in by chunks through YYFILL.
+        YYCTYPE *last;
+
+        it->sh = NULL;
+        it->fh = fh;
+        it->buf_size = CHUNK_SIZE;
+        it->buf = malloc(it->buf_size);
+        // NOTE(review): the failure is only logged; the code below still
+        // uses the buffer pointer.
+        if (it->buf == NULL) log_error ("Error allocating lexer buffer.");
+
+        // Start with every position on the final byte of the empty buffer.
+        last = it->buf + it->buf_size - 1;
+        it->lim = last;
+        it->tok = last;
+        it->mar = last;
+        it->cur = last;
+        it->bol = it->buf;
+        it->eof = false;
+        last[0] = 0;
+    }
+    /*!stags:re2c format = "it->@@ = NULL; "; */
+}
+
+
+/** @brief Make room in the lexer buffer and read more input from the stream.
+ *
+ * Data preceding the start of the current token are discarded by moving the
+ * remaining bytes to the start of the buffer. If the current token already
+ * starts at the beginning of the buffer, the buffer capacity is doubled
+ * instead. More data are then read from `it->fh`, a 0 byte is written at the
+ * new limit, and `it->eof` is set when the buffer could not be filled
+ * completely.
+ *
+ * @param[in,out] it Parse iterator whose buffer and position pointers are
+ *  updated.
+ *
+ * @return 0 on success; 1 if EOF had already been reached; -1 if the buffer
+ *  could not be enlarged. On -1 the iterator is left unchanged and the
+ *  previous buffer is still valid and owned by the iterator.
+ */
+int
+fill(ParseIterator *it)
+{
+    log_debug ("Filling codec buffer @ %p.", (void *)it->buf);
+    if (it->eof) return 1;
+
+    size_t shift = it->tok - it->buf;
+    size_t used = it->lim - it->tok;
+
+    // If buffer is too small for the lexeme, double the capacity.
+    if (shift < 1) {
+        // Record positions as offsets BEFORE reallocating: the old pointer
+        // value must not be used in arithmetic once realloc() has moved the
+        // block. The token starts at the buffer start in this branch.
+        size_t cur_off = it->cur - it->buf;
+        size_t lim_off = it->lim - it->buf;
+        // NOTE(review): a marker left over from an earlier token may point
+        // before the buffer start; it is clamped to offset 0 on the
+        // assumption that the lexer sets YYMARKER again before using it.
+        size_t mar_off = (it->mar >= it->buf)
+                ? (size_t)(it->mar - it->buf) : 0;
+
+        if (it->buf_size > SIZE_MAX / 2) {
+            log_error ("Lexer buffer size overflow.");
+            return -1;
+        }
+        size_t new_size = it->buf_size * 2;
+        LOG_DEBUG ("Reallocating buffer to %zu bytes.", new_size);
+
+        // Keep the old pointer until realloc() succeeds, so that a failure
+        // neither leaks the buffer nor leaves the iterator inconsistent.
+        YYCTYPE *new_buf = realloc (it->buf, new_size);
+        if (!new_buf) {
+            log_error ("Memory allocation error.");
+            return -1;
+        }
+
+        // NOTE(review): `shift` is set to the previous capacity, as before,
+        // for the stags directive below. Verify that this is the intended
+        // adjustment for tags after a reallocation.
+        shift += it->buf_size;
+
+        it->buf = new_buf;
+        it->buf_size = new_size;
+        it->tok = new_buf;
+        it->cur = new_buf + cur_off;
+        it->lim = new_buf + lim_off;
+        it->mar = new_buf + mar_off;
+    } else {
+        LOG_DEBUG("Shifting bytes: %zu", shift);
+        memmove (it->buf, it->tok, used);
+        LOG_TRACE ("Limit offset before reading data: %zu", used);
+        it->lim -= shift;
+        it->cur -= shift;
+        it->mar -= shift;
+        it->tok -= shift;
+    }
+    // One byte is always kept free for the terminating 0 written below.
+    it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh);
+    /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
+    LOG_TRACE (
+            "Cursor offset from last token: %zu",
+            (size_t)(it->cur - it->tok));
+    LOG_TRACE (
+            "Limit offset from last token: %zu",
+            (size_t)(it->lim - it->tok));
+    it->lim[0] = 0;
+    // A short read means the stream has no more data to offer.
+    it->eof = it->lim < it->buf + it->buf_size - 1;
+    return 0;
+}
+
+
+#endif // _VOLK_PARSER_COMMON_H

+ 7 - 6
include/volksdata/codec/tokens_ttl.h

@@ -6,8 +6,8 @@
 #define T_DOUBLE                           6
 #define T_DECIMAL                          7
 #define T_BOOLEAN                          8
-#define T_QNAME                            9
-#define T_BNODE_ID                        10
+#define T_BNODE_ID                         9
+#define T_QNAME                           10
 #define T_IRIREF                          11
 #define T_LANGTAG                         12
 #define T_PREFIX                          13
@@ -17,7 +17,8 @@
 #define T_BASE                            17
 #define T_RDF_TYPE                        18
 #define T_DTYPE_MARKER                    19
-#define T_LBRACKET                        20
-#define T_RBRACKET                        21
-#define T_LPAREN                          22
-#define T_RPAREN                          23
+#define T_ANON                            20
+#define T_LBRACKET                        21
+#define T_RBRACKET                        22
+#define T_LPAREN                          23
+#define T_RPAREN                          24

+ 1 - 1
src/codec/Makefile

@@ -48,7 +48,7 @@ $(BUILDDIR)/%_dbg.o: %.c
 
 # Parser C sources.
 parser_%.c: lexer_%.re grammar_%.c ../codec.c
-	$(LEXER) $< -o $@ -T --case-ranges
+	$(LEXER) $< -o $@ -T --case-ranges -W
 
 
 .PRECIOUS: grammar_%.c $(CODEC_INCLUDE_DIR)/tokens_%.h

+ 289 - 276
src/codec/grammar_ttl.c

@@ -23,8 +23,8 @@
 #define T_DOUBLE                          6
 #define T_DECIMAL                         7
 #define T_BOOLEAN                         8
-#define T_QNAME                           9
-#define T_BNODE_ID                       10
+#define T_BNODE_ID                        9
+#define T_QNAME                          10
 #define T_IRIREF                         11
 #define T_LANGTAG                        12
 #define T_PREFIX                         13
@@ -34,10 +34,11 @@
 #define T_BASE                           17
 #define T_RDF_TYPE                       18
 #define T_DTYPE_MARKER                   19
-#define T_LBRACKET                       20
-#define T_RBRACKET                       21
-#define T_LPAREN                         22
-#define T_RPAREN                         23
+#define T_ANON                           20
+#define T_LBRACKET                       21
+#define T_RBRACKET                       22
+#define T_LPAREN                         23
+#define T_RPAREN                         24
 #endif
 /**************** End token definitions ***************************************/
 
@@ -97,16 +98,16 @@
 #endif
 /************* Begin control #defines *****************************************/
 #define YYCODETYPE unsigned char
-#define YYNOCODE 41
+#define YYNOCODE 42
 #define YYACTIONTYPE unsigned char
 #define TTLParseTOKENTYPE  char * 
 typedef union {
   int yyinit;
   TTLParseTOKENTYPE yy0;
-  VOLK_LinkMap * yy2;
-  VOLK_Term * yy12;
-  char * yy33;
-  VOLK_TermSet * yy60;
+  char * yy1;
+  VOLK_TermSet * yy22;
+  VOLK_LinkMap * yy34;
+  VOLK_Term * yy50;
 } YYMINORTYPE;
 #ifndef YYSTACKDEPTH
 #define YYSTACKDEPTH 100
@@ -124,7 +125,7 @@ typedef union {
 #define YYNSTATE             28
 #define YYNRULE              41
 #define YYNRULE_WITH_ACTION  27
-#define YYNTOKEN             24
+#define YYNTOKEN             25
 #define YY_MAX_SHIFT         27
 #define YY_MIN_SHIFTREDUCE   63
 #define YY_MAX_SHIFTREDUCE   103
@@ -199,51 +200,51 @@ typedef union {
 **  yy_default[]       Default action for each state.
 **
 *********** Begin parsing tables **********************************************/
-#define YY_ACTTAB_COUNT (111)
+#define YY_ACTTAB_COUNT (115)
 static const YYACTIONTYPE yy_action[] = {
- /*     0 */   105,    1,   65,    9,   17,   77,   78,   79,   80,   89,
- /*    10 */    81,   88,   17,   77,   78,   79,   80,   89,   81,   88,
- /*    20 */     7,   14,    2,   84,   14,   89,   81,   88,    7,   26,
- /*    30 */     2,   27,   96,   23,    5,  120,    7,  102,    2,  135,
- /*    40 */   135,  135,  135,   15,  130,  130,  130,  130,  130,  115,
- /*    50 */   116,    3,  116,  131,  131,  131,  131,  131,   11,   10,
- /*    60 */    19,  114,  114,  114,  114,  114,   20,  114,  114,  114,
- /*    70 */   114,  114,  113,  113,  113,  113,  113,   89,   83,   88,
- /*    80 */    16,   13,    4,   89,   13,   88,   73,    8,   66,   82,
- /*    90 */    18,   13,   73,  102,   13,   89,   89,   88,   88,    6,
- /*   100 */    85,   75,   64,   21,   22,   73,   63,   25,   12,   24,
- /*   110 */   134,
+ /*     0 */   105,    1,   65,    9,   17,   77,   78,   79,   80,   81,
+ /*    10 */    89,   88,   17,   77,   78,   79,   80,   81,   89,   88,
+ /*    20 */    82,    7,   14,    2,   84,   14,   16,   13,   82,    7,
+ /*    30 */    13,    2,  120,   81,   89,   88,    5,   26,  102,   27,
+ /*    40 */    96,   23,   18,   13,   82,    7,   13,    2,  135,  135,
+ /*    50 */   135,  135,   15,  130,  130,  130,  130,  130,  115,  116,
+ /*    60 */     3,  116,   89,   88,    4,  131,  131,  131,  131,  131,
+ /*    70 */    11,   19,  114,  114,  114,  114,  114,   20,  114,  114,
+ /*    80 */   114,  114,  114,   66,  113,  113,  113,  113,  113,   89,
+ /*    90 */    88,  102,   89,   88,   10,   75,    8,   73,    6,   85,
+ /*   100 */    73,   64,   12,   22,   21,   63,   25,   24,  134,  106,
+ /*   110 */   106,  106,  106,  106,   83,
 };
 static const YYCODETYPE yy_lookahead[] = {
- /*     0 */    24,   25,    1,    2,    4,    5,    6,    7,    8,    9,
+ /*     0 */    25,   26,    1,    2,    4,    5,    6,    7,    8,    9,
  /*    10 */    10,   11,    4,    5,    6,    7,    8,    9,   10,   11,
- /*    20 */    20,   33,   22,   23,   36,    9,   10,   11,   20,   13,
- /*    30 */    22,   15,   16,   17,   31,   36,   20,   16,   22,   26,
- /*    40 */    27,   28,   29,   30,   35,   36,   37,   38,   39,   36,
- /*    50 */    37,   31,   39,   35,   36,   37,   38,   39,   40,    2,
- /*    60 */    34,   35,   36,   37,   38,   39,   34,   35,   36,   37,
- /*    70 */    38,   39,   35,   36,   37,   38,   39,    9,   21,   11,
- /*    80 */    32,   33,   31,    9,   36,   11,   18,   31,    1,   21,
- /*    90 */    32,   33,   18,   16,   36,    9,    9,   11,   11,    3,
- /*   100 */    23,   12,    1,   11,   16,   18,    1,   16,   19,   11,
- /*   110 */     0,   41,   41,   41,   41,   41,   41,   41,   41,   41,
- /*   120 */    41,   41,   41,   41,   41,   41,   41,   41,   41,   41,
- /*   130 */    41,   41,   41,   41,   41,
+ /*    20 */    20,   21,   34,   23,   24,   37,   33,   34,   20,   21,
+ /*    30 */    37,   23,   37,    9,   10,   11,   32,   13,   16,   15,
+ /*    40 */    16,   17,   33,   34,   20,   21,   37,   23,   27,   28,
+ /*    50 */    29,   30,   31,   36,   37,   38,   39,   40,   37,   38,
+ /*    60 */    32,   40,   10,   11,   32,   36,   37,   38,   39,   40,
+ /*    70 */    41,   35,   36,   37,   38,   39,   40,   35,   36,   37,
+ /*    80 */    38,   39,   40,    1,   36,   37,   38,   39,   40,   10,
+ /*    90 */    11,   16,   10,   11,    2,   12,   32,   18,    3,   24,
+ /*   100 */    18,    1,   19,   16,   11,    1,   16,   11,    0,   42,
+ /*   110 */    42,   42,   42,   42,   22,   42,   42,   42,   42,   42,
+ /*   120 */    42,   42,   42,   42,   42,   42,   42,   42,   42,   42,
+ /*   130 */    42,   42,   42,   42,   25,   25,   25,   25,   25,   25,
 };
 #define YY_SHIFT_COUNT    (27)
 #define YY_SHIFT_MIN      (0)
-#define YY_SHIFT_MAX      (110)
+#define YY_SHIFT_MAX      (108)
 static const unsigned char yy_shift_ofst[] = {
- /*     0 */   111,   16,    0,    8,    8,    8,    8,   68,   74,   87,
- /*    10 */    74,   77,   86,   21,   21,   21,   57,   89,    1,   96,
- /*    20 */    96,  101,   92,   88,  105,   98,   91,  110,
+ /*     0 */   115,   24,    0,    8,    8,    8,    8,   79,   79,   82,
+ /*    10 */    79,   75,   52,   22,   22,   22,   92,   83,    1,   95,
+ /*    20 */    95,  100,   93,   87,  104,   96,   90,  108,
 };
 #define YY_REDUCE_COUNT (15)
-#define YY_REDUCE_MIN   (-24)
-#define YY_REDUCE_MAX   (58)
+#define YY_REDUCE_MIN   (-25)
+#define YY_REDUCE_MAX   (64)
 static const signed char yy_reduce_ofst[] = {
- /*     0 */   -24,   13,   18,   26,   32,    9,   37,   48,   58,  -12,
- /*    10 */   -12,    3,   -1,   20,   51,   56,
+ /*     0 */   -25,   21,   29,   36,   42,   17,   48,   -7,    9,  -12,
+ /*    10 */   -12,    4,   -5,   28,   32,   64,
 };
 static const YYACTIONTYPE yy_default[] = {
  /*     0 */   136,  104,  104,  104,  104,  104,  104,  104,  104,  104,
@@ -365,8 +366,8 @@ static const char *const yyTokenName[] = {
   /*    6 */ "DOUBLE",
   /*    7 */ "DECIMAL",
   /*    8 */ "BOOLEAN",
-  /*    9 */ "QNAME",
-  /*   10 */ "BNODE_ID",
+  /*    9 */ "BNODE_ID",
+  /*   10 */ "QNAME",
   /*   11 */ "IRIREF",
   /*   12 */ "LANGTAG",
   /*   13 */ "PREFIX",
@@ -376,27 +377,28 @@ static const char *const yyTokenName[] = {
   /*   17 */ "BASE",
   /*   18 */ "RDF_TYPE",
   /*   19 */ "DTYPE_MARKER",
-  /*   20 */ "LBRACKET",
-  /*   21 */ "RBRACKET",
-  /*   22 */ "LPAREN",
-  /*   23 */ "RPAREN",
-  /*   24 */ "turtleDoc",
-  /*   25 */ "statements",
-  /*   26 */ "statement",
-  /*   27 */ "prefixID",
-  /*   28 */ "base",
-  /*   29 */ "triples",
-  /*   30 */ "subject",
-  /*   31 */ "ows",
-  /*   32 */ "predObjList",
-  /*   33 */ "predicate",
-  /*   34 */ "objectList",
-  /*   35 */ "object",
-  /*   36 */ "resource",
-  /*   37 */ "blank",
-  /*   38 */ "literal",
-  /*   39 */ "collection",
-  /*   40 */ "itemList",
+  /*   20 */ "ANON",
+  /*   21 */ "LBRACKET",
+  /*   22 */ "RBRACKET",
+  /*   23 */ "LPAREN",
+  /*   24 */ "RPAREN",
+  /*   25 */ "turtleDoc",
+  /*   26 */ "statements",
+  /*   27 */ "statement",
+  /*   28 */ "prefixID",
+  /*   29 */ "base",
+  /*   30 */ "triples",
+  /*   31 */ "subject",
+  /*   32 */ "ows",
+  /*   33 */ "predObjList",
+  /*   34 */ "predicate",
+  /*   35 */ "objectList",
+  /*   36 */ "object",
+  /*   37 */ "resource",
+  /*   38 */ "blank",
+  /*   39 */ "literal",
+  /*   40 */ "collection",
+  /*   41 */ "itemList",
 };
 #endif /* defined(YYCOVERAGE) || !defined(NDEBUG) */
 
@@ -423,7 +425,7 @@ static const char *const yyRuleName[] = {
  /*  16 */ "literal ::= DECIMAL",
  /*  17 */ "literal ::= BOOLEAN",
  /*  18 */ "blank ::= BNODE_ID",
- /*  19 */ "blank ::= LBRACKET RBRACKET",
+ /*  19 */ "blank ::= ANON",
  /*  20 */ "blank ::= LBRACKET predObjList RBRACKET",
  /*  21 */ "blank ::= LPAREN RPAREN",
  /*  22 */ "collection ::= LPAREN itemList RPAREN",
@@ -579,8 +581,8 @@ static void yy_destructor(
     case 6: /* DOUBLE */
     case 7: /* DECIMAL */
     case 8: /* BOOLEAN */
-    case 9: /* QNAME */
-    case 10: /* BNODE_ID */
+    case 9: /* BNODE_ID */
+    case 10: /* QNAME */
     case 11: /* IRIREF */
     case 12: /* LANGTAG */
     case 13: /* PREFIX */
@@ -590,42 +592,43 @@ static void yy_destructor(
     case 17: /* BASE */
     case 18: /* RDF_TYPE */
     case 19: /* DTYPE_MARKER */
-    case 20: /* LBRACKET */
-    case 21: /* RBRACKET */
-    case 22: /* LPAREN */
-    case 23: /* RPAREN */
+    case 20: /* ANON */
+    case 21: /* LBRACKET */
+    case 22: /* RBRACKET */
+    case 23: /* LPAREN */
+    case 24: /* RPAREN */
 {
 #line 36 "grammar_ttl.y"
  (void) state; free ((yypminor->yy0)); 
-#line 625 "../../build/grammar_ttl.c"
+#line 628 "../../build/grammar_ttl.c"
 }
       break;
-    case 30: /* subject */
-    case 33: /* predicate */
-    case 35: /* object */
-    case 36: /* resource */
-    case 37: /* blank */
-    case 38: /* literal */
-    case 39: /* collection */
+    case 31: /* subject */
+    case 34: /* predicate */
+    case 36: /* object */
+    case 37: /* resource */
+    case 38: /* blank */
+    case 39: /* literal */
+    case 40: /* collection */
 {
-#line 120 "grammar_ttl.y"
- VOLK_term_free ((yypminor->yy12)); 
-#line 638 "../../build/grammar_ttl.c"
+#line 129 "grammar_ttl.y"
+ VOLK_term_free ((yypminor->yy50)); 
+#line 641 "../../build/grammar_ttl.c"
 }
       break;
-    case 32: /* predObjList */
+    case 33: /* predObjList */
 {
 #line 97 "grammar_ttl.y"
- VOLK_link_map_free ((yypminor->yy2)); 
-#line 645 "../../build/grammar_ttl.c"
+ VOLK_link_map_free ((yypminor->yy34)); 
+#line 648 "../../build/grammar_ttl.c"
 }
       break;
-    case 34: /* objectList */
-    case 40: /* itemList */
+    case 35: /* objectList */
+    case 41: /* itemList */
 {
-#line 108 "grammar_ttl.y"
- VOLK_term_set_free ((yypminor->yy60)); 
-#line 653 "../../build/grammar_ttl.c"
+#line 117 "grammar_ttl.y"
+ VOLK_term_set_free ((yypminor->yy22)); 
+#line 656 "../../build/grammar_ttl.c"
 }
       break;
 /********* End destructor definitions *****************************************/
@@ -847,7 +850,7 @@ static void yyStackOverflow(yyParser *yypParser){
 
     log_error ("Stack oveflow in TTL parsing.");
     state->rc = VOLK_MEM_ERR;
-#line 875 "../../build/grammar_ttl.c"
+#line 878 "../../build/grammar_ttl.c"
 /******** End %stack_overflow code ********************************************/
    TTLParseARG_STORE /* Suppress warning about unused %extra_argument var */
    TTLParseCTX_STORE
@@ -919,47 +922,47 @@ static void yy_shift(
 /* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side
 ** of that rule */
 static const YYCODETYPE yyRuleInfoLhs[] = {
-    27,  /* (0) prefixID ::= PREFIX WS IRIREF PERIOD */
-    28,  /* (1) base ::= BASE WS IRIREF PERIOD */
-    29,  /* (2) triples ::= subject ows predObjList PERIOD */
-    29,  /* (3) triples ::= subject ows predObjList SEMICOLON PERIOD */
-    32,  /* (4) predObjList ::= predicate ows objectList */
-    32,  /* (5) predObjList ::= predObjList SEMICOLON predicate ows objectList */
-    34,  /* (6) objectList ::= objectList COMMA object */
-    34,  /* (7) objectList ::= object */
-    30,  /* (8) subject ::= resource */
-    30,  /* (9) subject ::= blank */
-    33,  /* (10) predicate ::= RDF_TYPE */
-    38,  /* (11) literal ::= STRING */
-    38,  /* (12) literal ::= STRING LANGTAG */
-    38,  /* (13) literal ::= STRING DTYPE_MARKER resource */
-    38,  /* (14) literal ::= INTEGER */
-    38,  /* (15) literal ::= DOUBLE */
-    38,  /* (16) literal ::= DECIMAL */
-    38,  /* (17) literal ::= BOOLEAN */
-    37,  /* (18) blank ::= BNODE_ID */
-    37,  /* (19) blank ::= LBRACKET RBRACKET */
-    37,  /* (20) blank ::= LBRACKET predObjList RBRACKET */
-    37,  /* (21) blank ::= LPAREN RPAREN */
-    39,  /* (22) collection ::= LPAREN itemList RPAREN */
-    40,  /* (23) itemList ::= itemList ows object */
-    40,  /* (24) itemList ::= object */
-    36,  /* (25) resource ::= IRIREF */
-    36,  /* (26) resource ::= QNAME */
-    24,  /* (27) turtleDoc ::= statements EOF */
-    25,  /* (28) statements ::= statements statement */
-    25,  /* (29) statements ::= */
-    26,  /* (30) statement ::= prefixID */
-    26,  /* (31) statement ::= base */
-    26,  /* (32) statement ::= triples */
-    26,  /* (33) statement ::= WS */
-    33,  /* (34) predicate ::= resource */
-    35,  /* (35) object ::= resource */
-    35,  /* (36) object ::= blank */
-    35,  /* (37) object ::= literal */
-    37,  /* (38) blank ::= collection */
-    31,  /* (39) ows ::= WS */
-    31,  /* (40) ows ::= */
+    28,  /* (0) prefixID ::= PREFIX WS IRIREF PERIOD */
+    29,  /* (1) base ::= BASE WS IRIREF PERIOD */
+    30,  /* (2) triples ::= subject ows predObjList PERIOD */
+    30,  /* (3) triples ::= subject ows predObjList SEMICOLON PERIOD */
+    33,  /* (4) predObjList ::= predicate ows objectList */
+    33,  /* (5) predObjList ::= predObjList SEMICOLON predicate ows objectList */
+    35,  /* (6) objectList ::= objectList COMMA object */
+    35,  /* (7) objectList ::= object */
+    31,  /* (8) subject ::= resource */
+    31,  /* (9) subject ::= blank */
+    34,  /* (10) predicate ::= RDF_TYPE */
+    39,  /* (11) literal ::= STRING */
+    39,  /* (12) literal ::= STRING LANGTAG */
+    39,  /* (13) literal ::= STRING DTYPE_MARKER resource */
+    39,  /* (14) literal ::= INTEGER */
+    39,  /* (15) literal ::= DOUBLE */
+    39,  /* (16) literal ::= DECIMAL */
+    39,  /* (17) literal ::= BOOLEAN */
+    38,  /* (18) blank ::= BNODE_ID */
+    38,  /* (19) blank ::= ANON */
+    38,  /* (20) blank ::= LBRACKET predObjList RBRACKET */
+    38,  /* (21) blank ::= LPAREN RPAREN */
+    40,  /* (22) collection ::= LPAREN itemList RPAREN */
+    41,  /* (23) itemList ::= itemList ows object */
+    41,  /* (24) itemList ::= object */
+    37,  /* (25) resource ::= IRIREF */
+    37,  /* (26) resource ::= QNAME */
+    25,  /* (27) turtleDoc ::= statements EOF */
+    26,  /* (28) statements ::= statements statement */
+    26,  /* (29) statements ::= */
+    27,  /* (30) statement ::= prefixID */
+    27,  /* (31) statement ::= base */
+    27,  /* (32) statement ::= triples */
+    27,  /* (33) statement ::= WS */
+    34,  /* (34) predicate ::= resource */
+    36,  /* (35) object ::= resource */
+    36,  /* (36) object ::= blank */
+    36,  /* (37) object ::= literal */
+    38,  /* (38) blank ::= collection */
+    32,  /* (39) ows ::= WS */
+    32,  /* (40) ows ::= */
 };
 
 /* For rule J, yyRuleInfoNRhs[J] contains the negative of the number
@@ -984,7 +987,7 @@ static const signed char yyRuleInfoNRhs[] = {
    -1,  /* (16) literal ::= DECIMAL */
    -1,  /* (17) literal ::= BOOLEAN */
    -1,  /* (18) blank ::= BNODE_ID */
-   -2,  /* (19) blank ::= LBRACKET RBRACKET */
+   -1,  /* (19) blank ::= ANON */
    -3,  /* (20) blank ::= LBRACKET predObjList RBRACKET */
    -2,  /* (21) blank ::= LPAREN RPAREN */
    -3,  /* (22) collection ::= LPAREN itemList RPAREN */
@@ -1055,7 +1058,7 @@ static YYACTIONTYPE yy_reduce(
                 free (yymsp[-3].minor.yy0);
                 free (yymsp[-1].minor.yy0);
             }
-#line 1083 "../../build/grammar_ttl.c"
+#line 1086 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,16,&yymsp[-2].minor);
   yy_destructor(yypParser,1,&yymsp[0].minor);
         break;
@@ -1068,7 +1071,7 @@ static YYACTIONTYPE yy_reduce(
 
                 free (yymsp[-1].minor.yy0);
             }
-#line 1096 "../../build/grammar_ttl.c"
+#line 1099 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,16,&yymsp[-2].minor);
   yy_destructor(yypParser,1,&yymsp[0].minor);
 }
@@ -1076,262 +1079,272 @@ static YYACTIONTYPE yy_reduce(
       case 2: /* triples ::= subject ows predObjList PERIOD */
 #line 77 "grammar_ttl.y"
 {
-                size_t ct = VOLK_graph_add_link_map (state->it, yymsp[-1].minor.yy2);
+                size_t ct = VOLK_graph_add_link_map (state->it, yymsp[-1].minor.yy34);
                 state->ct += ct;
                 state->rc = VOLK_OK;
                 LOG_TRACE("Added %lu triples.", ct);
 
-                VOLK_term_free (yymsp[-3].minor.yy12);
-                VOLK_link_map_free (yymsp[-1].minor.yy2);
+                VOLK_term_free (yymsp[-3].minor.yy50);
+                VOLK_link_map_free (yymsp[-1].minor.yy34);
             }
-#line 1112 "../../build/grammar_ttl.c"
+#line 1115 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,1,&yymsp[0].minor);
         break;
       case 3: /* triples ::= subject ows predObjList SEMICOLON PERIOD */
 #line 86 "grammar_ttl.y"
 {
-                size_t ct = VOLK_graph_add_link_map (state->it, yymsp[-2].minor.yy2);
+                size_t ct = VOLK_graph_add_link_map (state->it, yymsp[-2].minor.yy34);
                 state->ct += ct;
                 state->rc = VOLK_OK;
                 LOG_TRACE("Added %lu triples.", ct);
 
-                VOLK_term_free (yymsp[-4].minor.yy12);
-                VOLK_link_map_free (yymsp[-2].minor.yy2);
+                VOLK_term_free (yymsp[-4].minor.yy50);
+                VOLK_link_map_free (yymsp[-2].minor.yy34);
             }
-#line 1126 "../../build/grammar_ttl.c"
+#line 1129 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,2,&yymsp[-1].minor);
   yy_destructor(yypParser,1,&yymsp[0].minor);
         break;
       case 4: /* predObjList ::= predicate ows objectList */
 #line 98 "grammar_ttl.y"
 {
-                yylhsminor.yy2 = VOLK_link_map_new (state->lms, VOLK_LINK_OUTBOUND);
-                VOLK_link_map_add (yylhsminor.yy2, yymsp[-2].minor.yy12, yymsp[0].minor.yy60);
+                VOLK_Term *s;
+                if (state->lms) s = state->lms;
+                else {
+                    // TODO This may be brittle. It is not verifying the
+                    // full BNode syntax.
+                    log_info ("Link map subject not present. Assuming BNode.");
+                    s = VOLK_bnode_new (NULL);
+                }
+                yylhsminor.yy34 = VOLK_link_map_new (s, VOLK_LINK_OUTBOUND);
+                VOLK_link_map_add (yylhsminor.yy34, yymsp[-2].minor.yy50, yymsp[0].minor.yy22);
+                if (s != state->lms) VOLK_term_free (s);
             }
-#line 1136 "../../build/grammar_ttl.c"
-  yymsp[-2].minor.yy2 = yylhsminor.yy2;
+#line 1148 "../../build/grammar_ttl.c"
+  yymsp[-2].minor.yy34 = yylhsminor.yy34;
         break;
       case 5: /* predObjList ::= predObjList SEMICOLON predicate ows objectList */
-#line 102 "grammar_ttl.y"
+#line 111 "grammar_ttl.y"
 {
-                VOLK_link_map_add (yymsp[-4].minor.yy2, yymsp[-2].minor.yy12, yymsp[0].minor.yy60);
-                yylhsminor.yy2 = yymsp[-4].minor.yy2;
+                VOLK_link_map_add (yymsp[-4].minor.yy34, yymsp[-2].minor.yy50, yymsp[0].minor.yy22);
+                yylhsminor.yy34 = yymsp[-4].minor.yy34;
             }
-#line 1145 "../../build/grammar_ttl.c"
+#line 1157 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,2,&yymsp[-3].minor);
-  yymsp[-4].minor.yy2 = yylhsminor.yy2;
+  yymsp[-4].minor.yy34 = yylhsminor.yy34;
         break;
       case 6: /* objectList ::= objectList COMMA object */
-#line 109 "grammar_ttl.y"
+#line 118 "grammar_ttl.y"
 {
-                if (VOLK_term_set_add (yymsp[-2].minor.yy60, yymsp[0].minor.yy12, NULL) == VOLK_NOACTION)
-                    VOLK_term_free (yymsp[0].minor.yy12);
-                yylhsminor.yy60 = yymsp[-2].minor.yy60;
+                if (VOLK_term_set_add (yymsp[-2].minor.yy22, yymsp[0].minor.yy50, NULL) == VOLK_NOACTION)
+                    VOLK_term_free (yymsp[0].minor.yy50);
+                yylhsminor.yy22 = yymsp[-2].minor.yy22;
             }
-#line 1156 "../../build/grammar_ttl.c"
+#line 1168 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,3,&yymsp[-1].minor);
-  yymsp[-2].minor.yy60 = yylhsminor.yy60;
+  yymsp[-2].minor.yy22 = yylhsminor.yy22;
         break;
       case 7: /* objectList ::= object */
-#line 114 "grammar_ttl.y"
+#line 123 "grammar_ttl.y"
 {
-                yylhsminor.yy60 = VOLK_term_set_new();
-                VOLK_term_set_add (yylhsminor.yy60, yymsp[0].minor.yy12, NULL);
+                yylhsminor.yy22 = VOLK_term_set_new();
+                VOLK_term_set_add (yylhsminor.yy22, yymsp[0].minor.yy50, NULL);
             }
-#line 1166 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy60 = yylhsminor.yy60;
+#line 1178 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy22 = yylhsminor.yy22;
         break;
       case 8: /* subject ::= resource */
       case 9: /* subject ::= blank */ yytestcase(yyruleno==9);
-#line 121 "grammar_ttl.y"
-{ state->lms = yymsp[0].minor.yy12; }
-#line 1173 "../../build/grammar_ttl.c"
+#line 130 "grammar_ttl.y"
+{ state->lms = yymsp[0].minor.yy50; }
+#line 1185 "../../build/grammar_ttl.c"
         break;
       case 10: /* predicate ::= RDF_TYPE */
 {  yy_destructor(yypParser,18,&yymsp[0].minor);
-#line 127 "grammar_ttl.y"
-{ yymsp[0].minor.yy12 = VOLK_iriref_new_ns ("rdf:type"); }
-#line 1179 "../../build/grammar_ttl.c"
+#line 136 "grammar_ttl.y"
+{ yymsp[0].minor.yy50 = VOLK_iriref_new_ns ("rdf:type"); }
+#line 1191 "../../build/grammar_ttl.c"
 }
         break;
       case 11: /* literal ::= STRING */
-#line 137 "grammar_ttl.y"
+#line 146 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, NULL);
-                LOG_TRACE("Created plain literal: \"%s\"", yylhsminor.yy12->data);
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, NULL);
+                LOG_TRACE("Created plain literal: \"%s\"", yylhsminor.yy50->data);
                 free (yymsp[0].minor.yy0);
             }
-#line 1189 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1201 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 12: /* literal ::= STRING LANGTAG */
-#line 142 "grammar_ttl.y"
+#line 151 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_lt_literal_new (yymsp[-1].minor.yy0, yymsp[0].minor.yy0);
-                LOG_TRACE("Created LT-literal: \"%s\"@%s", yylhsminor.yy12->data, yylhsminor.yy12->lang);
+                yylhsminor.yy50 = VOLK_lt_literal_new (yymsp[-1].minor.yy0, yymsp[0].minor.yy0);
+                LOG_TRACE("Created LT-literal: \"%s\"@%s", yylhsminor.yy50->data, yylhsminor.yy50->lang);
                 free (yymsp[-1].minor.yy0);
                 free (yymsp[0].minor.yy0);
             }
-#line 1200 "../../build/grammar_ttl.c"
-  yymsp[-1].minor.yy12 = yylhsminor.yy12;
+#line 1212 "../../build/grammar_ttl.c"
+  yymsp[-1].minor.yy50 = yylhsminor.yy50;
         break;
       case 13: /* literal ::= STRING DTYPE_MARKER resource */
-#line 148 "grammar_ttl.y"
+#line 157 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[-2].minor.yy0, yymsp[0].minor.yy12);
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[-2].minor.yy0, yymsp[0].minor.yy50);
                 LOG_TRACE(
                         "Created DT-literal: \"%s\"^^%s",
-                        yylhsminor.yy12->data, yylhsminor.yy12->datatype);
+                        yylhsminor.yy50->data, yylhsminor.yy50->datatype);
                 free (yymsp[-2].minor.yy0);
             }
-#line 1212 "../../build/grammar_ttl.c"
+#line 1224 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,19,&yymsp[-1].minor);
-  yymsp[-2].minor.yy12 = yylhsminor.yy12;
+  yymsp[-2].minor.yy50 = yylhsminor.yy50;
         break;
       case 14: /* literal ::= INTEGER */
-#line 155 "grammar_ttl.y"
+#line 164 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:integer"));
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:integer"));
                 free (yymsp[0].minor.yy0);
             }
-#line 1222 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1234 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 15: /* literal ::= DOUBLE */
-#line 159 "grammar_ttl.y"
+#line 168 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:double"));
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:double"));
                 free (yymsp[0].minor.yy0);
             }
-#line 1231 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1243 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 16: /* literal ::= DECIMAL */
-#line 163 "grammar_ttl.y"
+#line 172 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:decimal"));
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:decimal"));
                 free (yymsp[0].minor.yy0);
             }
-#line 1240 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1252 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 17: /* literal ::= BOOLEAN */
-#line 167 "grammar_ttl.y"
+#line 176 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:boolean"));
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:boolean"));
                 free (yymsp[0].minor.yy0);
             }
-#line 1249 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1261 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 18: /* blank ::= BNODE_ID */
-#line 174 "grammar_ttl.y"
+#line 183 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_bnode_new (yymsp[0].minor.yy0);
-                LOG_TRACE("Created blank node: _:%s", yylhsminor.yy12->data);
+                yylhsminor.yy50 = VOLK_bnode_new (yymsp[0].minor.yy0);
+                LOG_TRACE("Created blank node: _:%s", yylhsminor.yy50->data);
                 free (yymsp[0].minor.yy0);
             }
-#line 1259 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1271 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
-      case 19: /* blank ::= LBRACKET RBRACKET */
-{  yy_destructor(yypParser,20,&yymsp[-1].minor);
-#line 179 "grammar_ttl.y"
+      case 19: /* blank ::= ANON */
+{  yy_destructor(yypParser,20,&yymsp[0].minor);
+#line 188 "grammar_ttl.y"
 {
-                yymsp[-1].minor.yy12 = VOLK_bnode_new (NULL);
-                LOG_TRACE("Created empty list BN: _:%s", yymsp[-1].minor.yy12->data);
+                LOG_TRACE ("Found empty BNode.");
+                yymsp[0].minor.yy50 = VOLK_bnode_new (NULL);
+                LOG_TRACE("Created empty list BN: _:%s", yymsp[0].minor.yy50->data);
             }
-#line 1269 "../../build/grammar_ttl.c"
-  yy_destructor(yypParser,21,&yymsp[0].minor);
+#line 1282 "../../build/grammar_ttl.c"
 }
         break;
       case 20: /* blank ::= LBRACKET predObjList RBRACKET */
-{  yy_destructor(yypParser,20,&yymsp[-2].minor);
-#line 183 "grammar_ttl.y"
+{  yy_destructor(yypParser,21,&yymsp[-2].minor);
+#line 193 "grammar_ttl.y"
 {
-                yymsp[-2].minor.yy12 = VOLK_bnode_new (NULL);
-                state->lms = yymsp[-2].minor.yy12;
-                state->ct += VOLK_graph_add_link_map (state->it, yymsp[-1].minor.yy2);
-                LOG_TRACE("Created list BN: _:%s", yymsp[-2].minor.yy12->data);
+                LOG_TRACE ("Found BNode with data.");
+                yymsp[-2].minor.yy50 = VOLK_bnode_new (NULL);
+                state->lms = yymsp[-2].minor.yy50;
+                state->ct += VOLK_graph_add_link_map (state->it, yymsp[-1].minor.yy34);
+                LOG_TRACE("Created list BN: _:%s", yymsp[-2].minor.yy50->data);
 
-                VOLK_link_map_free (yymsp[-1].minor.yy2);
+                VOLK_link_map_free (yymsp[-1].minor.yy34);
             }
-#line 1284 "../../build/grammar_ttl.c"
-  yy_destructor(yypParser,21,&yymsp[0].minor);
+#line 1297 "../../build/grammar_ttl.c"
+  yy_destructor(yypParser,22,&yymsp[0].minor);
 }
         break;
       case 21: /* blank ::= LPAREN RPAREN */
-{  yy_destructor(yypParser,22,&yymsp[-1].minor);
-#line 192 "grammar_ttl.y"
+{  yy_destructor(yypParser,23,&yymsp[-1].minor);
+#line 203 "grammar_ttl.y"
 {
-                yymsp[-1].minor.yy12 = VOLK_iriref_new_ns ("rdf:nil");
-                LOG_TRACE("Created list terminator: %s", yymsp[-1].minor.yy12->data);
+                yymsp[-1].minor.yy50 = VOLK_iriref_new_ns ("rdf:nil");
+                LOG_TRACE("Created list terminator: %s", yymsp[-1].minor.yy50->data);
             }
-#line 1295 "../../build/grammar_ttl.c"
-  yy_destructor(yypParser,23,&yymsp[0].minor);
+#line 1308 "../../build/grammar_ttl.c"
+  yy_destructor(yypParser,24,&yymsp[0].minor);
 }
         break;
       case 22: /* collection ::= LPAREN itemList RPAREN */
-{  yy_destructor(yypParser,22,&yymsp[-2].minor);
-#line 201 "grammar_ttl.y"
+{  yy_destructor(yypParser,23,&yymsp[-2].minor);
+#line 212 "grammar_ttl.y"
 {
-                yymsp[-2].minor.yy12 = VOLK_bnode_add_collection (state->it, yymsp[-1].minor.yy60);
-                VOLK_term_set_free (yymsp[-1].minor.yy60);
+                yymsp[-2].minor.yy50 = VOLK_bnode_add_collection (state->it, yymsp[-1].minor.yy22);
+                VOLK_term_set_free (yymsp[-1].minor.yy22);
             }
-#line 1306 "../../build/grammar_ttl.c"
-  yy_destructor(yypParser,23,&yymsp[0].minor);
+#line 1319 "../../build/grammar_ttl.c"
+  yy_destructor(yypParser,24,&yymsp[0].minor);
 }
         break;
       case 23: /* itemList ::= itemList ows object */
-#line 208 "grammar_ttl.y"
+#line 219 "grammar_ttl.y"
 {
-                if (VOLK_term_set_add (yymsp[-2].minor.yy60, yymsp[0].minor.yy12, NULL) == VOLK_NOACTION)
-                    VOLK_term_free (yymsp[0].minor.yy12);
-                yylhsminor.yy60 = yymsp[-2].minor.yy60;
+                if (VOLK_term_set_add (yymsp[-2].minor.yy22, yymsp[0].minor.yy50, NULL) == VOLK_NOACTION)
+                    VOLK_term_free (yymsp[0].minor.yy50);
+                yylhsminor.yy22 = yymsp[-2].minor.yy22;
             }
-#line 1317 "../../build/grammar_ttl.c"
-  yymsp[-2].minor.yy60 = yylhsminor.yy60;
+#line 1330 "../../build/grammar_ttl.c"
+  yymsp[-2].minor.yy22 = yylhsminor.yy22;
         break;
       case 24: /* itemList ::= object */
-#line 213 "grammar_ttl.y"
+#line 224 "grammar_ttl.y"
 {
-                yylhsminor.yy60 = VOLK_term_set_new ();
-                VOLK_term_set_add (yylhsminor.yy60, yymsp[0].minor.yy12, NULL);
+                yylhsminor.yy22 = VOLK_term_set_new ();
+                VOLK_term_set_add (yylhsminor.yy22, yymsp[0].minor.yy50, NULL);
             }
-#line 1326 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy60 = yylhsminor.yy60;
+#line 1339 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy22 = yylhsminor.yy22;
         break;
       case 25: /* resource ::= IRIREF */
-#line 220 "grammar_ttl.y"
+#line 231 "grammar_ttl.y"
 {
                 VOLK_Term *rel_iri = VOLK_iriref_new (yymsp[0].minor.yy0);
                 free (yymsp[0].minor.yy0);
                 if (state->base) {
-                    yylhsminor.yy12 = VOLK_iriref_new_abs (rel_iri, state->base);
+                    yylhsminor.yy50 = VOLK_iriref_new_abs (rel_iri, state->base);
                     VOLK_term_free (rel_iri);
                 } else {
-                    yylhsminor.yy12 = rel_iri;
+                    yylhsminor.yy50 = rel_iri;
                 }
-                LOG_TRACE("Created IRI: <%s>", yylhsminor.yy12->data);
+                LOG_TRACE("Created IRI: <%s>", yylhsminor.yy50->data);
             }
-#line 1342 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1355 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 26: /* resource ::= QNAME */
-#line 231 "grammar_ttl.y"
+#line 242 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_iriref_new_ns (yymsp[0].minor.yy0);
-                LOG_TRACE("Created IRI: %s", yylhsminor.yy12->data);
+                yylhsminor.yy50 = VOLK_iriref_new_ns (yymsp[0].minor.yy0);
+                LOG_TRACE("Created IRI: %s", yylhsminor.yy50->data);
                 free (yymsp[0].minor.yy0);
             }
-#line 1352 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1365 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 27: /* turtleDoc ::= statements EOF */
 #line 54 "grammar_ttl.y"
 {
 }
-#line 1359 "../../build/grammar_ttl.c"
+#line 1372 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,15,&yymsp[0].minor);
         break;
       case 33: /* statement ::= WS */
@@ -1340,40 +1353,40 @@ static YYACTIONTYPE yy_reduce(
 #line 61 "grammar_ttl.y"
 {
 }
-#line 1368 "../../build/grammar_ttl.c"
+#line 1381 "../../build/grammar_ttl.c"
 }
         break;
       case 34: /* predicate ::= resource */
       case 35: /* object ::= resource */ yytestcase(yyruleno==35);
-{  yy_destructor(yypParser,36,&yymsp[0].minor);
-#line 126 "grammar_ttl.y"
+{  yy_destructor(yypParser,37,&yymsp[0].minor);
+#line 135 "grammar_ttl.y"
 {
 }
-#line 1377 "../../build/grammar_ttl.c"
+#line 1390 "../../build/grammar_ttl.c"
 }
         break;
       case 36: /* object ::= blank */
-{  yy_destructor(yypParser,37,&yymsp[0].minor);
-#line 132 "grammar_ttl.y"
+{  yy_destructor(yypParser,38,&yymsp[0].minor);
+#line 141 "grammar_ttl.y"
 {
 }
-#line 1385 "../../build/grammar_ttl.c"
+#line 1398 "../../build/grammar_ttl.c"
 }
         break;
       case 37: /* object ::= literal */
-{  yy_destructor(yypParser,38,&yymsp[0].minor);
-#line 133 "grammar_ttl.y"
+{  yy_destructor(yypParser,39,&yymsp[0].minor);
+#line 142 "grammar_ttl.y"
 {
 }
-#line 1393 "../../build/grammar_ttl.c"
+#line 1406 "../../build/grammar_ttl.c"
 }
         break;
       case 38: /* blank ::= collection */
-{  yy_destructor(yypParser,39,&yymsp[0].minor);
-#line 191 "grammar_ttl.y"
+{  yy_destructor(yypParser,40,&yymsp[0].minor);
+#line 202 "grammar_ttl.y"
 {
 }
-#line 1401 "../../build/grammar_ttl.c"
+#line 1414 "../../build/grammar_ttl.c"
 }
         break;
       default:
@@ -1428,7 +1441,7 @@ static void yy_parse_failed(
 
     log_error ("TTL parse error. Cannot continue.");
     state->rc = VOLK_PARSE_ERR;
-#line 1456 "../../build/grammar_ttl.c"
+#line 1469 "../../build/grammar_ttl.c"
 /************ End %parse_failure code *****************************************/
   TTLParseARG_STORE /* Suppress warning about unused %extra_argument variable */
   TTLParseCTX_STORE
@@ -1451,7 +1464,7 @@ static void yy_syntax_error(
 
     // Fail immediately on first error.
     yy_parse_failed (yypParser);
-#line 1479 "../../build/grammar_ttl.c"
+#line 1492 "../../build/grammar_ttl.c"
 /************ End %syntax_error code ******************************************/
   TTLParseARG_STORE /* Suppress warning about unused %extra_argument variable */
   TTLParseCTX_STORE

+ 14 - 3
src/codec/grammar_ttl.y

@@ -43,7 +43,7 @@
 %left PERIOD .
 %left SEMICOLON .
 %left COMMA .
-%left STRING INTEGER DOUBLE DECIMAL BOOLEAN QNAME BNODE_ID IRIREF .
+%left STRING INTEGER DOUBLE DECIMAL BOOLEAN BNODE_ID QNAME IRIREF .
 %nonassoc LANGTAG PREFIX .
 %nonassoc COLON .
 
@@ -96,8 +96,17 @@ triples 	::= subject(S) ows predObjList(L) SEMICOLON PERIOD . [PERIOD] {
 %type predObjList       { VOLK_LinkMap * }
 %destructor predObjList { VOLK_link_map_free ($$); }
 predObjList(A) ::= predicate(P) ows objectList(O) . [SEMICOLON] {
-                A = VOLK_link_map_new (state->lms, VOLK_LINK_OUTBOUND);
+                VOLK_Term *s;  // Subject for the new link map.
+                if (state->lms) s = state->lms;  // Reuse the subject recorded in the parser state, if any.
+                else {
+                    // TODO This may be brittle. The full BNode syntax is not
+                    // verified here; a fresh BNode is used as the subject.
+                    log_info ("Link map subject not present. Assuming BNode.");
+                    s = VOLK_bnode_new (NULL);
+                }
+                A = VOLK_link_map_new (s, VOLK_LINK_OUTBOUND);
                 VOLK_link_map_add (A, P, O);
+                if (s != state->lms) VOLK_term_free (s);  // Free only the temporary BNode. NOTE(review): presumably VOLK_link_map_new copies its subject -- confirm.
             }
 predObjList(A) ::= predObjList(L) SEMICOLON predicate(P) ows objectList(O) . {
                 VOLK_link_map_add (L, P, O);
@@ -176,11 +185,13 @@ blank(A)    ::= BNODE_ID(D) . {
                 LOG_TRACE("Created blank node: _:%s", A->data);
                 free (D);
             }
-blank(A)    ::= LBRACKET RBRACKET . [BNODE_ID] {
+blank(A)    ::= ANON . [BNODE_ID] {
+                LOG_TRACE ("Found empty BNode.");
                 A = VOLK_bnode_new (NULL);
                 LOG_TRACE("Created empty list BN: _:%s", A->data);
             }
 blank(A)    ::= LBRACKET predObjList(L) RBRACKET . [BNODE_ID] {
+                LOG_TRACE ("Found BNode with data.");
                 A = VOLK_bnode_new (NULL);
                 state->lms = A;
                 state->ct += VOLK_graph_add_link_map (state->it, L);

+ 113 - 49
src/codec/lexer_nt.re

@@ -1,7 +1,21 @@
 #include "volksdata/codec/parser_nt.h"
 #include "volksdata/codec/tokens_nt.h"
+//#include "volksdata/codec/parser_common.h"
 
 
+/** BEGIN duplicate section
+ * This section is bit-by-bit identical in NT and TTL lexers. The copy in
+ * include/volksdata/codec/parser_common.h should be used, but some re2c tags
+ * are not being parsed in that location.
+ */
+
+/** @brief TTL is UTF-8 encoded.
+ *
+ * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar
+ *
+ * `char` should be considered to be UTF-8 throughout this library, however,
+ * setting YYCTYPE to char generates case labels outside of the char range.
+ */
 #define YYCTYPE     uint8_t
 #define YYCURSOR    it->cur
 #define YYMARKER    it->mar
@@ -10,47 +24,25 @@
 
 
 typedef struct {
-    FILE           *fh;                 ///< Input file handle.
-    const char     *sh;                 ///< Input string. Exclusive with fh.
-    YYCTYPE         buf[CHUNK_SIZE],    ///< Start of buffer.
-                   *lim,                ///< Position after the last available
-                                        ///<   input character (YYLIMIT).
-                   *cur,                ///< Next input character to be read
-                                        ///<   (YYCURSOR)
-                   *mar,                ///< Most recent match (YYMARKER)
-                   *tok,                ///< Start of current token.
-                   *bol;                ///< Address of the beginning of the
-                                        ///<   current line (for debugging).
-    unsigned        line;               ///< Current line no. (for debugging).
-    unsigned        ct;                 ///< Number of parsed triples.
-    bool            eof;                ///< if we have reached EOF.
+    FILE          * fh;         ///< Input file handle.
+    const char    * sh;         ///< Input string. Exclusive with fh.
+    size_t          buf_size;   ///< Current buffer size in bytes (doubled by fill() when a token does not fit).
+    YYCTYPE       * buf,        ///< Start of heap buffer; NULL when reading from a string.
+                  * lim,        ///< Position after the last available
+                                ///<   input character (YYLIMIT).
+                  * cur,        ///< Next input character to be read (YYCURSOR)
+                  * mar,        ///< Most recent match (YYMARKER)
+                  * tok,        ///< Start of current token.
+                  * bol;        ///< Address of the beginning of the
+                                ///<   current line (for debugging).
+    unsigned        line;       ///< Current line no. (for debugging).
+    unsigned        ct;         ///< Number of statements parsed.
+    bool            eof;        ///< True when no more input can be read; fill() returns early once set.
     /*!stags:re2c format = "YYCTYPE *@@;"; */
 } ParseIterator;
 
 
-static int fill(ParseIterator *it)
-{
-    if (it->eof) {
-        return 1;
-    }
-    const size_t shift = it->tok - it->buf;
-    if (shift < 1) {
-        return 2;
-    }
-    LOG_DEBUG("Shifting bytes: %lu", shift);
-    memmove (it->buf, it->tok, it->lim - it->tok);
-    it->lim -= shift;
-    it->cur -= shift;
-    it->mar -= shift;
-    it->tok -= shift;
-    if (it->fh) it->lim += fread (it->lim, 1, shift, it->fh);
-    // With a string handle, assume the whole input fits in CHUNK_SIZE.
-    else it->lim = memcpy (it->lim, it->sh, sizeof(it->buf));
-    /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
-    it->lim[0] = 0;
-    it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
-    return 0;
-}
+static int fill(ParseIterator *it);
 
 
 /** @brief Initialize parser.
@@ -62,20 +54,83 @@ static int fill(ParseIterator *it)
  *
  * @param[in] sh String to read from. This is exclusive with fh.
  */
-static void parse_init(ParseIterator *it, FILE *fh, const char *sh)
+static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
 {
-    it->fh = fh;
-    it->sh = sh;
-    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE - 1;
+    if(fh) {
+        // Stream input: allocate a heap buffer of CHUNK_SIZE bytes that is
+        // refilled in chunks (via YYFILL) as the lexer consumes it.
+        it->fh = fh;
+        it->sh = NULL;
+        it->buf_size = CHUNK_SIZE;
+        it->buf = malloc(it->buf_size);
+        if (!it->buf) log_error ("Error allocating lexer buffer.");  // NOTE(review): execution continues and it->buf is dereferenced below even when malloc failed.
+        it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1;  // Buffer starts empty: every pointer sits on its last byte.
+        it->bol = it->buf;
+        it->eof = false;
+        it->lim[0] = 0;  // NUL sentinel at the limit.
+    } else {
+        // String input: lex the caller's string in place; no buffer is
+        // allocated.
+        it->fh = NULL;
+        it->sh = sh;
+        it->buf_size = strlen(sh) + 1;  // Includes the NUL terminator.
+        it->buf = NULL;  // Nothing for callers to free in this mode.
+        it->cur = it->tok = (YYCTYPE*)it->sh;
+        it->lim = it->mar = it->cur + it->buf_size - 1;  // Points at the string's NUL terminator.
+        it->bol = it->cur;
+        it->eof = true;  // The whole input is already available; fill() returns early when eof is set.
+    }
     it->line = 1;
-    it->bol = it->buf;
     it->ct = 0;
-    it->eof = 0;
     /*!stags:re2c format = "it->@@ = NULL; "; */
-    fill (it);
 }
 
 
+/** @brief Refill the lexer buffer from the input stream.
+ *
+ * Discards the bytes preceding the current token, moves the pending lexeme
+ * to the start of the buffer and reads more data after it. If the current
+ * token already starts at the beginning of the buffer, i.e. the lexeme does
+ * not fit, the buffer capacity is doubled instead and every pointer into the
+ * buffer (including re2c tags) is rebased onto the new allocation.
+ *
+ * This function is duplicated in the NT and TTL lexers; keep both copies in
+ * sync.
+ *
+ * @param[in,out] it Parse iterator whose buffer is refilled.
+ *
+ * @return 0 on success; 1 if EOF had already been reached; -1 if the buffer
+ *  could not be enlarged, in which case the iterator is left untouched.
+ */
+static int
+fill(ParseIterator *it)
+{
+    log_debug ("Filling codec buffer @ %p.", (void *)it->buf);
+    if (it->eof) return 1;
+
+    size_t shift = it->tok - it->buf;
+    size_t used = it->lim - it->tok;
+
+    if (shift < 1) {
+        // The lexeme occupies the whole buffer: double the capacity.
+        size_t new_size = it->buf_size * 2;
+        if (new_size <= it->buf_size) {
+            log_error ("Lexer buffer size overflow.");
+            return -1;
+        }
+        LOG_DEBUG ("Reallocating buffer to %zu bytes.", new_size);
+        // A fresh allocation is used instead of realloc() so that the old
+        // buffer remains valid while pointers are rebased, and so that the
+        // iterator is unchanged if the allocation fails.
+        YYCTYPE *new_buf = malloc (new_size);
+        if (!new_buf) {
+            log_error ("Memory allocation error.");
+            return -1;
+        }
+        memcpy (new_buf, it->buf, used);
+        // Rebase every pointer onto the new buffer, keeping its offset.
+        it->cur = new_buf + (it->cur - it->buf);
+        it->mar = new_buf + (it->mar - it->buf);
+        it->lim = new_buf + (it->lim - it->buf);
+        it->bol = new_buf + (it->bol - it->buf);
+        /*!stags:re2c format = "if (it->@@) it->@@ = new_buf + (it->@@ - it->buf); "; */
+        it->tok = new_buf;
+        free (it->buf);
+        it->buf = new_buf;
+        it->buf_size = new_size;
+    } else {
+        LOG_DEBUG("Shifting bytes: %zu", shift);
+        memmove (it->buf, it->tok, used);
+        LOG_TRACE ("Limit offset before reading data: %td", it->lim - it->tok);
+        it->lim -= shift;
+        it->cur -= shift;
+        it->mar -= shift;
+        it->tok -= shift;
+        /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
+    }
+    // Leave one byte free for the NUL sentinel written at the limit.
+    it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh);
+    LOG_TRACE ("Cursor offset from last token: %td", it->cur - it->tok);
+    LOG_TRACE ("Limit offset from last token: %td", it->lim - it->tok);
+    it->lim[0] = 0;
+    // A short read means the stream is exhausted.
+    it->eof = it->lim < it->buf + it->buf_size - 1;
+    return 0;
+}
+
+/** END duplicate section */
+
+
 // Parser interface. Required here to silence linters.
 void *NTParseAlloc();
 void NTParse();
@@ -150,7 +205,7 @@ loop:
         *term = VOLK_iriref_new ((char*)data);
         free (data);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_IRIREF;
     }
 
@@ -191,7 +246,7 @@ loop:
         free (data);
         free (metadata);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_LITERAL;
     }
 
@@ -203,7 +258,7 @@ loop:
         *term = VOLK_term_new (VOLK_TERM_BNODE, (char*)data, NULL);
         free (data);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_BNODE;
     }
 
@@ -232,8 +287,8 @@ loop:
     }
 
     * {
-        LOG_DEBUG(
-            "Invalid token @ %lu: %s (\\x%x)",
+        log_error (
+            "Invalid token @ %p: %s (\\x%x)",
             YYCURSOR - it->buf - 1, it->tok, *it->tok);
 
         return -1;
@@ -251,6 +306,8 @@ VOLK_nt_parse_term (const char *rep, VOLK_Term **term)
 
     int ttype = lex (&it, term);
 
+    free (it.buf);
+
     switch (ttype) {
         case T_IRIREF:
         case T_LITERAL:
@@ -268,6 +325,11 @@ VOLK_nt_parse_doc (
     *err_p = NULL;
     *gr_p = NULL;
 
+    if (!fh && !sh) {
+        log_error ("Neither file handle nor string input provided.");
+        return VOLK_VALUE_ERR;
+    }
+
     ParseIterator parse_it;
     parse_init (&parse_it, fh, sh);
 
@@ -332,6 +394,8 @@ finally: ;
     NTParse (parser, 0, NULL, it);
     NTParseFree (parser, free);
 
+    free (parse_it.buf);
+
     VOLK_graph_add_done (it);
     VOLK_term_free (term);
 

+ 126 - 64
src/codec/lexer_ttl.re

@@ -1,6 +1,12 @@
 #include "volksdata/codec/parser_ttl.h"
 #include "volksdata/codec/tokens_ttl.h"
+//#include "volksdata/codec/parser_common.h"
 
+/** BEGIN duplicate section
+ * This section is bit-by-bit identical in NT and TTL lexers. The copy in
+ * include/volksdata/codec/parser_common.h should be used, but some re2c tags
+ * are not being parsed in that location.
+ */
 
 /** @brief TTL is UTF-8 encoded.
  *
@@ -17,73 +23,117 @@
 
 
+/** @brief Lexer state shared by the scanner, parse_init() and fill().
+ *
+ * Holds the input source (file handle or string) and the cursor pointers
+ * into the buffer being scanned.
+ */
 typedef struct {
-    FILE          * fh;                 ///< Input file handle.
-    const char    * sh;                 ///< Input string. Exclusive with fh.
-    YYCTYPE         buf[CHUNK_SIZE],    ///< Start of buffer.
-                  * lim,                ///< Position after the last available
-                                        ///<   input character (YYLIMIT).
-                  * cur,                ///< Next input character to be read
-                                        ///<   (YYCURSOR)
-                  * mar,                ///< Most recent match (YYMARKER)
-                  * tok,                ///< Start of current token.
-                  * bol;                ///< Address of the beginning of the
-                                        ///<   current line (for debugging).
-    unsigned        line;               ///< Current line no. (for debugging).
-    unsigned        stmt;               ///< Current statement.
-    bool            eof;                ///< if we have reached EOF.
+    FILE          * fh;         ///< Input file handle.
+    const char    * sh;         ///< Input string. Exclusive with fh.
+    size_t          buf_size;   ///< Current size of the buffer in bytes.
+                                ///<   Starts at CHUNK_SIZE for file input
+                                ///<   and is doubled by fill() when a
+                                ///<   lexeme does not fit.
+    YYCTYPE       * buf,        ///< Start of the heap-allocated buffer;
+                                ///<   NULL when reading from sh.
+                  * lim,        ///< Position after the last available
+                                ///<   input character (YYLIMIT).
+                  * cur,        ///< Next input character to be read (YYCURSOR)
+                  * mar,        ///< Most recent match (YYMARKER)
+                  * tok,        ///< Start of current token.
+                  * bol;        ///< Address of the beginning of the
+                                ///<   current line (for debugging).
+    unsigned        line;       ///< Current line no. (for debugging).
+    unsigned        ct;         ///< Number of statements parsed.
+    bool            eof;        ///< True once no more input can be read;
+                                ///<   always true for string input.
     /*!stags:re2c format = "YYCTYPE *@@;"; */
 } ParseIterator;
 
-typedef struct {
-    YYCTYPE *       data;
-    size_t          size;
-} ParserToken;
+
+static int fill(ParseIterator *it);
 
 
-static int fill (ParseIterator *it)
+/** @brief Initialize parser.
+ *
+ * With a file handle, a CHUNK_SIZE heap buffer is allocated and every cursor
+ * is parked on its last byte, which is set to NUL; no data is read here, the
+ * buffer is populated later by fill(). With a string, the string itself is
+ * scanned in place, no buffer is allocated (it->buf is NULL) and it->eof is
+ * set so that fill() returns without reading.
+ *
+ * @param[in] it iterator handle to be initialized.
+ *
+ * @param[in] fh Open file handle to read from. This is exclusive with sh. If
+ *  both fh and sh are provided, fh has precedence.
+ *
+ * @param[in] sh String to read from. This is exclusive with fh. Must not be
+ *  NULL when fh is NULL, because strlen() is called on it.
+ */
+static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
 {
-    if (it->eof) {
-        return 1;
+    if(fh) {
+        // Stream handling. It engages YYFILL and reads by chunks.
+        it->fh = fh;
+        it->sh = NULL;
+        it->buf_size = CHUNK_SIZE;
+        it->buf = malloc(it->buf_size);
+        // NOTE(review): a failed allocation is only logged; the statements
+        // below still compute pointers from the NULL buffer and write
+        // through it->lim. The void return type leaves no way to report it.
+        if (!it->buf) log_error ("Error allocating lexer buffer.");
+        it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1;
+        it->bol = it->buf;
+        it->eof = false;
+        it->lim[0] = 0;
+    } else {
+        // String handling. Uses the provided string as the buffer.
+        it->fh = NULL;
+        it->sh = sh;
+        it->buf_size = strlen(sh) + 1;
+        it->buf = NULL;
+        it->cur = it->tok = (YYCTYPE*)it->sh;
+        // The limit is the string's own NUL terminator.
+        it->lim = it->mar = it->cur + it->buf_size - 1;
+        it->bol = it->cur;
+        it->eof = true;
     }
-    const size_t shift = it->tok - it->buf;
+    it->line = 1;
+    it->ct = 0;
+    /*!stags:re2c format = "it->@@ = NULL; "; */
+}
+
+
+/** @brief Refill the lexer buffer from the input file (YYFILL handler).
+ *
+ * Bytes in front of the current token are discarded by moving the pending
+ * lexeme to the start of the buffer. If the token already starts at the
+ * beginning of the buffer, i.e. the lexeme does not fit, the buffer is
+ * doubled instead. The free space is then filled from it->fh and a NUL
+ * sentinel is written at the new limit.
+ *
+ * @param[in,out] it Lexer state. String input has it->eof set by
+ *  parse_init(), so this function returns before touching it->fh.
+ *
+ * @return 0 on success; 1 if the end of input had already been reached; -1
+ *  if the buffer could not be enlarged. On failure the existing buffer is
+ *  left untouched and is still owned by the iterator.
+ */
+int
+fill(ParseIterator *it)
+{
+    log_debug ("Filling codec buffer @ %p.", (void *)it->buf);
+    if (it->eof) return 1;
+
+    // Consumed bytes in front of the current token.
+    size_t shift = it->tok - it->buf;
+    // Bytes of the pending lexeme (token start up to the limit) to keep.
+    size_t used = it->lim - it->tok;
+
+    // If buffer is too small for the lexeme, double the capacity.
     if (shift < 1) {
-        return 2;
+        if (it->buf_size > SIZE_MAX / 2) {
+            log_error ("Lexer buffer size overflow.");
+            return -1;
+        }
+        size_t new_size = it->buf_size * 2;
+        LOG_DEBUG ("Reallocating buffer to %zu bytes.", new_size);
+        // A fresh block is allocated instead of calling realloc(): the old
+        // block must stay valid while every pointer is rebased onto the new
+        // one, and must not be lost if the allocation fails.
+        YYCTYPE *new_buf = malloc (new_size);
+        if (!new_buf) {
+            log_error ("Memory allocation error.");
+            return -1;
+        }
+        memcpy (new_buf, it->buf, used);
+        // Here it->tok == it->buf, so offsets are taken from it->buf.
+        it->cur = new_buf + (it->cur - it->buf);
+        it->mar = new_buf + (it->mar - it->buf);
+        it->lim = new_buf + used;
+        /*!stags:re2c format = "if (it->@@) it->@@ = new_buf + (it->@@ - it->buf); "; */
+        free (it->buf);
+        it->buf = it->tok = new_buf;
+        it->buf_size = new_size;
+        // shift is still 0, so the tag adjustment further down is a no-op
+        // for this branch. it->bol (debug only) is not adjusted here, just
+        // as it is not adjusted when shifting.
+    } else {
+        LOG_DEBUG("Shifting bytes: %zu", shift);
+        memmove (it->buf, it->tok, used);
+        LOG_TRACE ("Limit offset before reading data: %zu", used);
+        it->lim -= shift;
+        it->cur -= shift;
+        it->mar -= shift;
+        it->tok -= shift;
     }
-    LOG_TRACE("Shifting bytes: %lu", shift);
-    memmove(it->buf, it->tok, it->lim - it->tok);
-    it->lim -= shift;
-    it->cur -= shift;
-    it->mar -= shift;
-    it->tok -= shift;
-    if (it->fh) it->lim += fread (it->lim, 1, shift, it->fh);
-    // With a string handle, assume the whole input fits in CHUNK_SIZE.
-    else it->lim = memcpy (it->lim, it->sh, sizeof(it->buf));
+    // One byte is always kept free for the NUL sentinel written below.
+    it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh);
     /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
+    LOG_TRACE ("Cursor offset from last token: %td", it->cur - it->tok);
+    LOG_TRACE ("Limit offset from last token: %td", it->lim - it->tok);
     it->lim[0] = 0;
-    it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
+    // A short read means the input is exhausted.
+    it->eof = it->lim < it->buf + it->buf_size - 1;
     return 0;
 }
 
-
-static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
-{
-    it->fh = fh;
-    it->sh = sh;
-    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE - 1;
-    it->line = 1;
-    it->stmt = 1;
-    it->bol = it->buf;
-    it->eof = 0;
-    /*!stags:re2c format = "it->@@ = NULL; "; */
-    fill (it);
-}
+/** END duplicate section */
 
 
-static inline void newline (ParseIterator *it) {
-    it->line ++;
-    it->bol = YYCURSOR;
-    LOG_TRACE("New line: #%u.", it->line);
-}
+/** @brief A data pointer paired with a size.
+ *
+ * NOTE(review): presumably carries a token's bytes and their length between
+ * lexer and parser; the usage is not visible in this section. Confirm.
+ */
+typedef struct {
+    YYCTYPE *       data;       ///< Token bytes.
+    size_t          size;       ///< Size associated with data.
+} ParserToken;
 
 
 // Parser interface. Required here to silence linters.
@@ -94,6 +144,13 @@ void TTLParseFree();
 void TTLParseTrace();
 #endif
 
+
+/** @brief Record the start of a new input line.
+ *
+ * Increments the line counter and stores the current cursor position
+ * (YYCURSOR) as the beginning of the line.
+ *
+ * @param[in,out] it Lexer state to update.
+ */
+static inline void newline (ParseIterator *it) {
+    it->line ++;
+    it->bol = YYCURSOR;
+    LOG_TRACE("New line: #%u.", it->line);
+}
+
 // Lexer.
 
 static int lex (ParseIterator *it, YYCTYPE **token_p)
@@ -102,6 +159,7 @@ static int lex (ParseIterator *it, YYCTYPE **token_p)
 
     /*!re2c
 
+    re2c:yyfill:enable = 1;
     re2c:eof = 0;
     re2c:flags:8 = 1;
     re2c:flags:tags = 1;
@@ -271,27 +329,29 @@ loop: // Start new token.
         return T_DECIMAL;
     }
 
-    '(' WS? { return T_LPAREN; }
+    '(' WS* { return T_LPAREN; }
 
-    WS? ')' { return T_RPAREN; }
+    WS* ')' { return T_RPAREN; }
 
-    '[' WS? { return T_LBRACKET; }
+    '[' WS* ']' { return T_ANON; }
 
-    WS? ']' { return T_RBRACKET; }
+    '[' WS* { return T_LBRACKET; }
+
+    WS* ']' { return T_RBRACKET; }
 
     ':' { return T_COLON; }
 
-    WS? ';' WS? {
+    WS* ';' WS* {
         LOG_TRACE("End of object list.");
 
         return T_SEMICOLON;
     }
 
-    WS? ',' WS? { return T_COMMA; }
+    WS* ',' WS* { return T_COMMA; }
 
-    WS? '.' {
-        LOG_TRACE("End of statement #%u.", it->stmt);
-        it->stmt++;
+    WS* '.' {
+        LOG_TRACE("End of statement #%u.", it->ct);
+        it->ct++;
         return T_PERIOD;
     }
 
@@ -369,12 +429,13 @@ VOLK_ttl_parse_doc (
     *err_p = NULL;
     *gr_p = NULL;
 
-    if (!fh) return VOLK_VALUE_ERR;
+    if (!fh && !sh) {
+        log_error ("Neither file handle nor string input provided.");
+        return VOLK_VALUE_ERR;
+    }
 
-    VOLK_TTLParserState *state = malloc (sizeof (*state));
+    VOLK_TTLParserState *state = calloc (1, sizeof (*state));
     if (UNLIKELY (!state)) return VOLK_MEM_ERR;
-    state->base = NULL;
-    state->ct = 0;
     state->rc = VOLK_NORESULT;
 
     ParseIterator parse_it;
@@ -439,6 +500,7 @@ finally: ;
     VOLK_rc rc = state->rc;
     LOG_TRACE("rc is %d", rc);
 
+    free (parse_it.buf);
     TTLParseFree (parser, free);
 
     VOLK_graph_add_done (state->it);

+ 142 - 78
src/codec/parser_nt.c

@@ -1,9 +1,23 @@
-/* Generated by re2c 4.1 on Wed Aug 20 17:28:48 2025 */
+/* Generated by re2c 4.1 on Fri Aug 22 19:43:48 2025 */
 #line 1 "lexer_nt.re"
 #include "volksdata/codec/parser_nt.h"
 #include "volksdata/codec/tokens_nt.h"
+//#include "volksdata/codec/parser_common.h"
 
 
+/** BEGIN duplicate section
+ * This section is bit-by-bit identical in NT and TTL lexers. The copy in
+ * include/volksdata/codec/parser_common.h should be used, but some re2c tags
+ * are not being parsed in that location.
+ */
+
+/** @brief TTL is UTF-8 encoded.
+ *
+ * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar
+ *
+ * `char` should be considered to be UTF-8 throughout this library, however,
+ * setting YYCTYPE to char generates case labels outside of the char range.
+ */
 #define YYCTYPE     uint8_t
 #define YYCURSOR    it->cur
 #define YYMARKER    it->mar
@@ -12,55 +26,29 @@
 
 
 typedef struct {
-    FILE           *fh;                 ///< Input file handle.
-    const char     *sh;                 ///< Input string. Exclusive with fh.
-    YYCTYPE         buf[CHUNK_SIZE],    ///< Start of buffer.
-                   *lim,                ///< Position after the last available
-                                        ///<   input character (YYLIMIT).
-                   *cur,                ///< Next input character to be read
-                                        ///<   (YYCURSOR)
-                   *mar,                ///< Most recent match (YYMARKER)
-                   *tok,                ///< Start of current token.
-                   *bol;                ///< Address of the beginning of the
-                                        ///<   current line (for debugging).
-    unsigned        line;               ///< Current line no. (for debugging).
-    unsigned        ct;                 ///< Number of parsed triples.
-    bool            eof;                ///< if we have reached EOF.
+    FILE          * fh;         ///< Input file handle.
+    const char    * sh;         ///< Input string. Exclusive with fh.
+    size_t          buf_size;   ///< Current size of the buffer in bytes.
+                                ///<   Starts at CHUNK_SIZE for file input
+                                ///<   and is doubled by fill() when a
+                                ///<   lexeme does not fit.
+    YYCTYPE       * buf,        ///< Start of the heap-allocated buffer;
+                                ///<   NULL when reading from sh.
+                  * lim,        ///< Position after the last available
+                                ///<   input character (YYLIMIT).
+                  * cur,        ///< Next input character to be read (YYCURSOR)
+                  * mar,        ///< Most recent match (YYMARKER)
+                  * tok,        ///< Start of current token.
+                  * bol;        ///< Address of the beginning of the
+                                ///<   current line (for debugging).
+    unsigned        line;       ///< Current line no. (for debugging).
+    unsigned        ct;         ///< Number of statements parsed.
+    bool            eof;        ///< True once no more input can be read;
+                                ///<   always true for string input.
     
-#line 31 "parser_nt.c"
+#line 45 "parser_nt.c"
 YYCTYPE *yyt1;YYCTYPE *yyt2;YYCTYPE *yyt3;
-#line 27 "lexer_nt.re"
+#line 41 "lexer_nt.re"
 
 } ParseIterator;
 
 
-static int fill(ParseIterator *it)
-{
-    if (it->eof) {
-        return 1;
-    }
-    const size_t shift = it->tok - it->buf;
-    if (shift < 1) {
-        return 2;
-    }
-    LOG_DEBUG("Shifting bytes: %lu", shift);
-    memmove (it->buf, it->tok, it->lim - it->tok);
-    it->lim -= shift;
-    it->cur -= shift;
-    it->mar -= shift;
-    it->tok -= shift;
-    if (it->fh) it->lim += fread (it->lim, 1, shift, it->fh);
-    // With a string handle, assume the whole input fits in CHUNK_SIZE.
-    else it->lim = memcpy (it->lim, it->sh, sizeof(it->buf));
-    
-#line 57 "parser_nt.c"
-if (it->yyt1) it->yyt1 -= shift; if (it->yyt2) it->yyt2 -= shift; if (it->yyt3) it->yyt3 -= shift; 
-#line 49 "lexer_nt.re"
-
-    it->lim[0] = 0;
-    it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
-    return 0;
-}
+static int fill(ParseIterator *it);
 
 
 /** @brief Initialize parser.
@@ -72,24 +60,91 @@ if (it->yyt1) it->yyt1 -= shift; if (it->yyt2) it->yyt2 -= shift; if (it->yyt3)
  *
  * @param[in] sh String to read from. This is exclusive with fh.
  */
-static void parse_init(ParseIterator *it, FILE *fh, const char *sh)
+static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
 {
-    it->fh = fh;
-    it->sh = sh;
-    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE - 1;
+    if(fh) {
+        // Stream handling. It engages YYFILL and reads by chunks.
+        it->fh = fh;
+        it->sh = NULL;
+        it->buf_size = CHUNK_SIZE;
+        it->buf = malloc(it->buf_size);
+        // NOTE(review): a failed allocation is only logged; the statements
+        // below still compute pointers from the NULL buffer and write
+        // through it->lim.
+        if (!it->buf) log_error ("Error allocating lexer buffer.");
+        // All cursors are parked on the last byte; no data is read here.
+        it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1;
+        it->bol = it->buf;
+        it->eof = false;
+        it->lim[0] = 0;
+    } else {
+        // String handling. Uses the provided string as the buffer.
+        it->fh = NULL;
+        it->sh = sh;
+        it->buf_size = strlen(sh) + 1;
+        it->buf = NULL;
+        it->cur = it->tok = (YYCTYPE*)it->sh;
+        // The limit is the string's own NUL terminator.
+        it->lim = it->mar = it->cur + it->buf_size - 1;
+        it->bol = it->cur;
+        // Marked as EOF up front so that fill() returns without reading.
+        it->eof = true;
+    }
     it->line = 1;
-    it->bol = it->buf;
     it->ct = 0;
-    it->eof = 0;
     
-#line 86 "parser_nt.c"
+#line 91 "parser_nt.c"
 it->yyt1 = NULL; it->yyt2 = NULL; it->yyt3 = NULL; 
-#line 74 "lexer_nt.re"
+#line 83 "lexer_nt.re"
 
-    fill (it);
 }
 
 
+/* Refill the lexer buffer from it->fh.
+ *
+ * Returns 1 without reading if it->eof is already set, -1 if the buffer
+ * cannot be reallocated, 0 otherwise. When the current token starts at the
+ * beginning of the buffer, the buffer is doubled; otherwise the pending
+ * lexeme is moved to the start of the buffer. The free space is then filled
+ * with fread() and a NUL byte is written at the new limit.
+ *
+ * This file is re2c output; lasting changes belong in lexer_nt.re.
+ */
+int
+fill(ParseIterator *it)
+{
+    log_debug ("Filling codec buffer @ %p.", it->buf);
+    if (it->eof) return 1;
+
+    // Consumed bytes in front of the current token.
+    size_t shift = it->tok - it->buf;
+    // Bytes of the pending lexeme (token start up to the limit).
+    size_t used = it->lim - it->tok;
+
+    // If buffer is too small for the lexeme, double the capacity.
+    if (shift < 1) {
+        YYCTYPE *old_buf = it->buf;
+        // NOTE(review): shift is 0 here and becomes the old buffer size; it
+        // is used again only by the yyt1..yyt3 adjustment below, which then
+        // moves the tags back by that amount instead of relocating them.
+        shift += it->buf_size;
+        it->buf_size *= 2;
+        LOG_DEBUG ("Reallocating buffer to %zu bytes.", it->buf_size);
+        // NOTE(review): assigning the realloc() result directly drops the
+        // only pointer to the old block when the call fails.
+        it->buf = realloc (it->buf, it->buf_size);
+        if (!it->buf) {
+            log_error ("Memory allocation error.");
+            return -1;
+        }
+        // Move all relative points if address changed.
+        // NOTE(review): the offset is taken between the new block and the
+        // pre-realloc pointer value and stored in an unsigned size_t.
+        size_t reloc_off = it->buf - old_buf;
+        it->cur += reloc_off;
+        it->tok += reloc_off;
+        it->lim += reloc_off;
+        it->mar += reloc_off;
+    } else {
+        LOG_DEBUG("Shifting bytes: %zu", shift);
+        memmove (it->buf, it->tok, used);
+        LOG_TRACE ("Limit offset before reading data: %zu", it->lim - it->tok);
+        it->lim -= shift;
+        it->cur -= shift;
+        it->mar -= shift;
+        it->tok -= shift;
+    }
+    // One byte is left free for the NUL written at the limit below.
+    it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh);
+    
+#line 135 "parser_nt.c"
+if (it->yyt1) it->yyt1 -= shift; if (it->yyt2) it->yyt2 -= shift; if (it->yyt3) it->yyt3 -= shift; 
+#line 123 "lexer_nt.re"
+
+    LOG_TRACE ("Cursor offset from last token: %zu", it->cur - it->tok);
+    LOG_TRACE ("Limit offset from last token: %zu", it->lim - it->tok);
+    it->lim[0] = 0;
+    // A short read (limit before the last buffer byte) marks end of input.
+    it->eof = it->lim < it->buf + it->buf_size - 1;
+    return 0;
+}
+
+/** END duplicate section */
+
+
 // Parser interface. Required here to silence linters.
 void *NTParseAlloc();
 void NTParse();
@@ -114,7 +169,7 @@ loop:
     *term = NULL;
 
     
-#line 118 "parser_nt.c"
+#line 173 "parser_nt.c"
 {
 	YYCTYPE yych;
 	unsigned int yyaccept = 0;
@@ -140,15 +195,15 @@ yyFillLabel0:
 yy1:
 	++YYCURSOR;
 yy2:
-#line 234 "lexer_nt.re"
+#line 289 "lexer_nt.re"
 	{
-        LOG_DEBUG(
-            "Invalid token @ %lu: %s (\\x%x)",
+        log_error (
+            "Invalid token @ %p: %s (\\x%x)",
             YYCURSOR - it->buf - 1, it->tok, *it->tok);
 
         return -1;
     }
-#line 152 "parser_nt.c"
+#line 207 "parser_nt.c"
 yy3:
 	++YYCURSOR;
 yyFillLabel1:
@@ -163,13 +218,13 @@ yyFillLabel1:
 			goto yy4;
 	}
 yy4:
-#line 217 "lexer_nt.re"
+#line 272 "lexer_nt.re"
 	{
         LOG_DEBUG("Separator.");
 
         return T_WS;
     }
-#line 173 "parser_nt.c"
+#line 228 "parser_nt.c"
 yy5:
 	++YYCURSOR;
 yyFillLabel2:
@@ -185,14 +240,14 @@ yyFillLabel2:
 			goto yy6;
 	}
 yy6:
-#line 133 "lexer_nt.re"
+#line 188 "lexer_nt.re"
 	{
         it->line ++;
         it->bol = YYCURSOR;
         LOG_DEBUG("New line: #%u.", it->line);
         return T_EOL;
     }
-#line 196 "parser_nt.c"
+#line 251 "parser_nt.c"
 yy7:
 	yyaccept = 0;
 	YYMARKER = ++YYCURSOR;
@@ -234,7 +289,7 @@ yyFillLabel4:
 		default: goto yy9;
 	}
 yy9:
-#line 223 "lexer_nt.re"
+#line 278 "lexer_nt.re"
 	{
         size_t size = YYCURSOR - it->tok + 1;
         YYCTYPE *data = malloc (size);
@@ -245,17 +300,17 @@ yy9:
 
         goto loop;
     }
-#line 249 "parser_nt.c"
+#line 304 "parser_nt.c"
 yy10:
 	++YYCURSOR;
-#line 210 "lexer_nt.re"
+#line 265 "lexer_nt.re"
 	{
         LOG_DEBUG("End of triple.");
         it->ct ++;
 
         return T_DOT;
     }
-#line 259 "parser_nt.c"
+#line 314 "parser_nt.c"
 yy11:
 	yyaccept = 0;
 	YYMARKER = ++YYCURSOR;
@@ -364,7 +419,7 @@ yy17:
 	lit_data_e = it->yyt1;
 	dtype_s = it->yyt2;
 	lang_s = it->yyt3;
-#line 157 "lexer_nt.re"
+#line 212 "lexer_nt.re"
 	{
         // Only unescape Unicode from data.
         size_t size = lit_data_e - it->tok - 2;
@@ -402,10 +457,10 @@ yy17:
         free (data);
         free (metadata);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_LITERAL;
     }
-#line 409 "parser_nt.c"
+#line 464 "parser_nt.c"
 yy18:
 	++YYCURSOR;
 yyFillLabel9:
@@ -601,7 +656,7 @@ yy32:
 	}
 yy33:
 	++YYCURSOR;
-#line 145 "lexer_nt.re"
+#line 200 "lexer_nt.re"
 	{
         YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
 
@@ -610,10 +665,10 @@ yy33:
         *term = VOLK_iriref_new ((char*)data);
         free (data);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_IRIREF;
     }
-#line 617 "parser_nt.c"
+#line 672 "parser_nt.c"
 yy34:
 	++YYCURSOR;
 yyFillLabel23:
@@ -856,7 +911,7 @@ yy50:
 			goto yy51;
 	}
 yy51:
-#line 198 "lexer_nt.re"
+#line 253 "lexer_nt.re"
 	{
         YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
 
@@ -865,10 +920,10 @@ yy51:
         *term = VOLK_term_new (VOLK_TERM_BNODE, (char*)data, NULL);
         free (data);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_BNODE;
     }
-#line 872 "parser_nt.c"
+#line 927 "parser_nt.c"
 yy52:
 	++YYCURSOR;
 yyFillLabel39:
@@ -1670,14 +1725,14 @@ yyFillLabel95:
 			goto yy15;
 	}
 yy110:
-#line 140 "lexer_nt.re"
+#line 195 "lexer_nt.re"
 	{
         LOG_DEBUG("End of buffer.");
         return T_EOF;
     }
-#line 1679 "parser_nt.c"
+#line 1734 "parser_nt.c"
 }
-#line 242 "lexer_nt.re"
+#line 297 "lexer_nt.re"
 
 }
 
@@ -1690,6 +1745,8 @@ VOLK_nt_parse_term (const char *rep, VOLK_Term **term)
 
     int ttype = lex (&it, term);
 
+    free (it.buf);
+
     switch (ttype) {
         case T_IRIREF:
         case T_LITERAL:
@@ -1707,6 +1764,11 @@ VOLK_nt_parse_doc (
     *err_p = NULL;
     *gr_p = NULL;
 
+    if (!fh && !sh) {
+        log_error ("Neither file handle nor string input provided.");
+        return VOLK_VALUE_ERR;
+    }
+
     ParseIterator parse_it;
     parse_init (&parse_it, fh, sh);
 
@@ -1771,6 +1833,8 @@ finally: ;
     NTParse (parser, 0, NULL, it);
     NTParseFree (parser, free);
 
+    free (parse_it.buf);
+
     VOLK_graph_add_done (it);
     VOLK_term_free (term);
 

文件差异内容过多而无法显示
+ 257 - 203
src/codec/parser_ttl.c


+ 14 - 10
src/core.c

@@ -88,27 +88,31 @@ finally:
 }
 
 
-char *strndup (const char *src, size_t max)
+/** @brief Duplicate at most max bytes of a string.
+ *
+ * @param[in] src Source bytes. Need not be NUL-terminated as long as at
+ *  least max bytes are readable.
+ *
+ * @param[in] max Maximum number of bytes to copy from src.
+ *
+ * @return Newly allocated, NUL-terminated copy that the caller must free(),
+ *  or NULL if the allocation failed.
+ */
+char *
+strndup (const char *src, size_t max)
 {
-    size_t len = strlen (src);
-    if (len > max) len = max;
+    // Bound the scan to max bytes: strlen() would read past the end of a
+    // source that has no terminator within the first max bytes.
+    const char *nul = memchr (src, '\0', max);
+    size_t len = nul ? (size_t)(nul - src) : max;
 
-    char *res = (char*)malloc (len + 1);
-    if (res) {
-        memcpy (res, src, len);
-        res[len] = '\0';
+    char *dup;
+    dup = malloc (len + 1);
+    if (dup) {
+        memcpy (dup, src, len);
+        dup[len] = '\0';
     }
 
-    return res;
+    return dup;
 }
 
 
-char *strdup (const char *src)
+/** @brief Duplicate a NUL-terminated string.
+ *
+ * @param[in] src String to copy; strlen() is called on it, so it must be
+ *  NUL-terminated.
+ *
+ * @return Newly allocated copy including the terminator, or NULL if the
+ *  allocation failed.
+ */
+char *
+strdup (const char *src)
 {
-   char *res = (char*)malloc (strlen (src) + 1);
-   if (res) strcpy(res, src);
+   char *dup;
+   // One extra byte for the terminator copied by strcpy().
+   dup = malloc (strlen (src) + 1);
+   if (dup) strcpy(dup, src);
 
-   return res;
+   return dup;
 }
 
 

+ 1 - 1
src/term.c

@@ -799,7 +799,7 @@ term_init (
                 log_warn (
                         "Characters %s are not valid in a URI. Got: %s\n",
                         invalid_uri_chars, fquri);
-#if 1
+#if 0
                 // TODO This causes W3C TTL test #29 to fail. Remove?
                 return VOLK_VALUE_ERR;
 #endif

+ 67 - 0
test/test_codec_nt.c

@@ -1,3 +1,5 @@
+#include <unistd.h>
+
 #include "volksdata/codec/codec_nt.h"
 #include "test.h"
 
@@ -247,6 +249,69 @@ test_decode_nt_graph()
 }
 
 
+/* Decode the N-Triples asset test/assets/test2.nt through a file handle and
+ * check the resulting graph size (7) and the count reported by the codec (8).
+ */
+int
+test_decode_nt_file()
+{
+    VOLK_Graph *gr;
+    size_t ct;
+    char *err;
+    // NOTE(review): the fopen() result is not checked before use; the
+    // relative path presumably assumes the tests run from the repository
+    // root. Confirm.
+    FILE *fh = fopen ("test/assets/test2.nt", "r");
+
+    // File handle input only: the string argument is NULL.
+    EXPECT_PASS (codec.decode_graph (fh, NULL, &gr, &ct, &err));
+
+    EXPECT_INT_EQ (VOLK_graph_size (gr), 7);
+    EXPECT_INT_EQ (ct, 8);
+
+    VOLK_graph_free (gr);
+    fclose(fh);
+
+    return 0;
+}
+
+
+#define LARGE_LIT_SIZE (CHUNK_SIZE * 2 + 2)  // More than 2 buffer pages.
+/* Decode a file containing one triple whose literal object is longer than
+ * two lexer buffer chunks, and check that the literal is read back intact.
+ *
+ * A temporary N-Triples file is written to /tmp, decoded through a file
+ * handle and removed at the end of the test.
+ */
+int
+test_decode_large_lit_file()
+{
+    VOLK_Graph *gr;
+    size_t ct;
+    char *err;
+    const char *fpath = "/tmp/test_large_lit.nt";
+    FILE *fh = fopen (fpath, "w");
+
+    char *large_lit = malloc(LARGE_LIT_SIZE + 1);
+    for (unsigned i = 0; i < LARGE_LIT_SIZE; i++)
+        large_lit[i] = rand() % 26 + 'A';  // A-Z
+    large_lit[LARGE_LIT_SIZE] = '\0';
+
+    // The literal is data, not a format string: write it with fputs().
+    fputs ("<urn:s:1> <urn:p:1> \"", fh);
+    fputs (large_lit, fh);
+    fputs ("\" .\n", fh);
+    fclose(fh);
+
+    fh = fopen (fpath, "r");
+    EXPECT_PASS (codec.decode_graph (fh, NULL, &gr, &ct, &err));
+
+    EXPECT_INT_EQ (VOLK_graph_size (gr), 1);
+    EXPECT_INT_EQ (ct, 1);
+
+    // Fetch the only triple and compare its object with the generated text.
+    VOLK_GraphIterator *it = VOLK_graph_lookup (gr, NULL, NULL, NULL, NULL);
+    VOLK_Triple *spo;
+    EXPECT_PASS (VOLK_graph_iter_next (it, &spo));
+    VOLK_graph_iter_free (it);
+
+    EXPECT_STR_EQ (spo->o->data, large_lit);
+
+    VOLK_graph_free (gr);
+    VOLK_triple_free (spo);
+    fclose(fh);
+    unlink (fpath);
+    free (large_lit);
+
+    return 0;
+}
+
+
 int
 test_decode_nt_bad_graph()
 {
@@ -281,6 +346,8 @@ int codec_nt_tests()
     RUN (test_encode_nt_graph);
     RUN (test_decode_nt_term);
     RUN (test_decode_nt_graph);
+    RUN (test_decode_nt_file);
+    RUN (test_decode_large_lit_file);
     RUN (test_decode_nt_bad_graph);
 
     free_terms (terms);

+ 5 - 5
test/test_codec_ttl.c

@@ -59,9 +59,9 @@ test_w3c_pos()
     char ch;
 
     for (int i = 0; i <= W3C_POS_TEST_CT; i++) {
-#if 1
-        // Tests 14÷16 with 10K triples is quite long. Skip them temporarily.
-        // TODO use a switch based on env var.
+#ifndef VOLK_TEST_LARGE
+        // Tests 14÷16 with 10K triples is quite long. Skip them unless
+        // explicitly requested.
         if (i > 12 && i <17) continue;
 #endif
         size_t nt_ct = 0;
@@ -126,8 +126,8 @@ int codec_ttl_tests()
     RUN (test_decode_nt_bad_graph);
     // TODO temporarily disabled; full W3C test suite at
     // https://w3c.github.io/rdf-tests/rdf/ shall replace these.
-    //RUN (test_w3c_pos);
-    //RUN (test_w3c_neg);
+    RUN (test_w3c_pos);
+    RUN (test_w3c_neg);
 
     free_terms(terms);
     for (int i = 0; i < TRP_CT; i++)

部分文件因为文件数量过多而无法显示