#25 Rewrite in ISO C11.

已合併
scossu 1 周之前 將 5 次代碼提交從 scossu/iso_c合併至 scossu/master

+ 1 - 1
Makefile

@@ -36,7 +36,7 @@ MASSIF_DUMP := $(TMPDIR)/volksdata_massif.out
 
 INCLUDE_BASE := . -Iinclude -Iext/hashmap -Iext/log/src
 INCLUDE := -I$(INCLUDE_BASE)
-_CFLAGS = -std=gnu11 -Wall -Wextra -fPIC $(INCLUDE)
+_CFLAGS = -std=c11 -Wall -Wextra -fPIC $(INCLUDE)
 CFLAGS = $(if $(DEBUG),\
 		 $(_CFLAGS) -Itest -O0 -ggdb -DDEBUG,\
 		 $(_CFLAGS) -O3 -g0)

+ 24 - 24
README.md

@@ -37,10 +37,6 @@ Test coverage is not sufficient. Documentation is fairly extensive but needs
 reformatting. This code is being integrated in higher-level projects and is
 being improved as issues arise.
 
-Portability is still under assessment. The goal is to make this code POSIX
-compatible. ANSI C compatibility is out of reach because of the LMDB
-dependency.
-
 This is my first stab at writing a C library (coming from Python) and an
 unpaid fun project, so don't be surprised if you find some gross stuff.
 
@@ -88,57 +84,61 @@ workable set of features as a standalone library:
 
 ## Building
 
-### Requirements
+### Required dependencies
 
-- It is recommended to build and run Volksdata on a Linux system. No other
-  OS has been tested so far.
-- A C compiler. This has been only tested with `gcc` so far.
+- A C compiler and standard library. This has been only tested with GCC and
+  GNU libc so far. Testing with musl libc is planned.
 - [LMDB](https://symas.com/lmdb/) libraries and headers.
 - [XXHash](https://github.com/Cyan4973/xxHash) >=0.8 libraries and headers.
 
 ### Optional dependencies
 - [re2c](https://re2c.org/) to build the RDF language lexers. Only required if
-  the codecs are changed. Otherwise, compiled lexers are included in this git
+  the lexers are changed. Otherwise, compiled lexers are included in this git
   repo.
 - [cinclude2dot](https://www.flourish.org/cinclude2dot) and
   [Graphviz](https://graphviz.org/) for generating visual dependency graph.
 
+### Conformance
+
+Volksdata is written in ISO C11. Note that LMDB uses by default POSIX-1.2011
+extensions, and may have to be compiled separately in non-POSIX systems.
 
 ### `make` commands
 
 The default `make` command (`make lib`) compiles the library. Enter `make help`
 to get an overview of the other available commands.
 
-`make install` and installs libraries and headers in the
-directories set by the environment variable `$PREFIX`. If this is unset, the
-default `/usr/local` prefix is used.
+`make install` installs libraries and headers in the directories set by the
+environment variable `$PREFIX`. If this is unset, the default `/usr/local`
+prefix is used.
 
-If `LOCAL` is set to anything else than `0`, the library will be installed in
-`$LOCAL_PREFIX` instead (by default,`$HOME/.local`.
+If `LOCAL` is set to `1`, the library will be installed in `$LOCAL_PREFIX`
+instead (by default,`$HOME/.local`).
 
-If `DEBUG` is set to anything else than `0`, the library will be compiled with
-debug symbols.
+If `DEBUG` is set to `1`, the library will be compiled with debug symbols.
 
 E.g.
 
 ```
+make DEBUG=1
 make install DEBUG=1 LOCAL=1
 ```
 
-Installs the library with debug symbols in `~/.local`.
+Builds and installs the library with debug symbols in `~/.local`.
 
+`make test` and similar always force `DEBUG=1`.
 
 ### Compile-Time defines (`-D[...]`)
 
-`DEBUG`: Set debug mode: memory map is at reduced size, logging is forced to
-TRACE level, etc.
-
 `VOLK_RDF_STREAM_CHUNK_SIZE`: Size of RDF decoding buffer, i.e., maximum size
 of a chunk of RDF data fed to the parser when decoding a RDF file into a graph.
-This should be larger than the maximum expected size of a single term in your
-RDF source. The default value is 8192, which is mildly conservative. If you
-experience parsing errors on decoding, and they happen to be on a term such a
-very long string literal, try recompiling the library with a larger value.
+This is set to 4Kb by default and is heap-allocated, so that if a RDF term
+is larger than this size, it is automatically increased for the duration of the
+decoding session. Systems with more memory may benefit from a larger buffer.
+
+`VOLK_TEST_LARGE`: Used with `make test`, `make memtest`, etc. to thoroughly
+test codecs on very large triple sets. Normally these tests are skipped to
+speed up development when no codec changes are involved.
 
 ## Embedding & linking
 

+ 2 - 2
include/volksdata/codec.h

@@ -13,7 +13,7 @@
 #ifdef VOLK_RDF_STREAM_CHUNK_SIZE
 #define CHUNK_SIZE VOLK_RDF_STREAM_CHUNK_SIZE
 #else
-#define CHUNK_SIZE 8192
+#define CHUNK_SIZE 4096
 #endif
 
 
@@ -257,7 +257,7 @@ typedef VOLK_rc (*term_decode_fn_t)(const char *rep, VOLK_Term **term);
  *  on parsing error.
  */
 typedef VOLK_rc (*gr_decode_fn_t)(
-        FILE *rep, VOLK_Graph **gr, size_t *ct, char **err);
+        FILE *fh, const char *sh, VOLK_Graph **gr, size_t *ct, char **err);
 
 
 /** @brief Codec structure.

+ 127 - 0
include/volksdata/codec/parser_common.h

@@ -0,0 +1,127 @@
+#ifndef _VOLK_PARSER_COMMON_H
+#define _VOLK_PARSER_COMMON_H
+
+#include "volksdata/codec.h"
+
+
+/** @brief TTL is UTF-8 encoded.
+ *
+ * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar
+ *
+ * `char` should be considered to be UTF-8 throughout this library, however,
+ * setting YYCTYPE to char generates case labels outside of the char range.
+ */
+#define YYCTYPE     uint8_t
+#define YYCURSOR    it->cur
+#define YYMARKER    it->mar
+#define YYLIMIT     it->lim
+#define YYFILL      fill(it) == 0
+
+
+typedef struct {
+    FILE          * fh;         ///< Input file handle.
+    const char    * sh;         ///< Input string. Exclusive with fh.
+    size_t          buf_size;   ///< Initial allocation for buffer.
+    YYCTYPE       * buf,        ///< Start of buffer.
+                  * lim,        ///< Position after the last available
+                                ///<   input character (YYLIMIT).
+                  * cur,        ///< Next input character to be read (YYCURSOR)
+                  * mar,        ///< Most recent match (YYMARKER)
+                  * tok,        ///< Start of current token.
+                  * bol;        ///< Address of the beginning of the
+                                ///<   current line (for debugging).
+    unsigned        line;       ///< Current line no. (for debugging).
+    unsigned        ct;         ///< Number of statements parsed.
+    bool            eof;        ///< if we have reached EOF.
+    /*!stags:re2c format = "YYCTYPE *@@;"; */
+} ParseIterator;
+
+
+static int fill(ParseIterator *it);
+
+
+/** @brief Initialize parser.
+ *
+ * @param[in] it iterator handle to be initialized.
+ *
+ * @param[in] fh Open file handle to read from. This is exclusive with sh. If
+ *  both fh and sh are provided, fh has precedence.
+ *
+ * @param[in] sh String to read from. This is exclusive with fh.
+ */
+static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
+{
+    if(fh) {
+        // Stream handling. It engages YYFILL and reads by chunks.
+        it->fh = fh;
+        it->sh = NULL;
+        it->buf_size = CHUNK_SIZE;
+        it->buf = malloc(it->buf_size);
+        if (!it->buf) log_error ("Error allocating lexer buffer.");
+        it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1;
+        it->bol = it->buf;
+        it->eof = false;
+        it->lim[0] = 0;
+    } else {
+        // String handling. Uses the provided string as the buffer.
+        it->fh = NULL;
+        it->sh = sh;
+        it->buf_size = strlen(sh) + 1;
+        it->buf = NULL;
+        it->cur = it->tok = (YYCTYPE*)it->sh;
+        it->lim = it->mar = it->cur + it->buf_size - 1;
+        it->bol = it->cur;
+        it->eof = true;
+    }
+    it->line = 1;
+    it->ct = 0;
+    /*!stags:re2c format = "it->@@ = NULL; "; */
+}
+
+
+int
+fill(ParseIterator *it)
+{
+    log_debug ("Filling codec buffer @ %p.", it->buf);
+    if (it->eof) return 1;
+
+    size_t shift = it->tok - it->buf;
+    size_t used = it->lim - it->tok;
+
+    // If buffer is too small for the lexeme, double the capacity.
+    if (shift < 1) {
+        YYCTYPE *old_buf = it->buf;
+        shift += it->buf_size;
+        it->buf_size *= 2;
+        LOG_DEBUG ("Reallocating buffer to %zu bytes.", it->buf_size);
+        it->buf = realloc (it->buf, it->buf_size);
+        if (!it->buf) {
+            log_error ("Memory allocation error.");
+            return -1;
+        }
+        // Move all relative points if address changed.
+        size_t reloc_off = it->buf - old_buf;
+        it->cur += reloc_off;
+        it->tok += reloc_off;
+        it->lim += reloc_off;
+        it->mar += reloc_off;
+    } else {
+        LOG_DEBUG("Shifting bytes: %zu", shift);
+        memmove (it->buf, it->tok, used);
+        LOG_TRACE ("Limit offset before reading data: %zu", it->lim - it->tok);
+        it->lim -= shift;
+        it->cur -= shift;
+        it->mar -= shift;
+        it->tok -= shift;
+    }
+    it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh);
+    /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
+    LOG_TRACE ("Cursor offset from last token: %zu", it->cur - it->tok);
+    LOG_TRACE ("Limit offset from last token: %zu", it->lim - it->tok);
+    it->lim[0] = 0;
+    it->eof = it->lim < it->buf + it->buf_size - 1;
+    return 0;
+}
+
+
+#endif // _VOLK_PARSER_COMMON_H

+ 2 - 1
include/volksdata/codec/parser_nt.h

@@ -33,6 +33,7 @@ VOLK_nt_parse_term (const char *rep, VOLK_Term **term);
  *  encountered. On error, `err` will contain the error message.
  */
 VOLK_rc
-VOLK_nt_parse_doc (FILE *stream, VOLK_Graph **gr, size_t *ct, char **err);
+VOLK_nt_parse_doc (
+        FILE *fh, const char *sh, VOLK_Graph **gr, size_t *ct, char **err);
 
 #endif

+ 2 - 1
include/volksdata/codec/parser_ttl.h

@@ -20,6 +20,7 @@
  *  encountered. On error, `err` will contain the error message.
  */
 VOLK_rc
-VOLK_ttl_parse_doc (FILE *stream, VOLK_Graph **gr, size_t *ct, char **err);
+VOLK_ttl_parse_doc (
+        FILE *fh, const char *sh, VOLK_Graph **gr, size_t *ct, char **err);
 
 #endif

+ 7 - 6
include/volksdata/codec/tokens_ttl.h

@@ -6,8 +6,8 @@
 #define T_DOUBLE                           6
 #define T_DECIMAL                          7
 #define T_BOOLEAN                          8
-#define T_QNAME                            9
-#define T_BNODE_ID                        10
+#define T_BNODE_ID                         9
+#define T_QNAME                           10
 #define T_IRIREF                          11
 #define T_LANGTAG                         12
 #define T_PREFIX                          13
@@ -17,7 +17,8 @@
 #define T_BASE                            17
 #define T_RDF_TYPE                        18
 #define T_DTYPE_MARKER                    19
-#define T_LBRACKET                        20
-#define T_RBRACKET                        21
-#define T_LPAREN                          22
-#define T_RPAREN                          23
+#define T_ANON                            20
+#define T_LBRACKET                        21
+#define T_RBRACKET                        22
+#define T_LPAREN                          23
+#define T_RPAREN                          24

+ 21 - 0
include/volksdata/core.h

@@ -382,6 +382,27 @@ VOLK_strerror (VOLK_rc rc);
 */
 
 
+/** @brief Replacement for GNU strndup.
+ *
+ * param[in] src String to duplicate.
+ * param[in] max Max number of characters to duplicate. The length is capped
+ *  to the smaller value between this and the source string length (characters
+ *  up to the trailing `\0`).
+ *
+ * return Duplicated string. The caller is in charge of freeing it after use.
+ */
+char *strndup (const char *src, size_t max);
+
+
+/** @brief Replacement for GNU strdup.
+ *
+ * param[in] str String to duplicate.
+ *
+ * return Duplicated string. The caller is in charge of freeing it after use.
+ */
+char *strdup (const char *src);
+
+
 /** @brief Make recursive directories.
  *
  * Modified from

+ 16 - 19
src/codec/Makefile

@@ -10,30 +10,27 @@ BUILDDIR = $(BASEDIR)/build
 
 CODEC_SRC = $(wildcard codec_*.c)
 PARSER_SRC = $(CODEC_SRC:codec_%=parser_%)
-ifneq ($(DEBUG),)
-CODEC_OBJ := $(CODEC_SRC:%.c=$(BUILDDIR)/%_dbg.o)
-else
-CODEC_OBJ := $(CODEC_SRC:%.c=$(BUILDDIR)/%.o)
-endif
-PARSER_OBJ := $(subst codec,parser,$(CODEC_OBJ))
-GRAMMAR_OBJ := $(subst codec,grammar,$(CODEC_OBJ))
+CODEC_OBJ = $(if $(DEBUG),\
+		$(CODEC_SRC:%.c=$(BUILDDIR)/%_dbg.o),\
+		$(CODEC_SRC:%.c=$(BUILDDIR)/%.o))
+
+PARSER_OBJ = $(subst codec,parser,$(CODEC_OBJ))
+GRAMMAR_OBJ = $(subst codec,grammar,$(CODEC_OBJ))
 OBJ = $(GRAMMAR_OBJ) $(PARSER_OBJ) $(CODEC_OBJ)
 
 INCLUDE := -I$(INCLUDE_DIR) -I$(BASEDIR)/ext/tpl/src -I$(BASEDIR)/ext/hashmap \
 	-I$(BASEDIR)/ext/log/src
-_CFLAGS := -std=gnu11 -Wall -fPIC $(INCLUDE)
+_CFLAGS := -std=c11 -Wall -fPIC $(INCLUDE)
 
-ifneq ($(DEBUG),)
-CFLAGS = $(_CFLAGS) -I$(BASEDIR)/test -O0 -g3 -DDEBUG
-else
-CFLAGS = $(_CFLAGS) -O3 -g0
-endif
+CFLAGS = $(if $(DEBUG),\
+		$(_CFLAGS) -I$(BASEDIR)/test -O0 -g3 -DDEBUG,\
+		$(_CFLAGS) -O3 -g0 -DNDEBUG)
 
-$(info CODEC_OBJ: $(CODEC_OBJ))
-$(info GRAMMAR_OBJ: $(GRAMMAR_OBJ))
-$(info PARSER_OBJ: $(PARSER_OBJ))
-$(info OBJ: $(OBJ))
-$(info CFLAGS: $(CFLAGS))
+#$(info CODEC_OBJ: $(CODEC_OBJ))
+#$(info GRAMMAR_OBJ: $(GRAMMAR_OBJ))
+#$(info PARSER_OBJ: $(PARSER_OBJ))
+#$(info OBJ: $(OBJ))
+#$(info CFLAGS: $(CFLAGS))
 
 .DEFAULT_GOAL := codec
 
@@ -51,7 +48,7 @@ $(BUILDDIR)/%_dbg.o: %.c
 
 # Parser C sources.
 parser_%.c: lexer_%.re grammar_%.c ../codec.c
-	$(LEXER) $< -o $@ -T --case-ranges
+	$(LEXER) $< -o $@ -T --case-ranges -W
 
 
 .PRECIOUS: grammar_%.c $(CODEC_INCLUDE_DIR)/tokens_%.h

+ 289 - 276
src/codec/grammar_ttl.c

@@ -23,8 +23,8 @@
 #define T_DOUBLE                          6
 #define T_DECIMAL                         7
 #define T_BOOLEAN                         8
-#define T_QNAME                           9
-#define T_BNODE_ID                       10
+#define T_BNODE_ID                        9
+#define T_QNAME                          10
 #define T_IRIREF                         11
 #define T_LANGTAG                        12
 #define T_PREFIX                         13
@@ -34,10 +34,11 @@
 #define T_BASE                           17
 #define T_RDF_TYPE                       18
 #define T_DTYPE_MARKER                   19
-#define T_LBRACKET                       20
-#define T_RBRACKET                       21
-#define T_LPAREN                         22
-#define T_RPAREN                         23
+#define T_ANON                           20
+#define T_LBRACKET                       21
+#define T_RBRACKET                       22
+#define T_LPAREN                         23
+#define T_RPAREN                         24
 #endif
 /**************** End token definitions ***************************************/
 
@@ -97,16 +98,16 @@
 #endif
 /************* Begin control #defines *****************************************/
 #define YYCODETYPE unsigned char
-#define YYNOCODE 41
+#define YYNOCODE 42
 #define YYACTIONTYPE unsigned char
 #define TTLParseTOKENTYPE  char * 
 typedef union {
   int yyinit;
   TTLParseTOKENTYPE yy0;
-  VOLK_LinkMap * yy2;
-  VOLK_Term * yy12;
-  char * yy33;
-  VOLK_TermSet * yy60;
+  char * yy1;
+  VOLK_TermSet * yy22;
+  VOLK_LinkMap * yy34;
+  VOLK_Term * yy50;
 } YYMINORTYPE;
 #ifndef YYSTACKDEPTH
 #define YYSTACKDEPTH 100
@@ -124,7 +125,7 @@ typedef union {
 #define YYNSTATE             28
 #define YYNRULE              41
 #define YYNRULE_WITH_ACTION  27
-#define YYNTOKEN             24
+#define YYNTOKEN             25
 #define YY_MAX_SHIFT         27
 #define YY_MIN_SHIFTREDUCE   63
 #define YY_MAX_SHIFTREDUCE   103
@@ -199,51 +200,51 @@ typedef union {
 **  yy_default[]       Default action for each state.
 **
 *********** Begin parsing tables **********************************************/
-#define YY_ACTTAB_COUNT (111)
+#define YY_ACTTAB_COUNT (115)
 static const YYACTIONTYPE yy_action[] = {
- /*     0 */   105,    1,   65,    9,   17,   77,   78,   79,   80,   89,
- /*    10 */    81,   88,   17,   77,   78,   79,   80,   89,   81,   88,
- /*    20 */     7,   14,    2,   84,   14,   89,   81,   88,    7,   26,
- /*    30 */     2,   27,   96,   23,    5,  120,    7,  102,    2,  135,
- /*    40 */   135,  135,  135,   15,  130,  130,  130,  130,  130,  115,
- /*    50 */   116,    3,  116,  131,  131,  131,  131,  131,   11,   10,
- /*    60 */    19,  114,  114,  114,  114,  114,   20,  114,  114,  114,
- /*    70 */   114,  114,  113,  113,  113,  113,  113,   89,   83,   88,
- /*    80 */    16,   13,    4,   89,   13,   88,   73,    8,   66,   82,
- /*    90 */    18,   13,   73,  102,   13,   89,   89,   88,   88,    6,
- /*   100 */    85,   75,   64,   21,   22,   73,   63,   25,   12,   24,
- /*   110 */   134,
+ /*     0 */   105,    1,   65,    9,   17,   77,   78,   79,   80,   81,
+ /*    10 */    89,   88,   17,   77,   78,   79,   80,   81,   89,   88,
+ /*    20 */    82,    7,   14,    2,   84,   14,   16,   13,   82,    7,
+ /*    30 */    13,    2,  120,   81,   89,   88,    5,   26,  102,   27,
+ /*    40 */    96,   23,   18,   13,   82,    7,   13,    2,  135,  135,
+ /*    50 */   135,  135,   15,  130,  130,  130,  130,  130,  115,  116,
+ /*    60 */     3,  116,   89,   88,    4,  131,  131,  131,  131,  131,
+ /*    70 */    11,   19,  114,  114,  114,  114,  114,   20,  114,  114,
+ /*    80 */   114,  114,  114,   66,  113,  113,  113,  113,  113,   89,
+ /*    90 */    88,  102,   89,   88,   10,   75,    8,   73,    6,   85,
+ /*   100 */    73,   64,   12,   22,   21,   63,   25,   24,  134,  106,
+ /*   110 */   106,  106,  106,  106,   83,
 };
 static const YYCODETYPE yy_lookahead[] = {
- /*     0 */    24,   25,    1,    2,    4,    5,    6,    7,    8,    9,
+ /*     0 */    25,   26,    1,    2,    4,    5,    6,    7,    8,    9,
  /*    10 */    10,   11,    4,    5,    6,    7,    8,    9,   10,   11,
- /*    20 */    20,   33,   22,   23,   36,    9,   10,   11,   20,   13,
- /*    30 */    22,   15,   16,   17,   31,   36,   20,   16,   22,   26,
- /*    40 */    27,   28,   29,   30,   35,   36,   37,   38,   39,   36,
- /*    50 */    37,   31,   39,   35,   36,   37,   38,   39,   40,    2,
- /*    60 */    34,   35,   36,   37,   38,   39,   34,   35,   36,   37,
- /*    70 */    38,   39,   35,   36,   37,   38,   39,    9,   21,   11,
- /*    80 */    32,   33,   31,    9,   36,   11,   18,   31,    1,   21,
- /*    90 */    32,   33,   18,   16,   36,    9,    9,   11,   11,    3,
- /*   100 */    23,   12,    1,   11,   16,   18,    1,   16,   19,   11,
- /*   110 */     0,   41,   41,   41,   41,   41,   41,   41,   41,   41,
- /*   120 */    41,   41,   41,   41,   41,   41,   41,   41,   41,   41,
- /*   130 */    41,   41,   41,   41,   41,
+ /*    20 */    20,   21,   34,   23,   24,   37,   33,   34,   20,   21,
+ /*    30 */    37,   23,   37,    9,   10,   11,   32,   13,   16,   15,
+ /*    40 */    16,   17,   33,   34,   20,   21,   37,   23,   27,   28,
+ /*    50 */    29,   30,   31,   36,   37,   38,   39,   40,   37,   38,
+ /*    60 */    32,   40,   10,   11,   32,   36,   37,   38,   39,   40,
+ /*    70 */    41,   35,   36,   37,   38,   39,   40,   35,   36,   37,
+ /*    80 */    38,   39,   40,    1,   36,   37,   38,   39,   40,   10,
+ /*    90 */    11,   16,   10,   11,    2,   12,   32,   18,    3,   24,
+ /*   100 */    18,    1,   19,   16,   11,    1,   16,   11,    0,   42,
+ /*   110 */    42,   42,   42,   42,   22,   42,   42,   42,   42,   42,
+ /*   120 */    42,   42,   42,   42,   42,   42,   42,   42,   42,   42,
+ /*   130 */    42,   42,   42,   42,   25,   25,   25,   25,   25,   25,
 };
 #define YY_SHIFT_COUNT    (27)
 #define YY_SHIFT_MIN      (0)
-#define YY_SHIFT_MAX      (110)
+#define YY_SHIFT_MAX      (108)
 static const unsigned char yy_shift_ofst[] = {
- /*     0 */   111,   16,    0,    8,    8,    8,    8,   68,   74,   87,
- /*    10 */    74,   77,   86,   21,   21,   21,   57,   89,    1,   96,
- /*    20 */    96,  101,   92,   88,  105,   98,   91,  110,
+ /*     0 */   115,   24,    0,    8,    8,    8,    8,   79,   79,   82,
+ /*    10 */    79,   75,   52,   22,   22,   22,   92,   83,    1,   95,
+ /*    20 */    95,  100,   93,   87,  104,   96,   90,  108,
 };
 #define YY_REDUCE_COUNT (15)
-#define YY_REDUCE_MIN   (-24)
-#define YY_REDUCE_MAX   (58)
+#define YY_REDUCE_MIN   (-25)
+#define YY_REDUCE_MAX   (64)
 static const signed char yy_reduce_ofst[] = {
- /*     0 */   -24,   13,   18,   26,   32,    9,   37,   48,   58,  -12,
- /*    10 */   -12,    3,   -1,   20,   51,   56,
+ /*     0 */   -25,   21,   29,   36,   42,   17,   48,   -7,    9,  -12,
+ /*    10 */   -12,    4,   -5,   28,   32,   64,
 };
 static const YYACTIONTYPE yy_default[] = {
  /*     0 */   136,  104,  104,  104,  104,  104,  104,  104,  104,  104,
@@ -365,8 +366,8 @@ static const char *const yyTokenName[] = {
   /*    6 */ "DOUBLE",
   /*    7 */ "DECIMAL",
   /*    8 */ "BOOLEAN",
-  /*    9 */ "QNAME",
-  /*   10 */ "BNODE_ID",
+  /*    9 */ "BNODE_ID",
+  /*   10 */ "QNAME",
   /*   11 */ "IRIREF",
   /*   12 */ "LANGTAG",
   /*   13 */ "PREFIX",
@@ -376,27 +377,28 @@ static const char *const yyTokenName[] = {
   /*   17 */ "BASE",
   /*   18 */ "RDF_TYPE",
   /*   19 */ "DTYPE_MARKER",
-  /*   20 */ "LBRACKET",
-  /*   21 */ "RBRACKET",
-  /*   22 */ "LPAREN",
-  /*   23 */ "RPAREN",
-  /*   24 */ "turtleDoc",
-  /*   25 */ "statements",
-  /*   26 */ "statement",
-  /*   27 */ "prefixID",
-  /*   28 */ "base",
-  /*   29 */ "triples",
-  /*   30 */ "subject",
-  /*   31 */ "ows",
-  /*   32 */ "predObjList",
-  /*   33 */ "predicate",
-  /*   34 */ "objectList",
-  /*   35 */ "object",
-  /*   36 */ "resource",
-  /*   37 */ "blank",
-  /*   38 */ "literal",
-  /*   39 */ "collection",
-  /*   40 */ "itemList",
+  /*   20 */ "ANON",
+  /*   21 */ "LBRACKET",
+  /*   22 */ "RBRACKET",
+  /*   23 */ "LPAREN",
+  /*   24 */ "RPAREN",
+  /*   25 */ "turtleDoc",
+  /*   26 */ "statements",
+  /*   27 */ "statement",
+  /*   28 */ "prefixID",
+  /*   29 */ "base",
+  /*   30 */ "triples",
+  /*   31 */ "subject",
+  /*   32 */ "ows",
+  /*   33 */ "predObjList",
+  /*   34 */ "predicate",
+  /*   35 */ "objectList",
+  /*   36 */ "object",
+  /*   37 */ "resource",
+  /*   38 */ "blank",
+  /*   39 */ "literal",
+  /*   40 */ "collection",
+  /*   41 */ "itemList",
 };
 #endif /* defined(YYCOVERAGE) || !defined(NDEBUG) */
 
@@ -423,7 +425,7 @@ static const char *const yyRuleName[] = {
  /*  16 */ "literal ::= DECIMAL",
  /*  17 */ "literal ::= BOOLEAN",
  /*  18 */ "blank ::= BNODE_ID",
- /*  19 */ "blank ::= LBRACKET RBRACKET",
+ /*  19 */ "blank ::= ANON",
  /*  20 */ "blank ::= LBRACKET predObjList RBRACKET",
  /*  21 */ "blank ::= LPAREN RPAREN",
  /*  22 */ "collection ::= LPAREN itemList RPAREN",
@@ -579,8 +581,8 @@ static void yy_destructor(
     case 6: /* DOUBLE */
     case 7: /* DECIMAL */
     case 8: /* BOOLEAN */
-    case 9: /* QNAME */
-    case 10: /* BNODE_ID */
+    case 9: /* BNODE_ID */
+    case 10: /* QNAME */
     case 11: /* IRIREF */
     case 12: /* LANGTAG */
     case 13: /* PREFIX */
@@ -590,42 +592,43 @@ static void yy_destructor(
     case 17: /* BASE */
     case 18: /* RDF_TYPE */
     case 19: /* DTYPE_MARKER */
-    case 20: /* LBRACKET */
-    case 21: /* RBRACKET */
-    case 22: /* LPAREN */
-    case 23: /* RPAREN */
+    case 20: /* ANON */
+    case 21: /* LBRACKET */
+    case 22: /* RBRACKET */
+    case 23: /* LPAREN */
+    case 24: /* RPAREN */
 {
 #line 36 "grammar_ttl.y"
  (void) state; free ((yypminor->yy0)); 
-#line 625 "../../build/grammar_ttl.c"
+#line 628 "../../build/grammar_ttl.c"
 }
       break;
-    case 30: /* subject */
-    case 33: /* predicate */
-    case 35: /* object */
-    case 36: /* resource */
-    case 37: /* blank */
-    case 38: /* literal */
-    case 39: /* collection */
+    case 31: /* subject */
+    case 34: /* predicate */
+    case 36: /* object */
+    case 37: /* resource */
+    case 38: /* blank */
+    case 39: /* literal */
+    case 40: /* collection */
 {
-#line 120 "grammar_ttl.y"
- VOLK_term_free ((yypminor->yy12)); 
-#line 638 "../../build/grammar_ttl.c"
+#line 129 "grammar_ttl.y"
+ VOLK_term_free ((yypminor->yy50)); 
+#line 641 "../../build/grammar_ttl.c"
 }
       break;
-    case 32: /* predObjList */
+    case 33: /* predObjList */
 {
 #line 97 "grammar_ttl.y"
- VOLK_link_map_free ((yypminor->yy2)); 
-#line 645 "../../build/grammar_ttl.c"
+ VOLK_link_map_free ((yypminor->yy34)); 
+#line 648 "../../build/grammar_ttl.c"
 }
       break;
-    case 34: /* objectList */
-    case 40: /* itemList */
+    case 35: /* objectList */
+    case 41: /* itemList */
 {
-#line 108 "grammar_ttl.y"
- VOLK_term_set_free ((yypminor->yy60)); 
-#line 653 "../../build/grammar_ttl.c"
+#line 117 "grammar_ttl.y"
+ VOLK_term_set_free ((yypminor->yy22)); 
+#line 656 "../../build/grammar_ttl.c"
 }
       break;
 /********* End destructor definitions *****************************************/
@@ -847,7 +850,7 @@ static void yyStackOverflow(yyParser *yypParser){
 
     log_error ("Stack oveflow in TTL parsing.");
     state->rc = VOLK_MEM_ERR;
-#line 875 "../../build/grammar_ttl.c"
+#line 878 "../../build/grammar_ttl.c"
 /******** End %stack_overflow code ********************************************/
    TTLParseARG_STORE /* Suppress warning about unused %extra_argument var */
    TTLParseCTX_STORE
@@ -919,47 +922,47 @@ static void yy_shift(
 /* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side
 ** of that rule */
 static const YYCODETYPE yyRuleInfoLhs[] = {
-    27,  /* (0) prefixID ::= PREFIX WS IRIREF PERIOD */
-    28,  /* (1) base ::= BASE WS IRIREF PERIOD */
-    29,  /* (2) triples ::= subject ows predObjList PERIOD */
-    29,  /* (3) triples ::= subject ows predObjList SEMICOLON PERIOD */
-    32,  /* (4) predObjList ::= predicate ows objectList */
-    32,  /* (5) predObjList ::= predObjList SEMICOLON predicate ows objectList */
-    34,  /* (6) objectList ::= objectList COMMA object */
-    34,  /* (7) objectList ::= object */
-    30,  /* (8) subject ::= resource */
-    30,  /* (9) subject ::= blank */
-    33,  /* (10) predicate ::= RDF_TYPE */
-    38,  /* (11) literal ::= STRING */
-    38,  /* (12) literal ::= STRING LANGTAG */
-    38,  /* (13) literal ::= STRING DTYPE_MARKER resource */
-    38,  /* (14) literal ::= INTEGER */
-    38,  /* (15) literal ::= DOUBLE */
-    38,  /* (16) literal ::= DECIMAL */
-    38,  /* (17) literal ::= BOOLEAN */
-    37,  /* (18) blank ::= BNODE_ID */
-    37,  /* (19) blank ::= LBRACKET RBRACKET */
-    37,  /* (20) blank ::= LBRACKET predObjList RBRACKET */
-    37,  /* (21) blank ::= LPAREN RPAREN */
-    39,  /* (22) collection ::= LPAREN itemList RPAREN */
-    40,  /* (23) itemList ::= itemList ows object */
-    40,  /* (24) itemList ::= object */
-    36,  /* (25) resource ::= IRIREF */
-    36,  /* (26) resource ::= QNAME */
-    24,  /* (27) turtleDoc ::= statements EOF */
-    25,  /* (28) statements ::= statements statement */
-    25,  /* (29) statements ::= */
-    26,  /* (30) statement ::= prefixID */
-    26,  /* (31) statement ::= base */
-    26,  /* (32) statement ::= triples */
-    26,  /* (33) statement ::= WS */
-    33,  /* (34) predicate ::= resource */
-    35,  /* (35) object ::= resource */
-    35,  /* (36) object ::= blank */
-    35,  /* (37) object ::= literal */
-    37,  /* (38) blank ::= collection */
-    31,  /* (39) ows ::= WS */
-    31,  /* (40) ows ::= */
+    28,  /* (0) prefixID ::= PREFIX WS IRIREF PERIOD */
+    29,  /* (1) base ::= BASE WS IRIREF PERIOD */
+    30,  /* (2) triples ::= subject ows predObjList PERIOD */
+    30,  /* (3) triples ::= subject ows predObjList SEMICOLON PERIOD */
+    33,  /* (4) predObjList ::= predicate ows objectList */
+    33,  /* (5) predObjList ::= predObjList SEMICOLON predicate ows objectList */
+    35,  /* (6) objectList ::= objectList COMMA object */
+    35,  /* (7) objectList ::= object */
+    31,  /* (8) subject ::= resource */
+    31,  /* (9) subject ::= blank */
+    34,  /* (10) predicate ::= RDF_TYPE */
+    39,  /* (11) literal ::= STRING */
+    39,  /* (12) literal ::= STRING LANGTAG */
+    39,  /* (13) literal ::= STRING DTYPE_MARKER resource */
+    39,  /* (14) literal ::= INTEGER */
+    39,  /* (15) literal ::= DOUBLE */
+    39,  /* (16) literal ::= DECIMAL */
+    39,  /* (17) literal ::= BOOLEAN */
+    38,  /* (18) blank ::= BNODE_ID */
+    38,  /* (19) blank ::= ANON */
+    38,  /* (20) blank ::= LBRACKET predObjList RBRACKET */
+    38,  /* (21) blank ::= LPAREN RPAREN */
+    40,  /* (22) collection ::= LPAREN itemList RPAREN */
+    41,  /* (23) itemList ::= itemList ows object */
+    41,  /* (24) itemList ::= object */
+    37,  /* (25) resource ::= IRIREF */
+    37,  /* (26) resource ::= QNAME */
+    25,  /* (27) turtleDoc ::= statements EOF */
+    26,  /* (28) statements ::= statements statement */
+    26,  /* (29) statements ::= */
+    27,  /* (30) statement ::= prefixID */
+    27,  /* (31) statement ::= base */
+    27,  /* (32) statement ::= triples */
+    27,  /* (33) statement ::= WS */
+    34,  /* (34) predicate ::= resource */
+    36,  /* (35) object ::= resource */
+    36,  /* (36) object ::= blank */
+    36,  /* (37) object ::= literal */
+    38,  /* (38) blank ::= collection */
+    32,  /* (39) ows ::= WS */
+    32,  /* (40) ows ::= */
 };
 
 /* For rule J, yyRuleInfoNRhs[J] contains the negative of the number
@@ -984,7 +987,7 @@ static const signed char yyRuleInfoNRhs[] = {
    -1,  /* (16) literal ::= DECIMAL */
    -1,  /* (17) literal ::= BOOLEAN */
    -1,  /* (18) blank ::= BNODE_ID */
-   -2,  /* (19) blank ::= LBRACKET RBRACKET */
+   -1,  /* (19) blank ::= ANON */
    -3,  /* (20) blank ::= LBRACKET predObjList RBRACKET */
    -2,  /* (21) blank ::= LPAREN RPAREN */
    -3,  /* (22) collection ::= LPAREN itemList RPAREN */
@@ -1055,7 +1058,7 @@ static YYACTIONTYPE yy_reduce(
                 free (yymsp[-3].minor.yy0);
                 free (yymsp[-1].minor.yy0);
             }
-#line 1083 "../../build/grammar_ttl.c"
+#line 1086 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,16,&yymsp[-2].minor);
   yy_destructor(yypParser,1,&yymsp[0].minor);
         break;
@@ -1068,7 +1071,7 @@ static YYACTIONTYPE yy_reduce(
 
                 free (yymsp[-1].minor.yy0);
             }
-#line 1096 "../../build/grammar_ttl.c"
+#line 1099 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,16,&yymsp[-2].minor);
   yy_destructor(yypParser,1,&yymsp[0].minor);
 }
@@ -1076,262 +1079,272 @@ static YYACTIONTYPE yy_reduce(
       case 2: /* triples ::= subject ows predObjList PERIOD */
 #line 77 "grammar_ttl.y"
 {
-                size_t ct = VOLK_graph_add_link_map (state->it, yymsp[-1].minor.yy2);
+                size_t ct = VOLK_graph_add_link_map (state->it, yymsp[-1].minor.yy34);
                 state->ct += ct;
                 state->rc = VOLK_OK;
                 LOG_TRACE("Added %lu triples.", ct);
 
-                VOLK_term_free (yymsp[-3].minor.yy12);
-                VOLK_link_map_free (yymsp[-1].minor.yy2);
+                VOLK_term_free (yymsp[-3].minor.yy50);
+                VOLK_link_map_free (yymsp[-1].minor.yy34);
             }
-#line 1112 "../../build/grammar_ttl.c"
+#line 1115 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,1,&yymsp[0].minor);
         break;
       case 3: /* triples ::= subject ows predObjList SEMICOLON PERIOD */
 #line 86 "grammar_ttl.y"
 {
-                size_t ct = VOLK_graph_add_link_map (state->it, yymsp[-2].minor.yy2);
+                size_t ct = VOLK_graph_add_link_map (state->it, yymsp[-2].minor.yy34);
                 state->ct += ct;
                 state->rc = VOLK_OK;
                 LOG_TRACE("Added %lu triples.", ct);
 
-                VOLK_term_free (yymsp[-4].minor.yy12);
-                VOLK_link_map_free (yymsp[-2].minor.yy2);
+                VOLK_term_free (yymsp[-4].minor.yy50);
+                VOLK_link_map_free (yymsp[-2].minor.yy34);
             }
-#line 1126 "../../build/grammar_ttl.c"
+#line 1129 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,2,&yymsp[-1].minor);
   yy_destructor(yypParser,1,&yymsp[0].minor);
         break;
       case 4: /* predObjList ::= predicate ows objectList */
 #line 98 "grammar_ttl.y"
 {
-                yylhsminor.yy2 = VOLK_link_map_new (state->lms, VOLK_LINK_OUTBOUND);
-                VOLK_link_map_add (yylhsminor.yy2, yymsp[-2].minor.yy12, yymsp[0].minor.yy60);
+                VOLK_Term *s;
+                if (state->lms) s = state->lms;
+                else {
+                    // TODO This may be brittle. It is not verifying the
+                    // full BNode syntax.
+                    log_info ("Link map subject not present. Assuming BNode.");
+                    s = VOLK_bnode_new (NULL);
+                }
+                yylhsminor.yy34 = VOLK_link_map_new (s, VOLK_LINK_OUTBOUND);
+                VOLK_link_map_add (yylhsminor.yy34, yymsp[-2].minor.yy50, yymsp[0].minor.yy22);
+                if (s != state->lms) VOLK_term_free (s);
             }
-#line 1136 "../../build/grammar_ttl.c"
-  yymsp[-2].minor.yy2 = yylhsminor.yy2;
+#line 1148 "../../build/grammar_ttl.c"
+  yymsp[-2].minor.yy34 = yylhsminor.yy34;
         break;
       case 5: /* predObjList ::= predObjList SEMICOLON predicate ows objectList */
-#line 102 "grammar_ttl.y"
+#line 111 "grammar_ttl.y"
 {
-                VOLK_link_map_add (yymsp[-4].minor.yy2, yymsp[-2].minor.yy12, yymsp[0].minor.yy60);
-                yylhsminor.yy2 = yymsp[-4].minor.yy2;
+                VOLK_link_map_add (yymsp[-4].minor.yy34, yymsp[-2].minor.yy50, yymsp[0].minor.yy22);
+                yylhsminor.yy34 = yymsp[-4].minor.yy34;
             }
-#line 1145 "../../build/grammar_ttl.c"
+#line 1157 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,2,&yymsp[-3].minor);
-  yymsp[-4].minor.yy2 = yylhsminor.yy2;
+  yymsp[-4].minor.yy34 = yylhsminor.yy34;
         break;
       case 6: /* objectList ::= objectList COMMA object */
-#line 109 "grammar_ttl.y"
+#line 118 "grammar_ttl.y"
 {
-                if (VOLK_term_set_add (yymsp[-2].minor.yy60, yymsp[0].minor.yy12, NULL) == VOLK_NOACTION)
-                    VOLK_term_free (yymsp[0].minor.yy12);
-                yylhsminor.yy60 = yymsp[-2].minor.yy60;
+                if (VOLK_term_set_add (yymsp[-2].minor.yy22, yymsp[0].minor.yy50, NULL) == VOLK_NOACTION)
+                    VOLK_term_free (yymsp[0].minor.yy50);
+                yylhsminor.yy22 = yymsp[-2].minor.yy22;
             }
-#line 1156 "../../build/grammar_ttl.c"
+#line 1168 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,3,&yymsp[-1].minor);
-  yymsp[-2].minor.yy60 = yylhsminor.yy60;
+  yymsp[-2].minor.yy22 = yylhsminor.yy22;
         break;
       case 7: /* objectList ::= object */
-#line 114 "grammar_ttl.y"
+#line 123 "grammar_ttl.y"
 {
-                yylhsminor.yy60 = VOLK_term_set_new();
-                VOLK_term_set_add (yylhsminor.yy60, yymsp[0].minor.yy12, NULL);
+                yylhsminor.yy22 = VOLK_term_set_new();
+                VOLK_term_set_add (yylhsminor.yy22, yymsp[0].minor.yy50, NULL);
             }
-#line 1166 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy60 = yylhsminor.yy60;
+#line 1178 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy22 = yylhsminor.yy22;
         break;
       case 8: /* subject ::= resource */
       case 9: /* subject ::= blank */ yytestcase(yyruleno==9);
-#line 121 "grammar_ttl.y"
-{ state->lms = yymsp[0].minor.yy12; }
-#line 1173 "../../build/grammar_ttl.c"
+#line 130 "grammar_ttl.y"
+{ state->lms = yymsp[0].minor.yy50; }
+#line 1185 "../../build/grammar_ttl.c"
         break;
       case 10: /* predicate ::= RDF_TYPE */
 {  yy_destructor(yypParser,18,&yymsp[0].minor);
-#line 127 "grammar_ttl.y"
-{ yymsp[0].minor.yy12 = VOLK_iriref_new_ns ("rdf:type"); }
-#line 1179 "../../build/grammar_ttl.c"
+#line 136 "grammar_ttl.y"
+{ yymsp[0].minor.yy50 = VOLK_iriref_new_ns ("rdf:type"); }
+#line 1191 "../../build/grammar_ttl.c"
 }
         break;
       case 11: /* literal ::= STRING */
-#line 137 "grammar_ttl.y"
+#line 146 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, NULL);
-                LOG_TRACE("Created plain literal: \"%s\"", yylhsminor.yy12->data);
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, NULL);
+                LOG_TRACE("Created plain literal: \"%s\"", yylhsminor.yy50->data);
                 free (yymsp[0].minor.yy0);
             }
-#line 1189 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1201 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 12: /* literal ::= STRING LANGTAG */
-#line 142 "grammar_ttl.y"
+#line 151 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_lt_literal_new (yymsp[-1].minor.yy0, yymsp[0].minor.yy0);
-                LOG_TRACE("Created LT-literal: \"%s\"@%s", yylhsminor.yy12->data, yylhsminor.yy12->lang);
+                yylhsminor.yy50 = VOLK_lt_literal_new (yymsp[-1].minor.yy0, yymsp[0].minor.yy0);
+                LOG_TRACE("Created LT-literal: \"%s\"@%s", yylhsminor.yy50->data, yylhsminor.yy50->lang);
                 free (yymsp[-1].minor.yy0);
                 free (yymsp[0].minor.yy0);
             }
-#line 1200 "../../build/grammar_ttl.c"
-  yymsp[-1].minor.yy12 = yylhsminor.yy12;
+#line 1212 "../../build/grammar_ttl.c"
+  yymsp[-1].minor.yy50 = yylhsminor.yy50;
         break;
       case 13: /* literal ::= STRING DTYPE_MARKER resource */
-#line 148 "grammar_ttl.y"
+#line 157 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[-2].minor.yy0, yymsp[0].minor.yy12);
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[-2].minor.yy0, yymsp[0].minor.yy50);
                 LOG_TRACE(
                         "Created DT-literal: \"%s\"^^%s",
-                        yylhsminor.yy12->data, yylhsminor.yy12->datatype);
+                        yylhsminor.yy50->data, yylhsminor.yy50->datatype);
                 free (yymsp[-2].minor.yy0);
             }
-#line 1212 "../../build/grammar_ttl.c"
+#line 1224 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,19,&yymsp[-1].minor);
-  yymsp[-2].minor.yy12 = yylhsminor.yy12;
+  yymsp[-2].minor.yy50 = yylhsminor.yy50;
         break;
       case 14: /* literal ::= INTEGER */
-#line 155 "grammar_ttl.y"
+#line 164 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:integer"));
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:integer"));
                 free (yymsp[0].minor.yy0);
             }
-#line 1222 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1234 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 15: /* literal ::= DOUBLE */
-#line 159 "grammar_ttl.y"
+#line 168 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:double"));
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:double"));
                 free (yymsp[0].minor.yy0);
             }
-#line 1231 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1243 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 16: /* literal ::= DECIMAL */
-#line 163 "grammar_ttl.y"
+#line 172 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:decimal"));
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:decimal"));
                 free (yymsp[0].minor.yy0);
             }
-#line 1240 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1252 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 17: /* literal ::= BOOLEAN */
-#line 167 "grammar_ttl.y"
+#line 176 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:boolean"));
+                yylhsminor.yy50 = VOLK_literal_new (yymsp[0].minor.yy0, VOLK_iriref_new_ns ("xsd:boolean"));
                 free (yymsp[0].minor.yy0);
             }
-#line 1249 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1261 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 18: /* blank ::= BNODE_ID */
-#line 174 "grammar_ttl.y"
+#line 183 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_bnode_new (yymsp[0].minor.yy0);
-                LOG_TRACE("Created blank node: _:%s", yylhsminor.yy12->data);
+                yylhsminor.yy50 = VOLK_bnode_new (yymsp[0].minor.yy0);
+                LOG_TRACE("Created blank node: _:%s", yylhsminor.yy50->data);
                 free (yymsp[0].minor.yy0);
             }
-#line 1259 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1271 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
-      case 19: /* blank ::= LBRACKET RBRACKET */
-{  yy_destructor(yypParser,20,&yymsp[-1].minor);
-#line 179 "grammar_ttl.y"
+      case 19: /* blank ::= ANON */
+{  yy_destructor(yypParser,20,&yymsp[0].minor);
+#line 188 "grammar_ttl.y"
 {
-                yymsp[-1].minor.yy12 = VOLK_bnode_new (NULL);
-                LOG_TRACE("Created empty list BN: _:%s", yymsp[-1].minor.yy12->data);
+                LOG_TRACE ("Found empty BNode.");
+                yymsp[0].minor.yy50 = VOLK_bnode_new (NULL);
+                LOG_TRACE("Created empty list BN: _:%s", yymsp[0].minor.yy50->data);
             }
-#line 1269 "../../build/grammar_ttl.c"
-  yy_destructor(yypParser,21,&yymsp[0].minor);
+#line 1282 "../../build/grammar_ttl.c"
 }
         break;
       case 20: /* blank ::= LBRACKET predObjList RBRACKET */
-{  yy_destructor(yypParser,20,&yymsp[-2].minor);
-#line 183 "grammar_ttl.y"
+{  yy_destructor(yypParser,21,&yymsp[-2].minor);
+#line 193 "grammar_ttl.y"
 {
-                yymsp[-2].minor.yy12 = VOLK_bnode_new (NULL);
-                state->lms = yymsp[-2].minor.yy12;
-                state->ct += VOLK_graph_add_link_map (state->it, yymsp[-1].minor.yy2);
-                LOG_TRACE("Created list BN: _:%s", yymsp[-2].minor.yy12->data);
+                LOG_TRACE ("Found BNode with data.");
+                yymsp[-2].minor.yy50 = VOLK_bnode_new (NULL);
+                state->lms = yymsp[-2].minor.yy50;
+                state->ct += VOLK_graph_add_link_map (state->it, yymsp[-1].minor.yy34);
+                LOG_TRACE("Created list BN: _:%s", yymsp[-2].minor.yy50->data);
 
-                VOLK_link_map_free (yymsp[-1].minor.yy2);
+                VOLK_link_map_free (yymsp[-1].minor.yy34);
             }
-#line 1284 "../../build/grammar_ttl.c"
-  yy_destructor(yypParser,21,&yymsp[0].minor);
+#line 1297 "../../build/grammar_ttl.c"
+  yy_destructor(yypParser,22,&yymsp[0].minor);
 }
         break;
       case 21: /* blank ::= LPAREN RPAREN */
-{  yy_destructor(yypParser,22,&yymsp[-1].minor);
-#line 192 "grammar_ttl.y"
+{  yy_destructor(yypParser,23,&yymsp[-1].minor);
+#line 203 "grammar_ttl.y"
 {
-                yymsp[-1].minor.yy12 = VOLK_iriref_new_ns ("rdf:nil");
-                LOG_TRACE("Created list terminator: %s", yymsp[-1].minor.yy12->data);
+                yymsp[-1].minor.yy50 = VOLK_iriref_new_ns ("rdf:nil");
+                LOG_TRACE("Created list terminator: %s", yymsp[-1].minor.yy50->data);
             }
-#line 1295 "../../build/grammar_ttl.c"
-  yy_destructor(yypParser,23,&yymsp[0].minor);
+#line 1308 "../../build/grammar_ttl.c"
+  yy_destructor(yypParser,24,&yymsp[0].minor);
 }
         break;
       case 22: /* collection ::= LPAREN itemList RPAREN */
-{  yy_destructor(yypParser,22,&yymsp[-2].minor);
-#line 201 "grammar_ttl.y"
+{  yy_destructor(yypParser,23,&yymsp[-2].minor);
+#line 212 "grammar_ttl.y"
 {
-                yymsp[-2].minor.yy12 = VOLK_bnode_add_collection (state->it, yymsp[-1].minor.yy60);
-                VOLK_term_set_free (yymsp[-1].minor.yy60);
+                yymsp[-2].minor.yy50 = VOLK_bnode_add_collection (state->it, yymsp[-1].minor.yy22);
+                VOLK_term_set_free (yymsp[-1].minor.yy22);
             }
-#line 1306 "../../build/grammar_ttl.c"
-  yy_destructor(yypParser,23,&yymsp[0].minor);
+#line 1319 "../../build/grammar_ttl.c"
+  yy_destructor(yypParser,24,&yymsp[0].minor);
 }
         break;
       case 23: /* itemList ::= itemList ows object */
-#line 208 "grammar_ttl.y"
+#line 219 "grammar_ttl.y"
 {
-                if (VOLK_term_set_add (yymsp[-2].minor.yy60, yymsp[0].minor.yy12, NULL) == VOLK_NOACTION)
-                    VOLK_term_free (yymsp[0].minor.yy12);
-                yylhsminor.yy60 = yymsp[-2].minor.yy60;
+                if (VOLK_term_set_add (yymsp[-2].minor.yy22, yymsp[0].minor.yy50, NULL) == VOLK_NOACTION)
+                    VOLK_term_free (yymsp[0].minor.yy50);
+                yylhsminor.yy22 = yymsp[-2].minor.yy22;
             }
-#line 1317 "../../build/grammar_ttl.c"
-  yymsp[-2].minor.yy60 = yylhsminor.yy60;
+#line 1330 "../../build/grammar_ttl.c"
+  yymsp[-2].minor.yy22 = yylhsminor.yy22;
         break;
       case 24: /* itemList ::= object */
-#line 213 "grammar_ttl.y"
+#line 224 "grammar_ttl.y"
 {
-                yylhsminor.yy60 = VOLK_term_set_new ();
-                VOLK_term_set_add (yylhsminor.yy60, yymsp[0].minor.yy12, NULL);
+                yylhsminor.yy22 = VOLK_term_set_new ();
+                VOLK_term_set_add (yylhsminor.yy22, yymsp[0].minor.yy50, NULL);
             }
-#line 1326 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy60 = yylhsminor.yy60;
+#line 1339 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy22 = yylhsminor.yy22;
         break;
       case 25: /* resource ::= IRIREF */
-#line 220 "grammar_ttl.y"
+#line 231 "grammar_ttl.y"
 {
                 VOLK_Term *rel_iri = VOLK_iriref_new (yymsp[0].minor.yy0);
                 free (yymsp[0].minor.yy0);
                 if (state->base) {
-                    yylhsminor.yy12 = VOLK_iriref_new_abs (rel_iri, state->base);
+                    yylhsminor.yy50 = VOLK_iriref_new_abs (rel_iri, state->base);
                     VOLK_term_free (rel_iri);
                 } else {
-                    yylhsminor.yy12 = rel_iri;
+                    yylhsminor.yy50 = rel_iri;
                 }
-                LOG_TRACE("Created IRI: <%s>", yylhsminor.yy12->data);
+                LOG_TRACE("Created IRI: <%s>", yylhsminor.yy50->data);
             }
-#line 1342 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1355 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 26: /* resource ::= QNAME */
-#line 231 "grammar_ttl.y"
+#line 242 "grammar_ttl.y"
 {
-                yylhsminor.yy12 = VOLK_iriref_new_ns (yymsp[0].minor.yy0);
-                LOG_TRACE("Created IRI: %s", yylhsminor.yy12->data);
+                yylhsminor.yy50 = VOLK_iriref_new_ns (yymsp[0].minor.yy0);
+                LOG_TRACE("Created IRI: %s", yylhsminor.yy50->data);
                 free (yymsp[0].minor.yy0);
             }
-#line 1352 "../../build/grammar_ttl.c"
-  yymsp[0].minor.yy12 = yylhsminor.yy12;
+#line 1365 "../../build/grammar_ttl.c"
+  yymsp[0].minor.yy50 = yylhsminor.yy50;
         break;
       case 27: /* turtleDoc ::= statements EOF */
 #line 54 "grammar_ttl.y"
 {
 }
-#line 1359 "../../build/grammar_ttl.c"
+#line 1372 "../../build/grammar_ttl.c"
   yy_destructor(yypParser,15,&yymsp[0].minor);
         break;
       case 33: /* statement ::= WS */
@@ -1340,40 +1353,40 @@ static YYACTIONTYPE yy_reduce(
 #line 61 "grammar_ttl.y"
 {
 }
-#line 1368 "../../build/grammar_ttl.c"
+#line 1381 "../../build/grammar_ttl.c"
 }
         break;
       case 34: /* predicate ::= resource */
       case 35: /* object ::= resource */ yytestcase(yyruleno==35);
-{  yy_destructor(yypParser,36,&yymsp[0].minor);
-#line 126 "grammar_ttl.y"
+{  yy_destructor(yypParser,37,&yymsp[0].minor);
+#line 135 "grammar_ttl.y"
 {
 }
-#line 1377 "../../build/grammar_ttl.c"
+#line 1390 "../../build/grammar_ttl.c"
 }
         break;
       case 36: /* object ::= blank */
-{  yy_destructor(yypParser,37,&yymsp[0].minor);
-#line 132 "grammar_ttl.y"
+{  yy_destructor(yypParser,38,&yymsp[0].minor);
+#line 141 "grammar_ttl.y"
 {
 }
-#line 1385 "../../build/grammar_ttl.c"
+#line 1398 "../../build/grammar_ttl.c"
 }
         break;
       case 37: /* object ::= literal */
-{  yy_destructor(yypParser,38,&yymsp[0].minor);
-#line 133 "grammar_ttl.y"
+{  yy_destructor(yypParser,39,&yymsp[0].minor);
+#line 142 "grammar_ttl.y"
 {
 }
-#line 1393 "../../build/grammar_ttl.c"
+#line 1406 "../../build/grammar_ttl.c"
 }
         break;
       case 38: /* blank ::= collection */
-{  yy_destructor(yypParser,39,&yymsp[0].minor);
-#line 191 "grammar_ttl.y"
+{  yy_destructor(yypParser,40,&yymsp[0].minor);
+#line 202 "grammar_ttl.y"
 {
 }
-#line 1401 "../../build/grammar_ttl.c"
+#line 1414 "../../build/grammar_ttl.c"
 }
         break;
       default:
@@ -1428,7 +1441,7 @@ static void yy_parse_failed(
 
     log_error ("TTL parse error. Cannot continue.");
     state->rc = VOLK_PARSE_ERR;
-#line 1456 "../../build/grammar_ttl.c"
+#line 1469 "../../build/grammar_ttl.c"
 /************ End %parse_failure code *****************************************/
   TTLParseARG_STORE /* Suppress warning about unused %extra_argument variable */
   TTLParseCTX_STORE
@@ -1451,7 +1464,7 @@ static void yy_syntax_error(
 
     // Fail immediately on first error.
     yy_parse_failed (yypParser);
-#line 1479 "../../build/grammar_ttl.c"
+#line 1492 "../../build/grammar_ttl.c"
 /************ End %syntax_error code ******************************************/
   TTLParseARG_STORE /* Suppress warning about unused %extra_argument variable */
   TTLParseCTX_STORE

+ 14 - 3
src/codec/grammar_ttl.y

@@ -43,7 +43,7 @@
 %left PERIOD .
 %left SEMICOLON .
 %left COMMA .
-%left STRING INTEGER DOUBLE DECIMAL BOOLEAN QNAME BNODE_ID IRIREF .
+%left STRING INTEGER DOUBLE DECIMAL BOOLEAN BNODE_ID QNAME IRIREF .
 %nonassoc LANGTAG PREFIX .
 %nonassoc COLON .
 
@@ -96,8 +96,17 @@ triples 	::= subject(S) ows predObjList(L) SEMICOLON PERIOD . [PERIOD] {
 %type predObjList       { VOLK_LinkMap * }
 %destructor predObjList { VOLK_link_map_free ($$); }
 predObjList(A) ::= predicate(P) ows objectList(O) . [SEMICOLON] {
-                A = VOLK_link_map_new (state->lms, VOLK_LINK_OUTBOUND);
+                VOLK_Term *s;
+                if (state->lms) s = state->lms;
+                else {
+                    // TODO This may be brittle. It is not verifying the
+                    // full BNode syntax.
+                    log_info ("Link map subject not present. Assuming BNode.");
+                    s = VOLK_bnode_new (NULL);
+                }
+                A = VOLK_link_map_new (s, VOLK_LINK_OUTBOUND);
                 VOLK_link_map_add (A, P, O);
+                if (s != state->lms) VOLK_term_free (s);
             }
 predObjList(A) ::= predObjList(L) SEMICOLON predicate(P) ows objectList(O) . {
                 VOLK_link_map_add (L, P, O);
@@ -176,11 +185,13 @@ blank(A)    ::= BNODE_ID(D) . {
                 LOG_TRACE("Created blank node: _:%s", A->data);
                 free (D);
             }
-blank(A)    ::= LBRACKET RBRACKET . [BNODE_ID] {
+blank(A)    ::= ANON . [BNODE_ID] {
+                LOG_TRACE ("Found empty BNode.");
                 A = VOLK_bnode_new (NULL);
                 LOG_TRACE("Created empty list BN: _:%s", A->data);
             }
 blank(A)    ::= LBRACKET predObjList(L) RBRACKET . [BNODE_ID] {
+                LOG_TRACE ("Found BNode with data.");
                 A = VOLK_bnode_new (NULL);
                 state->lms = A;
                 state->ct += VOLK_graph_add_link_map (state->it, L);

+ 128 - 51
src/codec/lexer_nt.re

@@ -1,7 +1,21 @@
 #include "volksdata/codec/parser_nt.h"
 #include "volksdata/codec/tokens_nt.h"
-
-
+//#include "volksdata/codec/parser_common.h"
+
+
+/** BEGIN duplicate section
+ * This section is bit-by-bit identical in NT and TTL lexers. The copy in
+ * include/volksdata/codec/parser_common.h should be used, but some re2c tags
+ * are not being parsed in that location.
+ */
+
+/** @brief TTL is UTF-8 encoded.
+ *
+ * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar
+ *
+ * `char` should be considered to be UTF-8 throughout this library, however,
+ * setting YYCTYPE to char generates case labels outside of the char range.
+ */
 #define YYCTYPE     uint8_t
 #define YYCURSOR    it->cur
 #define YYMARKER    it->mar
@@ -10,57 +24,111 @@
 
 
 typedef struct {
-    FILE *          fh;                 // Input file handle.
-    YYCTYPE         buf[CHUNK_SIZE],    // Start of buffer.
-            *       lim,                // Position after the last available
-                                        //   input character (YYLIMIT).
-            *       cur,                // Next input character to be read
-                                        //   (YYCURSOR)
-            *       mar,                // Most recent match (YYMARKER)
-            *       tok,                // Start of current token.
-            *       bol;                // Address of the beginning of the
-                                        //   current line (for debugging).
-    unsigned        line;               // Current line no. (for debugging).
-    unsigned        ct;                 // Number of parsed triples.
-    bool            eof;                // if we have reached EOF.
+    FILE          * fh;         ///< Input file handle.
+    const char    * sh;         ///< Input string. Exclusive with fh.
+    size_t          buf_size;   ///< Initial allocation for buffer.
+    YYCTYPE       * buf,        ///< Start of buffer.
+                  * lim,        ///< Position after the last available
+                                ///<   input character (YYLIMIT).
+                  * cur,        ///< Next input character to be read (YYCURSOR)
+                  * mar,        ///< Most recent match (YYMARKER)
+                  * tok,        ///< Start of current token.
+                  * bol;        ///< Address of the beginning of the
+                                ///<   current line (for debugging).
+    unsigned        line;       ///< Current line no. (for debugging).
+    unsigned        ct;         ///< Number of statements parsed.
+    bool            eof;        ///< if we have reached EOF.
     /*!stags:re2c format = "YYCTYPE *@@;"; */
 } ParseIterator;
 
 
-static int fill(ParseIterator *it)
+static int fill(ParseIterator *it);
+
+
+/** @brief Initialize parser.
+ *
+ * @param[in] it iterator handle to be initialized.
+ *
+ * @param[in] fh Open file handle to read from. This is exclusive with sh. If
+ *  both fh and sh are provided, fh has precedence.
+ *
+ * @param[in] sh String to read from. This is exclusive with fh.
+ */
+static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
 {
-    if (it->eof) {
-        return 1;
+    if(fh) {
+        // Stream handling. It engages YYFILL and reads by chunks.
+        it->fh = fh;
+        it->sh = NULL;
+        it->buf_size = CHUNK_SIZE;
+        it->buf = malloc(it->buf_size);
+        if (!it->buf) log_error ("Error allocating lexer buffer.");
+        it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1;
+        it->bol = it->buf;
+        it->eof = false;
+        it->lim[0] = 0;
+    } else {
+        // String handling. Uses the provided string as the buffer.
+        it->fh = NULL;
+        it->sh = sh;
+        it->buf_size = strlen(sh) + 1;
+        it->buf = NULL;
+        it->cur = it->tok = (YYCTYPE*)it->sh;
+        it->lim = it->mar = it->cur + it->buf_size - 1;
+        it->bol = it->cur;
+        it->eof = true;
     }
-    const size_t shift = it->tok - it->buf;
+    it->line = 1;
+    it->ct = 0;
+    /*!stags:re2c format = "it->@@ = NULL; "; */
+}
+
+
+int
+fill(ParseIterator *it)
+{
+    log_debug ("Filling codec buffer @ %p.", it->buf);
+    if (it->eof) return 1;
+
+    size_t shift = it->tok - it->buf;
+    size_t used = it->lim - it->tok;
+
+    // If buffer is too small for the lexeme, double the capacity.
     if (shift < 1) {
-        return 2;
+        YYCTYPE *old_buf = it->buf;
+        shift += it->buf_size;
+        it->buf_size *= 2;
+        LOG_DEBUG ("Reallocating buffer to %zu bytes.", it->buf_size);
+        it->buf = realloc (it->buf, it->buf_size);
+        if (!it->buf) {
+            log_error ("Memory allocation error.");
+            return -1;
+        }
+        // Move all relative points if address changed.
+        size_t reloc_off = it->buf - old_buf;
+        it->cur += reloc_off;
+        it->tok += reloc_off;
+        it->lim += reloc_off;
+        it->mar += reloc_off;
+    } else {
+        LOG_DEBUG("Shifting bytes: %zu", shift);
+        memmove (it->buf, it->tok, used);
+        LOG_TRACE ("Limit offset before reading data: %zu", it->lim - it->tok);
+        it->lim -= shift;
+        it->cur -= shift;
+        it->mar -= shift;
+        it->tok -= shift;
     }
-    LOG_DEBUG("Shifting bytes: %lu", shift);
-    memmove(it->buf, it->tok, it->lim - it->tok);
-    it->lim -= shift;
-    it->cur -= shift;
-    it->mar -= shift;
-    it->tok -= shift;
-    it->lim += fread(it->lim, 1, shift, it->fh);
+    it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh);
     /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
+    LOG_TRACE ("Cursor offset from last token: %zu", it->cur - it->tok);
+    LOG_TRACE ("Limit offset from last token: %zu", it->lim - it->tok);
     it->lim[0] = 0;
-    it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
+    it->eof = it->lim < it->buf + it->buf_size - 1;
     return 0;
 }
 
-
-static void parse_init(ParseIterator *it, FILE *fh)
-{
-    it->fh = fh;
-    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE - 1;
-    it->line = 1;
-    it->bol = it->buf;
-    it->ct = 0;
-    it->eof = 0;
-    /*!stags:re2c format = "it->@@ = NULL; "; */
-    fill (it);
-}
+/** END duplicate section */
 
 
 // Parser interface. Required here to silence linters.
@@ -76,6 +144,9 @@ void NTParseTrace();
 static int lex (ParseIterator *it, VOLK_Term **term)
 {
     const YYCTYPE *lit_data_e, *dtype_s, *lang_s;
+    //(void) lit_data_e;
+    //(void) dtype_s;
+    //(void) lang_s;
 
 loop:
 
@@ -134,7 +205,7 @@ loop:
         *term = VOLK_iriref_new ((char*)data);
         free (data);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_IRIREF;
     }
 
@@ -175,7 +246,7 @@ loop:
         free (data);
         free (metadata);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_LITERAL;
     }
 
@@ -187,7 +258,7 @@ loop:
         *term = VOLK_term_new (VOLK_TERM_BNODE, (char*)data, NULL);
         free (data);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_BNODE;
     }
 
@@ -216,8 +287,8 @@ loop:
     }
 
     * {
-        LOG_DEBUG(
-            "Invalid token @ %lu: %s (\\x%x)",
+        log_error (
+            "Invalid token @ %p: %s (\\x%x)",
             YYCURSOR - it->buf - 1, it->tok, *it->tok);
 
         return -1;
@@ -230,14 +301,12 @@ loop:
 VOLK_rc
 VOLK_nt_parse_term (const char *rep, VOLK_Term **term)
 {
-    FILE *fh = fmemopen ((void *)rep, strlen (rep), "r");
-
     ParseIterator it;
-    parse_init (&it, fh);
+    parse_init (&it, NULL, rep);
 
     int ttype = lex (&it, term);
 
-    fclose (fh);
+    free (it.buf);
 
     switch (ttype) {
         case T_IRIREF:
@@ -250,13 +319,19 @@ VOLK_nt_parse_term (const char *rep, VOLK_Term **term)
 }
 
 VOLK_rc
-VOLK_nt_parse_doc (FILE *fh, VOLK_Graph **gr_p, size_t *ct, char **err_p)
+VOLK_nt_parse_doc (
+        FILE *fh, const char *sh, VOLK_Graph **gr_p, size_t *ct, char **err_p)
 {
     *err_p = NULL;
     *gr_p = NULL;
 
+    if (!fh && !sh) {
+        log_error ("Neither file handle nor string input provided.");
+        return VOLK_VALUE_ERR;
+    }
+
     ParseIterator parse_it;
-    parse_init (&parse_it, fh);
+    parse_init (&parse_it, fh, sh);
 
 #ifdef DEBUG
     NTParseTrace (stdout, "NT Parser > ");
@@ -319,6 +394,8 @@ finally: ;
     NTParse (parser, 0, NULL, it);
     NTParseFree (parser, free);
 
+    free (parse_it.buf);
+
     VOLK_graph_add_done (it);
     VOLK_term_free (term);
 

+ 129 - 62
src/codec/lexer_ttl.re

@@ -1,6 +1,12 @@
 #include "volksdata/codec/parser_ttl.h"
 #include "volksdata/codec/tokens_ttl.h"
+//#include "volksdata/codec/parser_common.h"
 
+/** BEGIN duplicate section
+ * This section is bit-by-bit identical in NT and TTL lexers. The copy in
+ * include/volksdata/codec/parser_common.h should be used, but some re2c tags
+ * are not being parsed in that location.
+ */
 
 /** @brief TTL is UTF-8 encoded.
  *
@@ -17,69 +23,117 @@
 
 
 typedef struct {
-    FILE *          fh;                 // Input file handle.
-    YYCTYPE         buf[CHUNK_SIZE],    // Start of buffer.
-            *       lim,                // Position after the last available
-                                        //   input character (YYLIMIT).
-            *       cur,                // Next input character to be read
-                                        //   (YYCURSOR)
-            *       mar,                // Most recent match (YYMARKER)
-            *       tok,                // Start of current token.
-            *       bol;                // Address of the beginning of the
-                                        //   current line (for debugging).
-    unsigned        line;               // Current line no. (for debugging).
-    unsigned        stmt;               // Current statement.
-    bool            eof;                // if we have reached EOF.
+    FILE          * fh;         ///< Input file handle.
+    const char    * sh;         ///< Input string. Exclusive with fh.
+    size_t          buf_size;   ///< Initial allocation for buffer.
+    YYCTYPE       * buf,        ///< Start of buffer.
+                  * lim,        ///< Position after the last available
+                                ///<   input character (YYLIMIT).
+                  * cur,        ///< Next input character to be read (YYCURSOR)
+                  * mar,        ///< Most recent match (YYMARKER)
+                  * tok,        ///< Start of current token.
+                  * bol;        ///< Address of the beginning of the
+                                ///<   current line (for debugging).
+    unsigned        line;       ///< Current line no. (for debugging).
+    unsigned        ct;         ///< Number of statements parsed.
+    bool            eof;        ///< if we have reached EOF.
     /*!stags:re2c format = "YYCTYPE *@@;"; */
 } ParseIterator;
 
-typedef struct {
-    YYCTYPE *       data;
-    size_t          size;
-} ParserToken;
+
+static int fill(ParseIterator *it);
 
 
-static int fill (ParseIterator *it)
+/** @brief Initialize parser.
+ *
+ * @param[in] it iterator handle to be initialized.
+ *
+ * @param[in] fh Open file handle to read from. This is exclusive with sh. If
+ *  both fh and sh are provided, fh has precedence.
+ *
+ * @param[in] sh String to read from. This is exclusive with fh.
+ */
+static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
 {
-    if (it->eof) {
-        return 1;
+    if(fh) {
+        // Stream handling. It engages YYFILL and reads by chunks.
+        it->fh = fh;
+        it->sh = NULL;
+        it->buf_size = CHUNK_SIZE;
+        it->buf = malloc(it->buf_size);
+        if (!it->buf) log_error ("Error allocating lexer buffer.");
+        it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1;
+        it->bol = it->buf;
+        it->eof = false;
+        it->lim[0] = 0;
+    } else {
+        // String handling. Uses the provided string as the buffer.
+        it->fh = NULL;
+        it->sh = sh;
+        it->buf_size = strlen(sh) + 1;
+        it->buf = NULL;
+        it->cur = it->tok = (YYCTYPE*)it->sh;
+        it->lim = it->mar = it->cur + it->buf_size - 1;
+        it->bol = it->cur;
+        it->eof = true;
     }
-    const size_t shift = it->tok - it->buf;
+    it->line = 1;
+    it->ct = 0;
+    /*!stags:re2c format = "it->@@ = NULL; "; */
+}
+
+
+int
+fill(ParseIterator *it)
+{
+    log_debug ("Filling codec buffer @ %p.", it->buf);
+    if (it->eof) return 1;
+
+    size_t shift = it->tok - it->buf;
+    size_t used = it->lim - it->tok;
+
+    // If buffer is too small for the lexeme, double the capacity.
     if (shift < 1) {
-        return 2;
+        YYCTYPE *old_buf = it->buf;
+        shift += it->buf_size;
+        it->buf_size *= 2;
+        LOG_DEBUG ("Reallocating buffer to %zu bytes.", it->buf_size);
+        it->buf = realloc (it->buf, it->buf_size);
+        if (!it->buf) {
+            log_error ("Memory allocation error.");
+            return -1;
+        }
+        // Move all relative points if address changed.
+        size_t reloc_off = it->buf - old_buf;
+        it->cur += reloc_off;
+        it->tok += reloc_off;
+        it->lim += reloc_off;
+        it->mar += reloc_off;
+    } else {
+        LOG_DEBUG("Shifting bytes: %zu", shift);
+        memmove (it->buf, it->tok, used);
+        LOG_TRACE ("Limit offset before reading data: %zu", it->lim - it->tok);
+        it->lim -= shift;
+        it->cur -= shift;
+        it->mar -= shift;
+        it->tok -= shift;
     }
-    LOG_TRACE("Shifting bytes: %lu", shift);
-    memmove(it->buf, it->tok, it->lim - it->tok);
-    it->lim -= shift;
-    it->cur -= shift;
-    it->mar -= shift;
-    it->tok -= shift;
-    it->lim += fread(it->lim, 1, shift, it->fh);
+    it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh);
     /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
+    LOG_TRACE ("Cursor offset from last token: %zu", it->cur - it->tok);
+    LOG_TRACE ("Limit offset from last token: %zu", it->lim - it->tok);
     it->lim[0] = 0;
-    it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
+    it->eof = it->lim < it->buf + it->buf_size - 1;
     return 0;
 }
 
-
-static void parse_init (ParseIterator *it, FILE *fh)
-{
-    it->fh = fh;
-    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE - 1;
-    it->line = 1;
-    it->stmt = 1;
-    it->bol = it->buf;
-    it->eof = 0;
-    /*!stags:re2c format = "it->@@ = NULL; "; */
-    fill (it);
-}
+/** END duplicate section */
 
 
-static inline void newline (ParseIterator *it) {
-    it->line ++;
-    it->bol = YYCURSOR;
-    LOG_TRACE("New line: #%u.", it->line);
-}
+typedef struct {
+    YYCTYPE *       data;
+    size_t          size;
+} ParserToken;
 
 
 // Parser interface. Required here to silence linters.
@@ -90,6 +144,13 @@ void TTLParseFree();
 void TTLParseTrace();
 #endif
 
+
+static inline void newline (ParseIterator *it) {
+    it->line ++;
+    it->bol = YYCURSOR;
+    LOG_TRACE("New line: #%u.", it->line);
+}
+
 // Lexer.
 
 static int lex (ParseIterator *it, YYCTYPE **token_p)
@@ -98,6 +159,7 @@ static int lex (ParseIterator *it, YYCTYPE **token_p)
 
     /*!re2c
 
+    re2c:yyfill:enable = 1;
     re2c:eof = 0;
     re2c:flags:8 = 1;
     re2c:flags:tags = 1;
@@ -267,27 +329,29 @@ loop: // Start new token.
         return T_DECIMAL;
     }
 
-    '(' WS? { return T_LPAREN; }
+    '(' WS* { return T_LPAREN; }
 
-    WS? ')' { return T_RPAREN; }
+    WS* ')' { return T_RPAREN; }
 
-    '[' WS? { return T_LBRACKET; }
+    '[' WS* ']' { return T_ANON; }
 
-    WS? ']' { return T_RBRACKET; }
+    '[' WS* { return T_LBRACKET; }
+
+    WS* ']' { return T_RBRACKET; }
 
     ':' { return T_COLON; }
 
-    WS? ';' WS? {
+    WS* ';' WS* {
         LOG_TRACE("End of object list.");
 
         return T_SEMICOLON;
     }
 
-    WS? ',' WS? { return T_COMMA; }
+    WS* ',' WS* { return T_COMMA; }
 
-    WS? '.' {
-        LOG_TRACE("End of statement #%u.", it->stmt);
-        it->stmt++;
+    WS* '.' {
+        LOG_TRACE("End of statement #%u.", it->ct);
+        it->ct++;
         return T_PERIOD;
     }
 
@@ -359,21 +423,23 @@ lchar:
 
 
 VOLK_rc
-VOLK_ttl_parse_doc (FILE *fh, VOLK_Graph **gr_p, size_t *ct, char **err_p)
+VOLK_ttl_parse_doc (
+        FILE *fh, const char *sh, VOLK_Graph **gr_p, size_t *ct, char **err_p)
 {
     *err_p = NULL;
     *gr_p = NULL;
 
-    if (!fh) return VOLK_VALUE_ERR;
+    if (!fh && !sh) {
+        log_error ("Neither file handle nor string input provided.");
+        return VOLK_VALUE_ERR;
+    }
 
-    VOLK_TTLParserState *state = malloc (sizeof (*state));
+    VOLK_TTLParserState *state = calloc (1, sizeof (*state));
     if (UNLIKELY (!state)) return VOLK_MEM_ERR;
-    state->base = NULL;
-    state->ct = 0;
     state->rc = VOLK_NORESULT;
 
     ParseIterator parse_it;
-    parse_init (&parse_it, fh);
+    parse_init (&parse_it, fh, sh);
 
     void *parser = TTLParseAlloc (malloc);
 
@@ -434,6 +500,7 @@ finally: ;
     VOLK_rc rc = state->rc;
     LOG_TRACE("rc is %d", rc);
 
+    free (parse_it.buf);
     TTLParseFree (parser, free);
 
     VOLK_graph_add_done (state->it);

+ 157 - 80
src/codec/parser_nt.c

@@ -1,9 +1,23 @@
-/* Generated by re2c 4.1 on Sun Aug 17 18:56:10 2025 */
+/* Generated by re2c 4.1 on Fri Aug 22 19:43:48 2025 */
 #line 1 "lexer_nt.re"
 #include "volksdata/codec/parser_nt.h"
 #include "volksdata/codec/tokens_nt.h"
-
-
+//#include "volksdata/codec/parser_common.h"
+
+
+/** BEGIN duplicate section
+ * This section is bit-by-bit identical in NT and TTL lexers. The copy in
+ * include/volksdata/codec/parser_common.h should be used, but some re2c tags
+ * are not being parsed in that location.
+ */
+
+/** @brief TTL is UTF-8 encoded.
+ *
+ * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar
+ *
+ * `char` should be considered to be UTF-8 throughout this library, however,
+ * setting YYCTYPE to char generates case labels outside of the char range.
+ */
 #define YYCTYPE     uint8_t
 #define YYCURSOR    it->cur
 #define YYMARKER    it->mar
@@ -12,70 +26,124 @@
 
 
 typedef struct {
-    FILE *          fh;                 // Input file handle.
-    YYCTYPE         buf[CHUNK_SIZE],    // Start of buffer.
-            *       lim,                // Position after the last available
-                                        //   input character (YYLIMIT).
-            *       cur,                // Next input character to be read
-                                        //   (YYCURSOR)
-            *       mar,                // Most recent match (YYMARKER)
-            *       tok,                // Start of current token.
-            *       bol;                // Address of the beginning of the
-                                        //   current line (for debugging).
-    unsigned        line;               // Current line no. (for debugging).
-    unsigned        ct;                 // Number of parsed triples.
-    bool            eof;                // if we have reached EOF.
+    FILE          * fh;         ///< Input file handle.
+    const char    * sh;         ///< Input string. Exclusive with fh.
+    size_t          buf_size;   ///< Initial allocation for buffer.
+    YYCTYPE       * buf,        ///< Start of buffer.
+                  * lim,        ///< Position after the last available
+                                ///<   input character (YYLIMIT).
+                  * cur,        ///< Next input character to be read (YYCURSOR)
+                  * mar,        ///< Most recent match (YYMARKER)
+                  * tok,        ///< Start of current token.
+                  * bol;        ///< Address of the beginning of the
+                                ///<   current line (for debugging).
+    unsigned        line;       ///< Current line no. (for debugging).
+    unsigned        ct;         ///< Number of statements parsed.
+    bool            eof;        ///< if we have reached EOF.
     
-#line 30 "parser_nt.c"
+#line 45 "parser_nt.c"
 YYCTYPE *yyt1;YYCTYPE *yyt2;YYCTYPE *yyt3;
-#line 26 "lexer_nt.re"
+#line 41 "lexer_nt.re"
 
 } ParseIterator;
 
 
-static int fill(ParseIterator *it)
+static int fill(ParseIterator *it);
+
+
+/** @brief Initialize parser.
+ *
+ * @param[in] it iterator handle to be initialized.
+ *
+ * @param[in] fh Open file handle to read from. This is exclusive with sh. If
+ *  both fh and sh are provided, fh has precedence.
+ *
+ * @param[in] sh String to read from. This is exclusive with fh.
+ */
+static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
 {
-    if (it->eof) {
-        return 1;
+    if(fh) {
+        // Stream handling. It engages YYFILL and reads by chunks.
+        it->fh = fh;
+        it->sh = NULL;
+        it->buf_size = CHUNK_SIZE;
+        it->buf = malloc(it->buf_size);
+        if (!it->buf) log_error ("Error allocating lexer buffer.");
+        it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1;
+        it->bol = it->buf;
+        it->eof = false;
+        it->lim[0] = 0;
+    } else {
+        // String handling. Uses the provided string as the buffer.
+        it->fh = NULL;
+        it->sh = sh;
+        it->buf_size = strlen(sh) + 1;
+        it->buf = NULL;
+        it->cur = it->tok = (YYCTYPE*)it->sh;
+        it->lim = it->mar = it->cur + it->buf_size - 1;
+        it->bol = it->cur;
+        it->eof = true;
     }
-    const size_t shift = it->tok - it->buf;
-    if (shift < 1) {
-        return 2;
-    }
-    LOG_DEBUG("Shifting bytes: %lu", shift);
-    memmove(it->buf, it->tok, it->lim - it->tok);
-    it->lim -= shift;
-    it->cur -= shift;
-    it->mar -= shift;
-    it->tok -= shift;
-    it->lim += fread(it->lim, 1, shift, it->fh);
+    it->line = 1;
+    it->ct = 0;
     
-#line 54 "parser_nt.c"
-if (it->yyt1) it->yyt1 -= shift; if (it->yyt2) it->yyt2 -= shift; if (it->yyt3) it->yyt3 -= shift; 
-#line 46 "lexer_nt.re"
+#line 91 "parser_nt.c"
+it->yyt1 = NULL; it->yyt2 = NULL; it->yyt3 = NULL; 
+#line 83 "lexer_nt.re"
 
-    it->lim[0] = 0;
-    it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
-    return 0;
 }
 
 
-static void parse_init(ParseIterator *it, FILE *fh)
+int
+fill(ParseIterator *it)
 {
-    it->fh = fh;
-    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE - 1;
-    it->line = 1;
-    it->bol = it->buf;
-    it->ct = 0;
-    it->eof = 0;
+    log_debug ("Filling codec buffer @ %p.", it->buf);
+    if (it->eof) return 1;
+
+    size_t shift = it->tok - it->buf;
+    size_t used = it->lim - it->tok;
+
+    // If buffer is too small for the lexeme, double the capacity.
+    if (shift < 1) {
+        YYCTYPE *old_buf = it->buf;
+        shift += it->buf_size;
+        it->buf_size *= 2;
+        LOG_DEBUG ("Reallocating buffer to %zu bytes.", it->buf_size);
+        it->buf = realloc (it->buf, it->buf_size);
+        if (!it->buf) {
+            log_error ("Memory allocation error.");
+            return -1;
+        }
+        // Move all relative points if address changed.
+        size_t reloc_off = it->buf - old_buf;
+        it->cur += reloc_off;
+        it->tok += reloc_off;
+        it->lim += reloc_off;
+        it->mar += reloc_off;
+    } else {
+        LOG_DEBUG("Shifting bytes: %zu", shift);
+        memmove (it->buf, it->tok, used);
+        LOG_TRACE ("Limit offset before reading data: %zu", it->lim - it->tok);
+        it->lim -= shift;
+        it->cur -= shift;
+        it->mar -= shift;
+        it->tok -= shift;
+    }
+    it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh);
     
-#line 73 "parser_nt.c"
-it->yyt1 = NULL; it->yyt2 = NULL; it->yyt3 = NULL; 
-#line 61 "lexer_nt.re"
+#line 135 "parser_nt.c"
+if (it->yyt1) it->yyt1 -= shift; if (it->yyt2) it->yyt2 -= shift; if (it->yyt3) it->yyt3 -= shift; 
+#line 123 "lexer_nt.re"
 
-    fill (it);
+    LOG_TRACE ("Cursor offset from last token: %zu", it->cur - it->tok);
+    LOG_TRACE ("Limit offset from last token: %zu", it->lim - it->tok);
+    it->lim[0] = 0;
+    it->eof = it->lim < it->buf + it->buf_size - 1;
+    return 0;
 }
 
+/** END duplicate section */
+
 
 // Parser interface. Required here to silence linters.
 void *NTParseAlloc();
@@ -90,6 +158,9 @@ void NTParseTrace();
 static int lex (ParseIterator *it, VOLK_Term **term)
 {
     const YYCTYPE *lit_data_e, *dtype_s, *lang_s;
+    //(void) lit_data_e;
+    //(void) dtype_s;
+    //(void) lang_s;
 
 loop:
 
@@ -98,7 +169,7 @@ loop:
     *term = NULL;
 
     
-#line 102 "parser_nt.c"
+#line 173 "parser_nt.c"
 {
 	YYCTYPE yych;
 	unsigned int yyaccept = 0;
@@ -124,15 +195,15 @@ yyFillLabel0:
 yy1:
 	++YYCURSOR;
 yy2:
-#line 218 "lexer_nt.re"
+#line 289 "lexer_nt.re"
 	{
-        LOG_DEBUG(
-            "Invalid token @ %lu: %s (\\x%x)",
+        log_error (
+            "Invalid token @ %p: %s (\\x%x)",
             YYCURSOR - it->buf - 1, it->tok, *it->tok);
 
         return -1;
     }
-#line 136 "parser_nt.c"
+#line 207 "parser_nt.c"
 yy3:
 	++YYCURSOR;
 yyFillLabel1:
@@ -147,13 +218,13 @@ yyFillLabel1:
 			goto yy4;
 	}
 yy4:
-#line 201 "lexer_nt.re"
+#line 272 "lexer_nt.re"
 	{
         LOG_DEBUG("Separator.");
 
         return T_WS;
     }
-#line 157 "parser_nt.c"
+#line 228 "parser_nt.c"
 yy5:
 	++YYCURSOR;
 yyFillLabel2:
@@ -169,14 +240,14 @@ yyFillLabel2:
 			goto yy6;
 	}
 yy6:
-#line 117 "lexer_nt.re"
+#line 188 "lexer_nt.re"
 	{
         it->line ++;
         it->bol = YYCURSOR;
         LOG_DEBUG("New line: #%u.", it->line);
         return T_EOL;
     }
-#line 180 "parser_nt.c"
+#line 251 "parser_nt.c"
 yy7:
 	yyaccept = 0;
 	YYMARKER = ++YYCURSOR;
@@ -218,7 +289,7 @@ yyFillLabel4:
 		default: goto yy9;
 	}
 yy9:
-#line 207 "lexer_nt.re"
+#line 278 "lexer_nt.re"
 	{
         size_t size = YYCURSOR - it->tok + 1;
         YYCTYPE *data = malloc (size);
@@ -229,17 +300,17 @@ yy9:
 
         goto loop;
     }
-#line 233 "parser_nt.c"
+#line 304 "parser_nt.c"
 yy10:
 	++YYCURSOR;
-#line 194 "lexer_nt.re"
+#line 265 "lexer_nt.re"
 	{
         LOG_DEBUG("End of triple.");
         it->ct ++;
 
         return T_DOT;
     }
-#line 243 "parser_nt.c"
+#line 314 "parser_nt.c"
 yy11:
 	yyaccept = 0;
 	YYMARKER = ++YYCURSOR;
@@ -348,7 +419,7 @@ yy17:
 	lit_data_e = it->yyt1;
 	dtype_s = it->yyt2;
 	lang_s = it->yyt3;
-#line 141 "lexer_nt.re"
+#line 212 "lexer_nt.re"
 	{
         // Only unescape Unicode from data.
         size_t size = lit_data_e - it->tok - 2;
@@ -386,10 +457,10 @@ yy17:
         free (data);
         free (metadata);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_LITERAL;
     }
-#line 393 "parser_nt.c"
+#line 464 "parser_nt.c"
 yy18:
 	++YYCURSOR;
 yyFillLabel9:
@@ -585,7 +656,7 @@ yy32:
 	}
 yy33:
 	++YYCURSOR;
-#line 129 "lexer_nt.re"
+#line 200 "lexer_nt.re"
 	{
         YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
 
@@ -594,10 +665,10 @@ yy33:
         *term = VOLK_iriref_new ((char*)data);
         free (data);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_IRIREF;
     }
-#line 601 "parser_nt.c"
+#line 672 "parser_nt.c"
 yy34:
 	++YYCURSOR;
 yyFillLabel23:
@@ -840,7 +911,7 @@ yy50:
 			goto yy51;
 	}
 yy51:
-#line 182 "lexer_nt.re"
+#line 253 "lexer_nt.re"
 	{
         YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
 
@@ -849,10 +920,10 @@ yy51:
         *term = VOLK_term_new (VOLK_TERM_BNODE, (char*)data, NULL);
         free (data);
 
-        if (!UNLIKELY (term)) return -1;
+        if (UNLIKELY (!term)) return -1;
         return T_BNODE;
     }
-#line 856 "parser_nt.c"
+#line 927 "parser_nt.c"
 yy52:
 	++YYCURSOR;
 yyFillLabel39:
@@ -1654,14 +1725,14 @@ yyFillLabel95:
 			goto yy15;
 	}
 yy110:
-#line 124 "lexer_nt.re"
+#line 195 "lexer_nt.re"
 	{
         LOG_DEBUG("End of buffer.");
         return T_EOF;
     }
-#line 1663 "parser_nt.c"
+#line 1734 "parser_nt.c"
 }
-#line 226 "lexer_nt.re"
+#line 297 "lexer_nt.re"
 
 }
 
@@ -1669,14 +1740,12 @@ yy110:
 VOLK_rc
 VOLK_nt_parse_term (const char *rep, VOLK_Term **term)
 {
-    FILE *fh = fmemopen ((void *)rep, strlen (rep), "r");
-
     ParseIterator it;
-    parse_init (&it, fh);
+    parse_init (&it, NULL, rep);
 
     int ttype = lex (&it, term);
 
-    fclose (fh);
+    free (it.buf);
 
     switch (ttype) {
         case T_IRIREF:
@@ -1689,13 +1758,19 @@ VOLK_nt_parse_term (const char *rep, VOLK_Term **term)
 }
 
 VOLK_rc
-VOLK_nt_parse_doc (FILE *fh, VOLK_Graph **gr_p, size_t *ct, char **err_p)
+VOLK_nt_parse_doc (
+        FILE *fh, const char *sh, VOLK_Graph **gr_p, size_t *ct, char **err_p)
 {
     *err_p = NULL;
     *gr_p = NULL;
 
+    if (!fh && !sh) {
+        log_error ("Neither file handle nor string input provided.");
+        return VOLK_VALUE_ERR;
+    }
+
     ParseIterator parse_it;
-    parse_init (&parse_it, fh);
+    parse_init (&parse_it, fh, sh);
 
 #ifdef DEBUG
     NTParseTrace (stdout, "NT Parser > ");
@@ -1758,6 +1833,8 @@ finally: ;
     NTParse (parser, 0, NULL, it);
     NTParseFree (parser, free);
 
+    free (parse_it.buf);
+
     VOLK_graph_add_done (it);
     VOLK_term_free (term);
 

文件差異過大導致無法顯示
+ 257 - 199
src/codec/parser_ttl.c


+ 98 - 16
src/core.c

@@ -1,6 +1,7 @@
-#define _XOPEN_SOURCE 500
 #include <errno.h>
-#include <ftw.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <unistd.h>
 #include <string.h>
 
 #include "lmdb.h"
@@ -87,26 +88,107 @@ finally:
 }
 
 
-int
-unlink_cb(
-        const char *fpath, const struct stat *sb, int typeflag,
-        struct FTW *ftwbuf)
+char *
+strndup (const char *src, size_t max)
 {
-    (void) sb;
-    (void) typeflag;
-    (void) ftwbuf;
+    size_t len = strlen (src);
+    if (len > max) len = max;
+
+    char *dup;
+    dup = malloc (len + 1);
+    if (dup) {
+        memcpy (dup, src, len);
+        dup[len] = '\0';
+    }
+
+    return dup;
+}
 
-    LOG_DEBUG("Removing %s", fpath);
-    int rv = remove(fpath);
 
-    if (rv)
-        perror(fpath);
+char *
+strdup (const char *src)
+{
+   char *dup;
+   dup = malloc (strlen (src) + 1);
+   if (dup) strcpy(dup, src);
 
-    return rv;
+   return dup;
 }
 
-int rm_r(const char *path)
-{ return nftw(path, unlink_cb, 64, FTW_DEPTH | FTW_PHYS); }
+
+/** @brief Remove a directory recursively (POSIX compatible).
+ *
+ * Adapted from
+ * https://stackoverflow.com/questions/5467725/how-to-delete-a-directory-and-its-contents-in-posix-c/42596507#42596507
+ */
+VOLK_rc rm_r (const char *path)
+{
+    size_t path_len;
+    char *full_path;
+    DIR *dir;
+    struct stat stat_path, stat_entry;
+    struct dirent *entry;
+
+    // stat for the path
+    stat(path, &stat_path);
+
+    // if path does not exists or is not dir - exit with status -1
+    if (S_ISDIR(stat_path.st_mode) == 0) {
+        log_error ("%s: %s\n", "Is not directory", path);
+        return VOLK_IO_ERR;
+    }
+
+    // if not possible to read the directory for this user
+    if ((dir = opendir(path)) == NULL) {
+        log_error ("%s: %s\n", "Can`t open directory", path);
+        return VOLK_IO_ERR;
+    }
+
+    // the length of the path
+    path_len = strlen(path);
+
+    // iteration through entries in the directory
+    while ((entry = readdir(dir)) != NULL) {
+
+        // skip entries "." and ".."
+        if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+            continue;
+
+        // determine a full path of an entry
+        full_path = calloc(
+                path_len + 1 + strlen(entry->d_name) + 1, sizeof(char));
+        strcpy(full_path, path);
+        strcat(full_path, "/");
+        strcat(full_path, entry->d_name);
+
+        // stat for the entry
+        stat(full_path, &stat_entry);
+
+        // recursively remove a nested directory
+        if (S_ISDIR(stat_entry.st_mode) != 0) {
+            rm_r (full_path);
+            free (full_path);
+            continue;
+        }
+
+        // remove a file object
+        if (unlink(full_path) == 0)
+            LOG_DEBUG ("Removed a file:\t%s\n", full_path);
+        else
+            log_error ("Can't remove a file:\t%s\n", full_path);
+        free(full_path);
+    }
+
+    // remove the devastated directory and close the object of it
+    if (rmdir(path) == 0)
+        LOG_DEBUG ("Removed a directory:\t%s\n", path);
+    else
+        log_error ("Can't remove a directory:\t%s\n", path);
+
+    closedir(dir);
+
+    return VOLK_OK;
+}
 
 
 const char *

+ 1 - 1
src/term.c

@@ -799,7 +799,7 @@ term_init (
                 log_warn (
                         "Characters %s are not valid in a URI. Got: %s\n",
                         invalid_uri_chars, fquri);
-#if 1
+#if 0
                 // TODO This causes W3C TTL test #29 to fail. Remove?
                 return VOLK_VALUE_ERR;
 #endif

+ 0 - 2
test.c

@@ -13,9 +13,7 @@
 int main() {
 
     // Set env variable to test path.
-    putenv ("VOLK_MDB_STORE_PATH=" TEST_STORE_PATH);
     // Clear out database from previous test.
-    rm_r (TEST_STORE_PATH);
 
     clock_t start, end;
     double wallclock;

+ 69 - 7
test/test_codec_nt.c

@@ -1,3 +1,5 @@
+#include <unistd.h>
+
 #include "volksdata/codec/codec_nt.h"
 #include "test.h"
 
@@ -210,14 +212,11 @@ test_decode_nt_term()
 int
 test_decode_nt_graph()
 {
-    FILE *input = fmemopen ((void *)start_nt_doc, strlen (start_nt_doc), "r");
 
     VOLK_Graph *gr;
     size_t ct;
     char *err;
-    EXPECT_PASS (codec.decode_graph (input, &gr, &ct, &err));
-
-    fclose (input);
+    EXPECT_PASS (codec.decode_graph (NULL, start_nt_doc, &gr, &ct, &err));
 
     ASSERT (err == NULL, "Error string is not NULL!");
 
@@ -250,16 +249,78 @@ test_decode_nt_graph()
 }
 
 
+int
+test_decode_nt_file()
+{
+    VOLK_Graph *gr;
+    size_t ct;
+    char *err;
+    FILE *fh = fopen ("test/assets/test2.nt", "r");
+
+    EXPECT_PASS (codec.decode_graph (fh, NULL, &gr, &ct, &err));
+
+    EXPECT_INT_EQ (VOLK_graph_size (gr), 7);
+    EXPECT_INT_EQ (ct, 8);
+
+    VOLK_graph_free (gr);
+    fclose(fh);
+
+    return 0;
+}
+
+
+#define LARGE_LIT_SIZE CHUNK_SIZE * 2 + 2  // More than 2 buffer pages.
+int
+test_decode_large_lit_file()
+{
+    VOLK_Graph *gr;
+    size_t ct;
+    char *err;
+    const char *fpath = "/tmp/test_large_lit.nt";
+    FILE *fh = fopen (fpath, "w");
+
+    char *large_lit = malloc(LARGE_LIT_SIZE + 1);
+    for (unsigned i = 0; i < LARGE_LIT_SIZE; i++)
+        large_lit[i] = rand() % 25 + 65;  // A-Z
+    large_lit[LARGE_LIT_SIZE] = '\0';
+
+    fprintf (fh, "<urn:s:1> <urn:p:1> \"");
+    fprintf(fh, large_lit);
+    fprintf(fh, "\" .\n");
+    fclose(fh);
+
+    fh = fopen (fpath, "r");
+    EXPECT_PASS (codec.decode_graph (fh, NULL, &gr, &ct, &err));
+
+    EXPECT_INT_EQ (VOLK_graph_size (gr), 1);
+    EXPECT_INT_EQ (ct, 1);
+
+    VOLK_GraphIterator *it = VOLK_graph_lookup (gr, NULL, NULL, NULL, NULL);
+    VOLK_Triple *spo;
+    EXPECT_PASS (VOLK_graph_iter_next (it, &spo));
+    VOLK_graph_iter_free (it);
+
+    EXPECT_STR_EQ (spo->o->data, large_lit);
+
+    VOLK_graph_free (gr);
+    VOLK_triple_free (spo);
+    fclose(fh);
+    unlink (fpath);
+    free (large_lit);
+
+    return 0;
+}
+
+
 int
 test_decode_nt_bad_graph()
 {
     log_info ("testing illegal NT document.");
-    FILE *input = fmemopen ((void *)bad_nt_doc, strlen (bad_nt_doc), "r");
 
     VOLK_Graph *gr;
     size_t ct;
     char *err;
-    VOLK_rc rc = codec.decode_graph (input, &gr, &ct, &err);
+    VOLK_rc rc = codec.decode_graph (NULL, bad_nt_doc, &gr, &ct, &err);
     EXPECT_INT_EQ (rc, VOLK_PARSE_ERR);
 
     log_info ("Error: %s", err);
@@ -268,7 +329,6 @@ test_decode_nt_bad_graph()
     ASSERT (strstr (err, "character 16") != NULL, "Wrong error char report!");
 
     free (err);
-    fclose (input);
     VOLK_graph_free (gr);
 
     return 0;
@@ -286,6 +346,8 @@ int codec_nt_tests()
     RUN (test_encode_nt_graph);
     RUN (test_decode_nt_term);
     RUN (test_decode_nt_graph);
+    RUN (test_decode_nt_file);
+    RUN (test_decode_large_lit_file);
     RUN (test_decode_nt_bad_graph);
 
     free_terms (terms);

+ 7 - 7
test/test_codec_ttl.c

@@ -59,9 +59,9 @@ test_w3c_pos()
     char ch;
 
     for (int i = 0; i <= W3C_POS_TEST_CT; i++) {
-#if 1
-        // Tests 14÷16 with 10K triples is quite long. Skip them temporarily.
-        // TODO use a switch based on env var.
+#ifndef VOLK_TEST_LARGE
+        // Tests 14÷16 with 10K triples is quite long. Skip them unless
+        // explicitly requested.
         if (i > 12 && i <17) continue;
 #endif
         size_t nt_ct = 0;
@@ -77,7 +77,7 @@ test_w3c_pos()
             if (ch == '\n') nt_ct++;
         }
 
-        EXPECT_PASS (codec.decode_graph (test_stream, &gr, &ct, &err));
+        EXPECT_PASS (codec.decode_graph (test_stream, NULL, &gr, &ct, &err));
         EXPECT_INT_EQ (VOLK_graph_size (gr), nt_ct); // Just count NT lines.
         VOLK_graph_free (gr);
         fclose (test_stream);
@@ -103,7 +103,7 @@ test_w3c_neg()
         FILE *test_stream = fopen (test_fname, "r");
         log_info ("Testing %s", test_fname);
 
-        VOLK_rc rc = codec.decode_graph (test_stream, &gr, &ct, &err);
+        VOLK_rc rc = codec.decode_graph (test_stream, NULL, &gr, &ct, &err);
         log_info ("rc: %d", rc);
         ASSERT (rc == VOLK_PARSE_ERR, "Bad test did not raise a parse error!");
         fclose (test_stream);
@@ -126,8 +126,8 @@ int codec_ttl_tests()
     RUN (test_decode_nt_bad_graph);
     // TODO temporarily disabled; full W3C test suite at
     // https://w3c.github.io/rdf-tests/rdf/ shall replace these.
-    //RUN (test_w3c_pos);
-    //RUN (test_w3c_neg);
+    RUN (test_w3c_pos);
+    RUN (test_w3c_neg);
 
     free_terms(terms);
     for (int i = 0; i < TRP_CT; i++)

+ 1 - 1
test/test_store_mdb.c

@@ -18,7 +18,7 @@ static int test_ctx_switch()
 
     // Create enough triples to test a multi-page copy of triple data.
     // Add small buffer (4) to create a 3rd page.
-    size_t num_trp = (getpagesize() * 2 / TRP_KLEN) + 4;
+    size_t num_trp = (sysconf(_SC_PAGESIZE) * 2 / TRP_KLEN) + 4;
     VOLK_BufferTriple **tdata = malloc (num_trp * sizeof (*tdata));
     VOLK_Triple *trp = VOLK_triple_new (
         VOLK_iriref_new ("urn:s:1"),

部分文件因文件數量過多而無法顯示