Browse Source

Initial TTL codec.

Stefano Cossu 2 years ago
parent
commit
6c26c06563

+ 4 - 4
Makefile

@@ -29,8 +29,8 @@ DBG_CFLAGS = -Itest -O0 -g3 -DDEBUG
 LDFLAGS = -L. -L$(libdir) -llmdb -lxxhash -luuid
 
 CODEC_DIR = src/codec
-CODEC_SRC = $(wildcard src/codec/*_grammar.c) \
-	  		$(wildcard src/codec/*_parser.c)
+CODEC_SRC = $(wildcard src/codec/grammar_*.c) \
+	  		$(wildcard src/codec/parser_*.c)
 CODEC_OBJ = $(CODEC_SRC:.c=.o)
 CODEC_DBG_OBJ = $(CODEC_SRC:.c=_dbg.o)
 
@@ -116,12 +116,12 @@ $(CODEC_OBJ): $(CODEC_SRC)
 
 
 # Parser C sources.
-%_parser.c: %_lexer.re %_grammar.c
+parser_%.c: lexer_%.re grammar_%.c
 	$(LEXER) $< -o $@ -T --case-ranges
 
 
 # Parser generators.
-%_grammar.c: %_grammar.y
+grammar_%.c: grammar_%.y
 	$(PARSER) $< -q -m -T$(CODEC_DIR)/lempar.c -d$(CODEC_DIR)
 
 

+ 0 - 160
README.md

@@ -1,160 +0,0 @@
-# `lsup_rdf`
-
-**This project is work in progress.**
-
-Embedded RDF (and maybe later, generic graph) store and manipulation library.
-
-## Purpose
-
-The goal of this library is to provide efficient and compact handling of RDF
-data. At least a complete C API and Python bindings are planned.
-
-This library can be thought of as SQLite or BerkeleyDB for graphs. It can be
-embedded directly in a program and store persistent data without the need of
-running a server. In addition, `lsup_rdf` can perform in-memory graph
-operations such as validation, de/serialization, boolean operations, lookup,
-etc.
-
-Two graph back ends are available: a memory one based on hash maps and a
-disk-based one based on [LMDB](https://symas.com/lmdb/), an extremely fast and
-compact embedded key-value store. Graphs can be created independently with
-either back end within the same program. Triples in the persistent back end are
-fully indexed and optimized for a balance of lookup speed, data compactness,
-and write performance (in order of importance).
-
-This library was initially meant to replace RDFLib dependency and Cython code
-in [Lakesuperior](https://notabug.org/scossu/lakesuperior) in an effort to
-reduce code clutter and speed up RDF handling; it is now a project for an
-independent RDF library, but unless the contributor base expands, it will
-remain focused on serving Lakesuperior.
-
-
-## Development Status
-
-**Alpha.** The API structure is not yet stable and may change radically. The
-code may not compile, or throw a fit when run. Testing is minimal. At the
-moment this project is only intended for curious developers and researchers.
-
-This is also my first stab at writing a C library (coming from Python) and an
-unpaid fun project, so don't be surprised if you find some gross stuff.
-
-
-## Road Map
-
-### In Scope – Short Term
-
-The short-term goal is to support usage in Lakesuperior and a workable set
-of features as a standalone library:
-
-- Handling of graphs, triples, terms
-- Memory- and disk-backed (persistent) graph storage
-- Contexts (disk-backed only)
-- Handling of blank nodes
-- Namespace prefixes
-- Validation of literal and URI terms
-- Validation of RDF triples
-- Fast graph lookup using matching patterns
-- Graph boolean operations
-- Serialization and de-serialization to/from N-Triples and N-Quads
-- Serialization and de-serialization to/from Turtle and TriG
-- Compile-time configuration of max graph size (efficiency vs. capacity)
-- Python bindings
-- Basic command line utilities
-
-### Possibly In scope – Long Term
-
-- Binary serialization and hashing of graphs
-- Binary protocol for synchronizing remote replicas
-- Backend for massive distributed storage (possibly Ceph)
-- Lua bindings
-
-### Likely Out of Scope
-
-(Unless provided and maintained by external contributors)
-
-- C++ bindings
-- JSON-LD de/serialization
-- SPARQL queries (We'll see... Will definitely need help)
-
-## Building
-
-### Requirements
-
-- It is recommended to build and run LSUP_RDF on a Linux system. No other
-  OS has been tested so far.
-- A C compiler. This has been only tested with `gcc` so far.
-- [re2c](https://re2c.org/) and [Lemon](https://www.sqlite.org/lemon.html) to
-  build the RDF language parsers.
-- [cinclude2dot](https://www.flourish.org/cinclude2dot) and
-  [Graphviz](https://graphviz.org/) for generating dependency graph (optional).
-
-
-### `make` commands
-
-The default `make` command compiles the library. Enter `make help` to get an
-overview of the other available commands.
-
-`make install` installs libraries and headers in the directories set by the
-environment variable `$PREFIX`. If this is unset, the default `/usr/local`
-prefix is used.
-
-Options to compile with debug symbols are available.
-
-
-### Compile-Time Constants
-
-`DEBUG`: Set debug mode: memory map is at reduced size, logging is forced to
-TRACE level, etc.
-
-`LSUP_RDF_STREAM_CHUNK_SIZE`: Size of RDF decoding buffer, i.e., maximum size
-of a chunk of RDF data fed to the parser when decoding a RDF file into a graph.
-This should be larger than the maximum expected size of a single term in your
-RDF source. The default value is 8192, which is mildly conservative. If you
-experience parsing errors on decoding, and they happen to be on a term such as
-a very long string literal, try recompiling the library with a larger value.
-
-## Embedding
-
-The generated `liblsuprdf.so` and `liblsuprdf.a` libraries can be linked
-dynamically or statically to your code. Only the `lsup_rdf.h` header, which
-recursively includes other headers in the `include` directory, needs to be
-`#include`d in the embedding code.
-
-Environment variables and/or compiler options might have to be set in order to
-find the dynamic libraries and headers in their install locations.
-
-For compilation and linking examples, refer to `test`, `memtest`, `perftest`
-and other actions in the current Makefile.
-
-
-### Environment Variables
-
-`LSUP_MDB_STORE_PATH`: The file path for the persistent store back end. For
-production use it is strongly recommended to set this to a permanent location
-on the fastest storage volume available. If unset, the current directory will
-be used. The directory must exist.
-
-`LSUP_LOGLEVEL`: A number between 0 and 5, corresponding to:
-
-- 0: `TRACE`
-- 1: `DEBUG`
-- 2: `INFO`
-- 3: `WARN`
-- 4: `ERROR`
-- 5: `FATAL`
-
-If unspecified, it is set to 3.
-
-`LSUP_MDB_MAPSIZE`: Virtual memory map size. It is recommended to leave this
-alone. By default, it is set to 1 TB for 64-bit systems and 4 GB for 32-bit
-systems. The map size by itself does not use up any extra resources.
-
-
-### C API Documentation
-
-*TODO*  Almost all header files are documented. Need a doc generator.
-
-
-### Python API Documentation
-
-*TODO*

+ 0 - 47
TODO.md

@@ -1,47 +0,0 @@
-# Quick TODO list
-
-*P* = pending; *W* = working on it; *D* = done.
-
-## Critical for MVP
-
-- *D* LMDB back end
-- *D* Hash table back end
-- *D* Namespace manager
-- *D* N3 serialization / deserialization
-- *D* Environment
-- *D* Better error handling
-- *D* Logging
-- *D* Store graph
-- *D* Python bindings
-    - *D* Basic module framework
-    - *D* term, triple, graph modules
-    - *D* Codec integration
-    - *D* Graph remove and lookup ops
-    - *D* Namespace module
-    - *D* Tests (basic)
-    - *D* Subclass term types
-- *D* Namespaced IRIs
-- *D* Relative IRIs
-- *D* Flexible store interface
-- *D* Transaction control
-- *W* Turtle serialization / deserialization
-- *P* Full UTF-8 support
-- *P* Extended tests
-    - *P* C API
-    - *P* Python API
-
-
-## Non-critical for MVP
-
-- Term and triple validation
-- Enhanced graph operations
-    - Extract unique terms and 2-term tuples
-- NQ codec
-- TriG codec
-- UTF-16 support
-
-
-## Long-term
-
-- Generic graph (non RDF constrained)
-- Lua bindings

+ 4 - 4
include/codec_ttl.h

@@ -1,10 +1,10 @@
-#ifndef _LSUP_CODEC_NT_H
-#define _LSUP_CODEC_NT_H
+#ifndef _LSUP_CODEC_TTL_H
+#define _LSUP_CODEC_TTL_H
 
 #include "codec_base.h"
 
-/** @brief N-Triples codec.
+/** @brief Turtle codec.
  */
-extern const LSUP_Codec nt_codec;
+extern const LSUP_Codec ttl_codec;
 
 #endif

+ 6 - 0
include/graph.h

@@ -303,6 +303,12 @@ LSUP_rc
 LSUP_graph_iter_next (LSUP_GraphIterator *it, LSUP_Triple *spo);
 
 
+/** @brief Return the graph related to an iterator.
+ */
+const LSUP_Graph *
+LSUP_graph_iter_graph (LSUP_GraphIterator *it);
+
+
 /** @brief Free a graph iterator.
  *
  * DO NOT USE with iterators obtained with #LSUP_graph_add_init(). Use

+ 0 - 0
include/nt_parser.h → include/parser_nt.h


+ 38 - 0
include/parser_ttl.h

@@ -0,0 +1,38 @@
+#ifndef _LSUP_TTL_PARSER_H
+#define _LSUP_TTL_PARSER_H
+
+#include "graph.h"
+
+/** @brief Parse a single term.
+ *
+ * @param[in] rep Turtle representation as a character string.
+ *
+ * @param[in] map Namespace prefix map. NOTE(review): the original text was
+ *  copied from the N-Triples parser; confirm usage and whether NULL is allowed.
+ *
+ * @param[out] term Term to be created from the string.
+ *
+ * @return LSUP_OK on success, LSUP_VALUE_ERR if the string is not valid
+ *  Turtle syntax for an IRI ref, Literal or BNode.
+ */
+LSUP_rc
+LSUP_ttl_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term);
+
+
+/** @brief Parse a Turtle document from a file handle.
+ *
+ * @param[in] stream File handle of the Turtle document to parse.
+ *
+ * @param[out] gr Pointer to a graph handle to be created. The new graph will
+ *  have a random UUID URN.
+ *
+ * @param[out] ct If not NULL it is populated with the number of triples
+ *  parsed. This may be more than the triples in the resulting graph.
+ *
+ * @return LSUP_OK on success, LSUP_VALUE_ERR if a parsing error was
+ *  encountered. TODO Add line/char info for parsing error
+ */
+LSUP_rc
+LSUP_ttl_parse_doc (FILE *stream, LSUP_Graph **gr, size_t *ct, char **err);
+
+#endif

+ 2 - 2
include/term.h

@@ -194,10 +194,10 @@ LSUP_iriref_new (const char *data, LSUP_NSMap *nsm)
  *
  * Otherwise, the resulting IRI is relative to the full root string.
  *
- * @param[in] iri Term with an IRI relative to the webroot.
- *
  * @param[in] root Root IRI that the new IRI should be relative to.
  *
+ * @param[in] iri Term with an IRI relative to the webroot.
+ *
  * @return New absolute IRI, or NULL if either term is not an IRI.
  */
 LSUP_Term *

+ 1240 - 0
src/codec/grammar_nt.c

@@ -0,0 +1,1240 @@
+/* This file is automatically generated by Lemon from input grammar
+** source file "src/codec/grammar_nt.y". */
+/** @brief Lemon parser grammar for N-Triples.
+ *
+ * The `lemon' parser generator executable must be in your PATH:
+ * https://sqlite.org/src/doc/trunk/doc/lemon.html
+ *
+ * To generate the parser, run: `lemon ${FILE}'
+ */
+
+#include "graph.h"
+#line 38 "src/codec/grammar_nt.c"
+#include "src/codec/grammar_nt.h"
+/**************** End of %include directives **********************************/
+/* These constants specify the various numeric values for terminal symbols.
+***************** Begin token definitions *************************************/
+#if INTERFACE
+#define T_EOF                             1
+#define T_DOT                             2
+#define T_IRIREF                          3
+#define T_BNODE                           4
+#define T_LITERAL                         5
+#define T_EOL                             6
+#define T_WS                              7
+#endif
+/**************** End token definitions ***************************************/
+
+/* The next section is a series of control #defines that configure
+** various aspects of the generated parser.
+**    YYCODETYPE         is the data type used to store the integer codes
+**                       that represent terminal and non-terminal symbols.
+**                       "unsigned char" is used if there are fewer than
+**                       256 symbols.  Larger types otherwise.
+**    YYNOCODE           is a number of type YYCODETYPE that is not used for
+**                       any terminal or nonterminal symbol.
+**    YYFALLBACK         If defined, this indicates that one or more tokens
+**                       (also known as: "terminal symbols") have fall-back
+**                       values which should be used if the original symbol
+**                       would not parse.  This permits keywords to sometimes
+**                       be used as identifiers, for example.
+**    YYACTIONTYPE       is the data type used for "action codes" - numbers
+**                       that indicate what to do in response to the next
+**                       token.
+**    ParseTOKENTYPE     is the data type used for minor type for terminal
+**                       symbols.  Background: A "minor type" is a semantic
+**                       value associated with a terminal or non-terminal
+**                       symbols.  For example, for an "ID" terminal symbol,
+**                       the minor type might be the name of the identifier.
+**                       Each non-terminal can have a different minor type.
+**                       Terminal symbols all have the same minor type, though.
+**                       This macro defines the minor type for terminal
+**                       symbols.
+**    YYMINORTYPE        is the data type used for all minor types.
+**                       This is typically a union of many types, one of
+**                       which is ParseTOKENTYPE.  The entry in the union
+**                       for terminal symbols is called "yy0".
+**    YYSTACKDEPTH       is the maximum depth of the parser's stack.  If
+**                       zero the stack is dynamically sized using realloc()
+**    ParseARG_SDECL     A static variable declaration for the %extra_argument
+**    ParseARG_PDECL     A parameter declaration for the %extra_argument
+**    ParseARG_PARAM     Code to pass %extra_argument as a subroutine parameter
+**    ParseARG_STORE     Code to store %extra_argument into yypParser
+**    ParseARG_FETCH     Code to extract %extra_argument from yypParser
+**    ParseCTX_*         As ParseARG_ except for %extra_context
+**    YYERRORSYMBOL      is the code number of the error symbol.  If not
+**                       defined, then do no error processing.
+**    YYNSTATE           the combined number of states.
+**    YYNRULE            the number of rules in the grammar
+**    YYNTOKEN           Number of terminal symbols
+**    YY_MAX_SHIFT       Maximum value for shift actions
+**    YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
+**    YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
+**    YY_ERROR_ACTION    The yy_action[] code for syntax error
+**    YY_ACCEPT_ACTION   The yy_action[] code for accept
+**    YY_NO_ACTION       The yy_action[] code for no-op
+**    YY_MIN_REDUCE      Minimum value for reduce actions
+**    YY_MAX_REDUCE      Maximum value for reduce actions
+*/
+#ifndef INTERFACE
+# define INTERFACE 1
+#endif
+/************* Begin control #defines *****************************************/
+#define YYCODETYPE unsigned char
+#define YYNOCODE 16
+#define YYACTIONTYPE unsigned char
+#if INTERFACE
+#define ParseTOKENTYPE  LSUP_Term * 
+#endif
+typedef union {
+  int yyinit;
+  ParseTOKENTYPE yy0;
+  LSUP_Triple * yy6;
+  LSUP_Term * yy10;
+  void * yy17;
+} YYMINORTYPE;
+#ifndef YYSTACKDEPTH
+#define YYSTACKDEPTH 100
+#endif
+#if INTERFACE
+#define ParseARG_SDECL  LSUP_GraphIterator *it ;
+#define ParseARG_PDECL , LSUP_GraphIterator *it 
+#define ParseARG_PARAM ,it 
+#define ParseARG_FETCH  LSUP_GraphIterator *it =yypParser->it ;
+#define ParseARG_STORE yypParser->it =it ;
+#define ParseCTX_SDECL
+#define ParseCTX_PDECL
+#define ParseCTX_PARAM
+#define ParseCTX_FETCH
+#define ParseCTX_STORE
+#endif
+#define YYNSTATE             15
+#define YYNRULE              15
+#define YYNRULE_WITH_ACTION  1
+#define YYNTOKEN             8
+#define YY_MAX_SHIFT         14
+#define YY_MIN_SHIFTREDUCE   25
+#define YY_MAX_SHIFTREDUCE   39
+#define YY_ERROR_ACTION      40
+#define YY_ACCEPT_ACTION     41
+#define YY_NO_ACTION         42
+#define YY_MIN_REDUCE        43
+#define YY_MAX_REDUCE        57
+/************* End control #defines *******************************************/
+#define YY_NLOOKAHEAD ((int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])))
+
+/* Define the yytestcase() macro to be a no-op if it is not already defined
+** otherwise.
+**
+** Applications can choose to define yytestcase() in the %include section
+** to a macro that can assist in verifying code coverage.  For production
+** code the yytestcase() macro should be turned off.  But it is useful
+** for testing.
+*/
+#ifndef yytestcase
+# define yytestcase(X)
+#endif
+
+
+/* Next are the tables used to determine what action to take based on the
+** current state and lookahead token.  These tables are used to implement
+** functions that take a state number and lookahead value and return an
+** action integer.  
+**
+** Suppose the action integer is N.  Then the action is determined as
+** follows
+**
+**   0 <= N <= YY_MAX_SHIFT             Shift N.  That is, push the lookahead
+**                                      token onto the stack and goto state N.
+**
+**   N between YY_MIN_SHIFTREDUCE       Shift to an arbitrary state then
+**     and YY_MAX_SHIFTREDUCE           reduce by rule N-YY_MIN_SHIFTREDUCE.
+**
+**   N == YY_ERROR_ACTION               A syntax error has occurred.
+**
+**   N == YY_ACCEPT_ACTION              The parser accepts its input.
+**
+**   N == YY_NO_ACTION                  No such action.  Denotes unused
+**                                      slots in the yy_action[] table.
+**
+**   N between YY_MIN_REDUCE            Reduce by rule N-YY_MIN_REDUCE
+**     and YY_MAX_REDUCE
+**
+** The action table is constructed as a single large table named yy_action[].
+** Given state S and lookahead X, the action is computed as either:
+**
+**    (A)   N = yy_action[ yy_shift_ofst[S] + X ]
+**    (B)   N = yy_default[S]
+**
+** The (A) formula is preferred.  The B formula is used instead if
+** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X.
+**
+** The formulas above are for computing the action when the lookahead is
+** a terminal symbol.  If the lookahead is a non-terminal (as occurs after
+** a reduce action) then the yy_reduce_ofst[] array is used in place of
+** the yy_shift_ofst[] array.
+**
+** The following are the tables generated in this section:
+**
+**  yy_action[]        A single table containing all actions.
+**  yy_lookahead[]     A table containing the lookahead for each entry in
+**                     yy_action.  Used to detect hash collisions.
+**  yy_shift_ofst[]    For each state, the offset into yy_action for
+**                     shifting terminals.
+**  yy_reduce_ofst[]   For each state, the offset into yy_action for
+**                     shifting non-terminals after a reduce.
+**  yy_default[]       Default action for each state.
+**
+*********** Begin parsing tables **********************************************/
+#define YY_ACTTAB_COUNT (30)
+static const YYACTIONTYPE yy_action[] = {
+ /*     0 */     4,    5,    8,   10,   41,    1,   11,    3,   33,   34,
+ /*    10 */    35,   14,   36,   39,   36,    9,   39,   39,   12,   30,
+ /*    20 */    31,   32,    3,    6,   25,   37,    2,    7,   13,   44,
+};
+static const YYCODETYPE yy_lookahead[] = {
+ /*     0 */     8,   11,    9,   14,   12,   13,   14,   15,    3,    4,
+ /*    10 */     5,    1,    6,    7,    6,    8,    7,    7,   15,    3,
+ /*    20 */     4,    3,   15,   10,    2,    6,   15,   15,   14,    0,
+ /*    30 */    16,   16,   16,   16,   16,   16,   16,   16,
+};
+#define YY_SHIFT_COUNT    (14)
+#define YY_SHIFT_MIN      (0)
+#define YY_SHIFT_MAX      (29)
+static const unsigned char yy_shift_ofst[] = {
+ /*     0 */     6,   10,    5,   16,    8,    9,    9,   18,    9,    8,
+ /*    10 */    19,   19,   22,   19,   29,
+};
+#define YY_REDUCE_COUNT (9)
+#define YY_REDUCE_MIN   (-11)
+#define YY_REDUCE_MAX   (14)
+static const signed char yy_reduce_ofst[] = {
+ /*     0 */    -8,    7,  -10,   -7,  -11,    3,   11,   13,   12,   14,
+};
+static const YYACTIONTYPE yy_default[] = {
+ /*     0 */    56,   56,   40,   40,   40,   56,   56,   40,   56,   40,
+ /*    10 */    46,   45,   40,   47,   40,
+};
+/********** End of lemon-generated parsing tables *****************************/
+
+/* The next table maps tokens (terminal symbols) into fallback tokens.  
+** If a construct like the following:
+** 
+**      %fallback ID X Y Z.
+**
+** appears in the grammar, then ID becomes a fallback token for X, Y,
+** and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
+** but it does not parse, the type of the token is changed to ID and
+** the parse is retried before an error is thrown.
+**
+** This feature can be used, for example, to cause some keywords in a language
+** to revert to identifiers if the keyword does not apply in the context where
+** it appears.
+*/
+#ifdef YYFALLBACK
+static const YYCODETYPE yyFallback[] = {
+};
+#endif /* YYFALLBACK */
+
+/* The following structure represents a single element of the
+** parser's stack.  Information stored includes:
+**
+**   +  The state number for the parser at this level of the stack.
+**
+**   +  The value of the token stored at this level of the stack.
+**      (In other words, the "major" token.)
+**
+**   +  The semantic value stored at this level of the stack.  This is
+**      the information used by the action routines in the grammar.
+**      It is sometimes called the "minor" token.
+**
+** After the "shift" half of a SHIFTREDUCE action, the stateno field
+** actually contains the reduce action for the second half of the
+** SHIFTREDUCE.
+*/
+struct yyStackEntry {
+  YYACTIONTYPE stateno;  /* The state-number, or reduce action in SHIFTREDUCE */
+  YYCODETYPE major;      /* The major token value.  This is the code
+                         ** number for the token at this stack level */
+  YYMINORTYPE minor;     /* The user-supplied minor token value.  This
+                         ** is the value of the token  */
+};
+typedef struct yyStackEntry yyStackEntry;
+
+/* The state of the parser is completely contained in an instance of
+** the following structure */
+struct yyParser {
+  yyStackEntry *yytos;          /* Pointer to top element of the stack */
+#ifdef YYTRACKMAXSTACKDEPTH
+  int yyhwm;                    /* High-water mark of the stack */
+#endif
+#ifndef YYNOERRORRECOVERY
+  int yyerrcnt;                 /* Shifts left before out of the error */
+#endif
+  ParseARG_SDECL                /* A place to hold %extra_argument */
+  ParseCTX_SDECL                /* A place to hold %extra_context */
+#if YYSTACKDEPTH<=0
+  int yystksz;                  /* Current size of the stack */
+  yyStackEntry *yystack;        /* The parser's stack */
+  yyStackEntry yystk0;          /* First stack entry */
+#else
+  yyStackEntry yystack[YYSTACKDEPTH];  /* The parser's stack */
+  yyStackEntry *yystackEnd;            /* Last entry in the stack */
+#endif
+};
+typedef struct yyParser yyParser;
+
+#ifndef NDEBUG
+#include <stdio.h>
+#include <assert.h>
+static FILE *yyTraceFILE = 0;
+static char *yyTracePrompt = 0;
+#endif /* NDEBUG */
+
+#ifndef NDEBUG
+/* 
+** Turn parser tracing on by giving a stream to which to write the trace
+** and a prompt to preface each trace message.  Tracing is turned off
+** by making either argument NULL 
+**
+** Inputs:
+** <ul>
+** <li> A FILE* to which trace output should be written.
+**      If NULL, then tracing is turned off.
+** <li> A prefix string written at the beginning of every
+**      line of trace output.  If NULL, then tracing is
+**      turned off.
+** </ul>
+**
+** Outputs:
+** None.
+*/
+void ParseTrace(FILE *TraceFILE, char *zTracePrompt){
+  yyTraceFILE = TraceFILE;
+  yyTracePrompt = zTracePrompt;
+  if( yyTraceFILE==0 ) yyTracePrompt = 0;
+  else if( yyTracePrompt==0 ) yyTraceFILE = 0;
+}
+#endif /* NDEBUG */
+
+#if defined(YYCOVERAGE) || !defined(NDEBUG)
+/* For tracing shifts, the names of all terminals and nonterminals
+** are required.  The following table supplies these names */
+static const char *const yyTokenName[] = { 
+  /*    0 */ "$",
+  /*    1 */ "EOF",
+  /*    2 */ "DOT",
+  /*    3 */ "IRIREF",
+  /*    4 */ "BNODE",
+  /*    5 */ "LITERAL",
+  /*    6 */ "EOL",
+  /*    7 */ "WS",
+  /*    8 */ "triple",
+  /*    9 */ "subject",
+  /*   10 */ "predicate",
+  /*   11 */ "object",
+  /*   12 */ "ntriplesDoc",
+  /*   13 */ "triples",
+  /*   14 */ "eol",
+  /*   15 */ "ws",
+};
+#endif /* defined(YYCOVERAGE) || !defined(NDEBUG) */
+
+#ifndef NDEBUG
+/* For tracing reduce actions, the names of all rules are required.
+*/
+static const char *const yyRuleName[] = {
+ /*   0 */ "triple ::= ws subject ws predicate ws object ws DOT",
+ /*   1 */ "ntriplesDoc ::= triples EOF",
+ /*   2 */ "triples ::= eol",
+ /*   3 */ "triples ::= triple eol",
+ /*   4 */ "triples ::= triples triple eol",
+ /*   5 */ "subject ::= IRIREF",
+ /*   6 */ "subject ::= BNODE",
+ /*   7 */ "predicate ::= IRIREF",
+ /*   8 */ "object ::= IRIREF",
+ /*   9 */ "object ::= BNODE",
+ /*  10 */ "object ::= LITERAL",
+ /*  11 */ "eol ::= EOL",
+ /*  12 */ "eol ::= eol EOL",
+ /*  13 */ "ws ::=",
+ /*  14 */ "ws ::= WS",
+};
+#endif /* NDEBUG */
+
+
+#if YYSTACKDEPTH<=0
+/*
+** Try to increase the size of the parser stack.  Return the number
+** of errors.  Return 0 on success.
+*/
+static int yyGrowStack(yyParser *p){
+  int newSize;
+  int idx;
+  yyStackEntry *pNew;
+
+  newSize = p->yystksz*2 + 100;
+  idx = p->yytos ? (int)(p->yytos - p->yystack) : 0;
+  if( p->yystack==&p->yystk0 ){
+    pNew = malloc(newSize*sizeof(pNew[0]));
+    if( pNew ) pNew[0] = p->yystk0;
+  }else{
+    pNew = realloc(p->yystack, newSize*sizeof(pNew[0]));
+  }
+  if( pNew ){
+    p->yystack = pNew;
+    p->yytos = &p->yystack[idx];
+#ifndef NDEBUG
+    if( yyTraceFILE ){
+      fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n",
+              yyTracePrompt, p->yystksz, newSize);
+    }
+#endif
+    p->yystksz = newSize;
+  }
+  return pNew==0; 
+}
+#endif
+
+/* Datatype of the argument to the memory allocator passed as the
+** first argument to ParseAlloc() below.  This can be changed by
+** putting an appropriate #define in the %include section of the input
+** grammar.
+*/
+#ifndef YYMALLOCARGTYPE
+# define YYMALLOCARGTYPE size_t
+#endif
+
+/* Initialize a new parser that has already been allocated.
+*/
+void ParseInit(void *yypRawParser ParseCTX_PDECL){
+  yyParser *yypParser = (yyParser*)yypRawParser;
+  ParseCTX_STORE
+#ifdef YYTRACKMAXSTACKDEPTH
+  yypParser->yyhwm = 0;
+#endif
+#if YYSTACKDEPTH<=0
+  yypParser->yytos = NULL;
+  yypParser->yystack = NULL;
+  yypParser->yystksz = 0;
+  if( yyGrowStack(yypParser) ){
+    yypParser->yystack = &yypParser->yystk0;
+    yypParser->yystksz = 1;
+  }
+#endif
+#ifndef YYNOERRORRECOVERY
+  yypParser->yyerrcnt = -1;
+#endif
+  yypParser->yytos = yypParser->yystack;
+  yypParser->yystack[0].stateno = 0;
+  yypParser->yystack[0].major = 0;
+#if YYSTACKDEPTH>0
+  yypParser->yystackEnd = &yypParser->yystack[YYSTACKDEPTH-1];
+#endif
+}
+
+#ifndef Parse_ENGINEALWAYSONSTACK
+/* 
+** This function allocates a new parser.
+** The only argument is a pointer to a function which works like
+** malloc.
+**
+** Inputs:
+** A pointer to the function used to allocate memory.
+**
+** Outputs:
+** A pointer to a parser.  This pointer is used in subsequent calls
+** to Parse and ParseFree.
+*/
+void *ParseAlloc(void *(*mallocProc)(YYMALLOCARGTYPE) ParseCTX_PDECL){
+  yyParser *yypParser;
+  yypParser = (yyParser*)(*mallocProc)( (YYMALLOCARGTYPE)sizeof(yyParser) );
+  if( yypParser ){
+    ParseCTX_STORE
+    ParseInit(yypParser ParseCTX_PARAM);
+  }
+  return (void*)yypParser;
+}
+#endif /* Parse_ENGINEALWAYSONSTACK */
+
+
+/* The following function deletes the "minor type" or semantic value
+** associated with a symbol.  The symbol can be either a terminal
+** or nonterminal. "yymajor" is the symbol code, and "yypminor" is
+** a pointer to the value to be deleted.  The code used to do the 
+** deletions is derived from the %destructor and/or %token_destructor
+** directives of the input grammar.
+*/
+static void yy_destructor(
+  yyParser *yypParser,    /* The parser */
+  YYCODETYPE yymajor,     /* Type code for object to destroy */
+  YYMINORTYPE *yypminor   /* The object to be destroyed */
+){
+  ParseARG_FETCH
+  ParseCTX_FETCH
+  switch( yymajor ){
+    /* Here is inserted the actions which take place when a
+    ** terminal or non-terminal is destroyed.  This can happen
+    ** when the symbol is popped from the stack during a
+    ** reduce or during error processing or when a parser is 
+    ** being destroyed before it is finished parsing.
+    **
+    ** Note: during a reduce, the only symbols destroyed are those
+    ** which appear on the RHS of the rule, but which are *not* used
+    ** inside the C code.
+    */
+/********* Begin destructor definitions ***************************************/
+    case 8: /* triple */
+{
+#line 19 "src/codec/grammar_nt.y"
+ LSUP_triple_free ((yypminor->yy6)); 
+#line 517 "src/codec/grammar_nt.c"
+}
+      break;
+    case 9: /* subject */
+    case 10: /* predicate */
+    case 11: /* object */
+{
+#line 21 "src/codec/grammar_nt.y"
+ LSUP_term_free ((yypminor->yy10)); 
+#line 526 "src/codec/grammar_nt.c"
+}
+      break;
+/********* End destructor definitions *****************************************/
+    default:  break;   /* If no destructor action specified: do nothing */
+  }
+}
+
+/*
+** Pop the parser's stack once.
+**
+** If there is a destructor routine associated with the token which
+** is popped from the stack, then call it.
+*/
+static void yy_pop_parser_stack(yyParser *pParser){
+  yyStackEntry *yytos;
+  assert( pParser->yytos!=0 );
+  assert( pParser->yytos > pParser->yystack );
+  yytos = pParser->yytos--;
+#ifndef NDEBUG
+  if( yyTraceFILE ){
+    fprintf(yyTraceFILE,"%sPopping %s\n",
+      yyTracePrompt,
+      yyTokenName[yytos->major]);
+  }
+#endif
+  yy_destructor(pParser, yytos->major, &yytos->minor);
+}
+
+/*
+** Clear all secondary memory allocations from the parser
+*/
+void ParseFinalize(void *p){
+  yyParser *pParser = (yyParser*)p;
+  while( pParser->yytos>pParser->yystack ) yy_pop_parser_stack(pParser);
+#if YYSTACKDEPTH<=0
+  if( pParser->yystack!=&pParser->yystk0 ) free(pParser->yystack);
+#endif
+}
+
+#ifndef Parse_ENGINEALWAYSONSTACK
+/* 
+** Deallocate and destroy a parser.  Destructors are called for
+** all stack elements before shutting the parser down.
+**
+** If the YYPARSEFREENEVERNULL macro exists (for example because it
+** is defined in a %include section of the input grammar) then it is
+** assumed that the input pointer is never NULL.
+*/
+void ParseFree(
+  void *p,                    /* The parser to be deleted */
+  void (*freeProc)(void*)     /* Function used to reclaim memory */
+){
+#ifndef YYPARSEFREENEVERNULL
+  if( p==0 ) return;
+#endif
+  ParseFinalize(p);
+  (*freeProc)(p);
+}
+#endif /* Parse_ENGINEALWAYSONSTACK */
+
+/*
+** Return the peak depth of the stack for a parser.
+*/
+#ifdef YYTRACKMAXSTACKDEPTH
+int ParseStackPeak(void *p){
+  yyParser *pParser = (yyParser*)p;
+  return pParser->yyhwm;
+}
+#endif
+
+/* This array of booleans keeps track of the parser statement
+** coverage.  The element yycoverage[X][Y] is set when the parser
+** is in state X and has a lookahead token Y.  In a well-tested
+** systems, every element of this matrix should end up being set.
+*/
+#if defined(YYCOVERAGE)
+static unsigned char yycoverage[YYNSTATE][YYNTOKEN];
+#endif
+
+/*
+** Write into out a description of every state/lookahead combination that
+**
+**   (1)  has not been used by the parser, and
+**   (2)  is not a syntax error.
+**
+** Return the number of missed state/lookahead combinations.
+*/
+#if defined(YYCOVERAGE)
+int ParseCoverage(FILE *out){
+  int nMissed = 0;          /* Valid state/lookahead pairs never exercised */
+  int iState;
+  for(iState=0; iState<YYNSTATE; iState++){
+    const int iBase = yy_shift_ofst[iState];
+    int iTok;
+    for(iTok=0; iTok<YYNTOKEN; iTok++){
+      int seen;
+      /* Skip lookaheads that have no table entry for this state. */
+      if( yy_lookahead[iBase+iTok]!=iTok ) continue;
+      seen = yycoverage[iState][iTok]!=0;
+      if( !seen ) nMissed++;
+      if( out ){
+        fprintf(out,"State %d lookahead %s %s\n", iState,
+                yyTokenName[iTok],
+                seen ? "ok" : "missed");
+      }
+    }
+  }
+  return nMissed;
+}
+#endif
+
+/*
+** Find the appropriate action for a parser given the terminal
+** look-ahead token iLookAhead.
+**
+** Returns stateno itself when stateno>YY_MAX_SHIFT.  Otherwise returns the
+** yy_action[] entry for the (state, token) pair, or yy_default[stateno]
+** when yy_lookahead[] holds no matching entry.  If YYFALLBACK is defined
+** the token's fallback is tried first; if YYWILDCARD is defined the
+** wildcard entry is tried before resorting to the default.
+*/
+static YYACTIONTYPE yy_find_shift_action(
+  YYCODETYPE iLookAhead,    /* The look-ahead token */
+  YYACTIONTYPE stateno      /* Current state number */
+){
+  int i;
+
+  if( stateno>YY_MAX_SHIFT ) return stateno;
+  assert( stateno <= YY_SHIFT_COUNT );
+#if defined(YYCOVERAGE)
+  /* Mark this state/lookahead pair as exercised (see ParseCoverage()). */
+  yycoverage[stateno][iLookAhead] = 1;
+#endif
+  do{
+    /* i = base offset of this state, then + token code = table slot. */
+    i = yy_shift_ofst[stateno];
+    assert( i>=0 );
+    assert( i<=YY_ACTTAB_COUNT );
+    assert( i+YYNTOKEN<=(int)YY_NLOOKAHEAD );
+    assert( iLookAhead!=YYNOCODE );
+    assert( iLookAhead < YYNTOKEN );
+    i += iLookAhead;
+    assert( i<(int)YY_NLOOKAHEAD );
+    if( yy_lookahead[i]!=iLookAhead ){
+      /* The slot belongs to another token: no direct entry for this pair. */
+#ifdef YYFALLBACK
+      YYCODETYPE iFallback;            /* Fallback token */
+      assert( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0]) );
+      iFallback = yyFallback[iLookAhead];
+      if( iFallback!=0 ){
+#ifndef NDEBUG
+        if( yyTraceFILE ){
+          fprintf(yyTraceFILE, "%sFALLBACK %s => %s\n",
+             yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[iFallback]);
+        }
+#endif
+        assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
+        iLookAhead = iFallback;
+        /* Retry the lookup with the fallback token (loop condition is 1). */
+        continue;
+      }
+#endif
+#ifdef YYWILDCARD
+      {
+        /* j = slot of the wildcard token for the same state. */
+        int j = i - iLookAhead + YYWILDCARD;
+        assert( j<(int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])) );
+        if( yy_lookahead[j]==YYWILDCARD && iLookAhead>0 ){
+#ifndef NDEBUG
+          if( yyTraceFILE ){
+            fprintf(yyTraceFILE, "%sWILDCARD %s => %s\n",
+               yyTracePrompt, yyTokenName[iLookAhead],
+               yyTokenName[YYWILDCARD]);
+          }
+#endif /* NDEBUG */
+          return yy_action[j];
+        }
+      }
+#endif /* YYWILDCARD */
+      return yy_default[stateno];
+    }else{
+      assert( i>=0 && i<(int)(sizeof(yy_action)/sizeof(yy_action[0])) );
+      return yy_action[i];
+    }
+  }while(1);
+}
+
+/*
+** Find the appropriate action for a parser given the non-terminal
+** look-ahead token iLookAhead.
+*/
+static YYACTIONTYPE yy_find_reduce_action(
+  YYACTIONTYPE stateno,     /* Current state number */
+  YYCODETYPE iLookAhead     /* The look-ahead token */
+){
+  int iSlot;                /* Index into yy_lookahead[] and yy_action[] */
+#ifdef YYERRORSYMBOL
+  /* States past YY_REDUCE_COUNT fall back to the default action. */
+  if( stateno>YY_REDUCE_COUNT ) return yy_default[stateno];
+#else
+  assert( stateno<=YY_REDUCE_COUNT );
+#endif
+  assert( iLookAhead!=YYNOCODE );
+  iSlot = yy_reduce_ofst[stateno] + iLookAhead;
+#ifdef YYERRORSYMBOL
+  /* Use the table entry only if the slot is in range and owned by
+  ** this symbol; otherwise use the state's default action. */
+  if( iSlot>=0 && iSlot<YY_ACTTAB_COUNT && yy_lookahead[iSlot]==iLookAhead ){
+    return yy_action[iSlot];
+  }
+  return yy_default[stateno];
+#else
+  assert( iSlot>=0 && iSlot<YY_ACTTAB_COUNT );
+  assert( yy_lookahead[iSlot]==iLookAhead );
+  return yy_action[iSlot];
+#endif
+}
+
+/*
+** The following routine is called if the stack overflows.
+*/
+static void yyStackOverflow(yyParser *yypParser){
+   /* Stack-overflow handler: logs to the trace file (debug builds only),
+   ** then pops every entry so that yytos ends up at the base of the stack.
+   ** The %stack_overflow section below is empty, so no user code runs. */
+   ParseARG_FETCH
+   ParseCTX_FETCH
+#ifndef NDEBUG
+   if( yyTraceFILE ){
+     fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt);
+   }
+#endif
+   while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser);
+   /* Here code is inserted which will execute if the parser
+   ** stack ever overflows */
+/******** Begin %stack_overflow code ******************************************/
+/******** End %stack_overflow code ********************************************/
+   ParseARG_STORE /* Suppress warning about unused %extra_argument var */
+   ParseCTX_STORE
+}
+
+/*
+** Print tracing information for a SHIFT action
+*/
+#ifndef NDEBUG
+static void yyTraceShift(yyParser *yypParser, int yyNewState, const char *zTag){
+  const char *zSymbol;      /* Name of the symbol on top of the stack */
+
+  /* Tracing is off: nothing to print. */
+  if( !yyTraceFILE ) return;
+
+  zSymbol = yyTokenName[yypParser->yytos->major];
+  if( yyNewState>=YYNSTATE ){
+    /* Not a plain state number: report it as a pending reduce. */
+    fprintf(yyTraceFILE,"%s%s '%s', pending reduce %d\n",
+       yyTracePrompt, zTag, zSymbol, yyNewState - YY_MIN_REDUCE);
+    return;
+  }
+  fprintf(yyTraceFILE,"%s%s '%s', go to state %d\n",
+     yyTracePrompt, zTag, zSymbol, yyNewState);
+}
+#else
+# define yyTraceShift(X,Y,Z)
+#endif
+
+/*
+** Perform a shift action: push a new entry holding state yyNewState,
+** token code yyMajor and token value yyMinor on top of the parser stack.
+** If the stack has no room for the entry, yyStackOverflow() is called
+** and nothing is pushed.
+*/
+static void yy_shift(
+  yyParser *yypParser,          /* The parser to be shifted */
+  YYACTIONTYPE yyNewState,      /* The new state to shift in */
+  YYCODETYPE yyMajor,           /* The major token to shift in */
+  ParseTOKENTYPE yyMinor        /* The minor token to shift in */
+){
+  yyStackEntry *yytos;
+  yypParser->yytos++;
+#ifdef YYTRACKMAXSTACKDEPTH
+  /* Record a new high-water mark (reported by ParseStackPeak()). */
+  if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
+    yypParser->yyhwm++;
+    assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) );
+  }
+#endif
+#if YYSTACKDEPTH>0 
+  /* Fixed-size stack: undo the increment and report the overflow. */
+  if( yypParser->yytos>yypParser->yystackEnd ){
+    yypParser->yytos--;
+    yyStackOverflow(yypParser);
+    return;
+  }
+#else
+  /* Growable stack: overflow only if yyGrowStack() returns non-zero. */
+  if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz] ){
+    if( yyGrowStack(yypParser) ){
+      yypParser->yytos--;
+      yyStackOverflow(yypParser);
+      return;
+    }
+  }
+#endif
+  /* Action codes above YY_MAX_SHIFT are rebased from the SHIFTREDUCE range
+  ** to the REDUCE range, so the stack entry records a pending reduce. */
+  if( yyNewState > YY_MAX_SHIFT ){
+    yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE;
+  }
+  yytos = yypParser->yytos;
+  yytos->stateno = yyNewState;
+  yytos->major = yyMajor;
+  yytos->minor.yy0 = yyMinor;
+  yyTraceShift(yypParser, yyNewState, "Shift");
+}
+
+/* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side
+** of that rule.  yy_reduce() uses this value as the symbol code pushed
+** after the reduction.  Per the rule comments below, the codes are:
+** 8 = triple, 9 = subject, 10 = predicate, 11 = object, 12 = ntriplesDoc,
+** 13 = triples, 14 = eol, 15 = ws. */
+static const YYCODETYPE yyRuleInfoLhs[] = {
+     8,  /* (0) triple ::= ws subject ws predicate ws object ws DOT */
+    12,  /* (1) ntriplesDoc ::= triples EOF */
+    13,  /* (2) triples ::= eol */
+    13,  /* (3) triples ::= triple eol */
+    13,  /* (4) triples ::= triples triple eol */
+     9,  /* (5) subject ::= IRIREF */
+     9,  /* (6) subject ::= BNODE */
+    10,  /* (7) predicate ::= IRIREF */
+    11,  /* (8) object ::= IRIREF */
+    11,  /* (9) object ::= BNODE */
+    11,  /* (10) object ::= LITERAL */
+    14,  /* (11) eol ::= EOL */
+    14,  /* (12) eol ::= eol EOL */
+    15,  /* (13) ws ::= */
+    15,  /* (14) ws ::= WS */
+};
+
+/* For rule J, yyRuleInfoNRhs[J] contains the negative of the number
+** of symbols on the right-hand side of that rule.  yy_reduce() adds this
+** (negative) value to the stack pointer to drop the RHS entries; a value
+** of 0 marks an empty rule, for which yy_reduce() first checks that the
+** stack can grow by one entry. */
+static const signed char yyRuleInfoNRhs[] = {
+   -8,  /* (0) triple ::= ws subject ws predicate ws object ws DOT */
+   -2,  /* (1) ntriplesDoc ::= triples EOF */
+   -1,  /* (2) triples ::= eol */
+   -2,  /* (3) triples ::= triple eol */
+   -3,  /* (4) triples ::= triples triple eol */
+   -1,  /* (5) subject ::= IRIREF */
+   -1,  /* (6) subject ::= BNODE */
+   -1,  /* (7) predicate ::= IRIREF */
+   -1,  /* (8) object ::= IRIREF */
+   -1,  /* (9) object ::= BNODE */
+   -1,  /* (10) object ::= LITERAL */
+   -1,  /* (11) eol ::= EOL */
+   -2,  /* (12) eol ::= eol EOL */
+    0,  /* (13) ws ::= */
+   -1,  /* (14) ws ::= WS */
+};
+
+static void yy_accept(yyParser*);  /* Forward Declaration */
+
+/*
+** Perform a reduce action and the shift that must immediately
+** follow the reduce.
+**
+** The yyLookahead and yyLookaheadToken parameters provide reduce actions
+** access to the lookahead token (if any).  The yyLookahead will be YYNOCODE
+** if the lookahead token has already been consumed.  As this procedure is
+** only called from one place, optimizing compilers will in-line it, which
+** means that the extra parameters have no performance impact.
+*/
+static YYACTIONTYPE yy_reduce(
+  yyParser *yypParser,         /* The parser */
+  unsigned int yyruleno,       /* Number of the rule by which to reduce */
+  int yyLookahead,             /* Lookahead token, or YYNOCODE if none */
+  ParseTOKENTYPE yyLookaheadToken  /* Value of the lookahead token */
+  ParseCTX_PDECL                   /* %extra_context */
+){
+  int yygoto;                     /* The next state */
+  YYACTIONTYPE yyact;             /* The next action */
+  yyStackEntry *yymsp;            /* The top of the parser's stack */
+  int yysize;                     /* Amount to pop the stack */
+  ParseARG_FETCH
+  /* The lookahead is not used by any action of this grammar. */
+  (void)yyLookahead;
+  (void)yyLookaheadToken;
+  yymsp = yypParser->yytos;
+  assert( yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) );
+#ifndef NDEBUG
+  if( yyTraceFILE ){
+    yysize = yyRuleInfoNRhs[yyruleno];
+    if( yysize ){
+      fprintf(yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n",
+        yyTracePrompt,
+        yyruleno, yyRuleName[yyruleno],
+        yyruleno<YYNRULE_WITH_ACTION ? "" : " without external action",
+        yymsp[yysize].stateno);
+    }else{
+      fprintf(yyTraceFILE, "%sReduce %d [%s]%s.\n",
+        yyTracePrompt, yyruleno, yyRuleName[yyruleno],
+        yyruleno<YYNRULE_WITH_ACTION ? "" : " without external action");
+    }
+  }
+#endif /* NDEBUG */
+
+  /* Check that the stack is large enough to grow by a single entry
+  ** if the RHS of the rule is empty.  This ensures that there is room
+  ** enough on the stack to push the LHS value */
+  if( yyRuleInfoNRhs[yyruleno]==0 ){
+#ifdef YYTRACKMAXSTACKDEPTH
+    if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
+      yypParser->yyhwm++;
+      assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack));
+    }
+#endif
+#if YYSTACKDEPTH>0 
+    if( yypParser->yytos>=yypParser->yystackEnd ){
+      yyStackOverflow(yypParser);
+      /* The call to yyStackOverflow() above pops the stack until it is
+      ** empty, causing the main parser loop to exit.  So the return value
+      ** is never used and does not matter. */
+      return 0;
+    }
+#else
+    if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){
+      if( yyGrowStack(yypParser) ){
+        yyStackOverflow(yypParser);
+        /* The call to yyStackOverflow() above pops the stack until it is
+        ** empty, causing the main parser loop to exit.  So the return value
+        ** is never used and does not matter. */
+        return 0;
+      }
+      /* Re-read the top of stack after the yyGrowStack() call. */
+      yymsp = yypParser->yytos;
+    }
+#endif
+  }
+
+  switch( yyruleno ){
+  /* Beginning here are the reduction cases.  A typical example
+  ** follows:
+  **   case 0:
+  **  #line <lineno> <grammarfile>
+  **     { ... }           // User supplied code
+  **  #line <lineno> <thisfile>
+  **     break;
+  */
+/********** Begin reduce actions **********************************************/
+      /* Rule 0: build a triple from the subject, predicate and object values
+      ** (stack slots -6, -4, -2) and pass it to LSUP_graph_add_iter() with
+      ** the %extra_argument `it`.  The result is stored in slot -7, which
+      ** becomes the LHS entry once the eight RHS entries are dropped. */
+      case 0: /* triple ::= ws subject ws predicate ws object ws DOT */
+#line 39 "src/codec/grammar_nt.y"
+{
+
+                yymsp[-7].minor.yy6 = LSUP_triple_new (yymsp[-6].minor.yy10, yymsp[-4].minor.yy10, yymsp[-2].minor.yy10);
+                LSUP_graph_add_iter (it, yymsp[-7].minor.yy6);
+            }
+#line 946 "src/codec/grammar_nt.c"
+        break;
+      /* Rules 3 and 4 have empty actions; the only work is running the
+      ** destructor for the `triple` symbol (code 8) in slot -1. */
+      case 3: /* triples ::= triple eol */
+{  yy_destructor(yypParser,8,&yymsp[-1].minor);
+#line 36 "src/codec/grammar_nt.y"
+{
+}
+#line 953 "src/codec/grammar_nt.c"
+}
+        break;
+      case 4: /* triples ::= triples triple eol */
+#line 37 "src/codec/grammar_nt.y"
+{
+}
+#line 960 "src/codec/grammar_nt.c"
+  yy_destructor(yypParser,8,&yymsp[-1].minor);
+        break;
+      default:
+      /* (1) ntriplesDoc ::= triples EOF */ yytestcase(yyruleno==1);
+      /* (2) triples ::= eol */ yytestcase(yyruleno==2);
+      /* (5) subject ::= IRIREF */ yytestcase(yyruleno==5);
+      /* (6) subject ::= BNODE */ yytestcase(yyruleno==6);
+      /* (7) predicate ::= IRIREF */ yytestcase(yyruleno==7);
+      /* (8) object ::= IRIREF */ yytestcase(yyruleno==8);
+      /* (9) object ::= BNODE */ yytestcase(yyruleno==9);
+      /* (10) object ::= LITERAL */ yytestcase(yyruleno==10);
+      /* (11) eol ::= EOL */ yytestcase(yyruleno==11);
+      /* (12) eol ::= eol EOL */ yytestcase(yyruleno==12);
+      /* (13) ws ::= */ yytestcase(yyruleno==13);
+      /* (14) ws ::= WS */ yytestcase(yyruleno==14);
+        break;
+/********** End reduce actions ************************************************/
+  };
+  assert( yyruleno<sizeof(yyRuleInfoLhs)/sizeof(yyRuleInfoLhs[0]) );
+  /* Look up the action for the LHS symbol in the state that is exposed
+  ** once the RHS entries are dropped (yysize is zero or negative). */
+  yygoto = yyRuleInfoLhs[yyruleno];
+  yysize = yyRuleInfoNRhs[yyruleno];
+  yyact = yy_find_reduce_action(yymsp[yysize].stateno,(YYCODETYPE)yygoto);
+
+  /* There are no SHIFTREDUCE actions on nonterminals because the table
+  ** generator has simplified them to pure REDUCE actions. */
+  assert( !(yyact>YY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) );
+
+  /* It is not possible for a REDUCE to be followed by an error */
+  assert( yyact!=YY_ERROR_ACTION );
+
+  /* Replace the RHS entries with a single entry for the LHS symbol. */
+  yymsp += yysize+1;
+  yypParser->yytos = yymsp;
+  yymsp->stateno = (YYACTIONTYPE)yyact;
+  yymsp->major = (YYCODETYPE)yygoto;
+  yyTraceShift(yypParser, yyact, "... then shift");
+  return yyact;
+}
+
+/*
+** The following code executes when the parse fails
+*/
+#ifndef YYNOERRORRECOVERY
+static void yy_parse_failed(
+  yyParser *yypParser           /* The parser */
+){
+  /* Logs "Fail!" to the trace file (debug builds only) and pops every
+  ** entry off the parser stack.  The %parse_failure section below is
+  ** empty, so no user code runs on failure. */
+  ParseARG_FETCH
+  ParseCTX_FETCH
+#ifndef NDEBUG
+  if( yyTraceFILE ){
+    fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt);
+  }
+#endif
+  while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser);
+  /* Here code is inserted which will be executed whenever the
+  ** parser fails */
+/************ Begin %parse_failure code ***************************************/
+/************ End %parse_failure code *****************************************/
+  ParseARG_STORE /* Suppress warning about unused %extra_argument variable */
+  ParseCTX_STORE
+}
+#endif /* YYNOERRORRECOVERY */
+
+/*
+** The following code executes when a syntax error first occurs.
+*/
+static void yy_syntax_error(
+  yyParser *yypParser,           /* The parser */
+  int yymajor,                   /* The major type of the error token */
+  ParseTOKENTYPE yyminor         /* The minor type of the error token */
+){
+  /* The %syntax_error section below is empty, so this function performs
+  ** no reporting of its own; TOKEN is the alias that user code placed in
+  ** that section would use for the offending token value. */
+  ParseARG_FETCH
+  ParseCTX_FETCH
+#define TOKEN yyminor
+/************ Begin %syntax_error code ****************************************/
+/************ End %syntax_error code ******************************************/
+  ParseARG_STORE /* Suppress warning about unused %extra_argument variable */
+  ParseCTX_STORE
+}
+
+/*
+** The following is executed when the parser accepts
+*/
+static void yy_accept(
+  yyParser *yypParser           /* The parser */
+){
+  /* Logs "Accept!" to the trace file (debug builds only) and resets the
+  ** error counter to -1.  The stack is expected to be empty on entry. 
+  ** The %parse_accept section below is empty, so no user code runs. */
+  ParseARG_FETCH
+  ParseCTX_FETCH
+#ifndef NDEBUG
+  if( yyTraceFILE ){
+    fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt);
+  }
+#endif
+#ifndef YYNOERRORRECOVERY
+  yypParser->yyerrcnt = -1;
+#endif
+  assert( yypParser->yytos==yypParser->yystack );
+  /* Here code is inserted which will be executed whenever the
+  ** parser accepts */
+/*********** Begin %parse_accept code *****************************************/
+/*********** End %parse_accept code *******************************************/
+  ParseARG_STORE /* Suppress warning about unused %extra_argument variable */
+  ParseCTX_STORE
+}
+
+/* The main parser program.
+** The first argument is a pointer to a structure obtained from
+** "ParseAlloc" which describes the current state of the parser.
+** The second argument is the major token number.  The third is
+** the minor token.  The fourth optional argument is whatever the
+** user wants (and specified in the grammar) and is available for
+** use by the action routines.
+**
+** Inputs:
+** <ul>
+** <li> A pointer to the parser (an opaque structure.)
+** <li> The major token number.
+** <li> The minor token number.
+** <li> An optional argument of a grammar-specified type.
+** </ul>
+**
+** Outputs:
+** None.
+*/
+void Parse(
+  void *yyp,                   /* The parser */
+  int yymajor,                 /* The major token code number */
+  ParseTOKENTYPE yyminor       /* The value for the token */
+  ParseARG_PDECL               /* Optional %extra_argument parameter */
+){
+  YYMINORTYPE yyminorunion;
+  YYACTIONTYPE yyact;   /* The parser action. */
+#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
+  int yyendofinput;     /* True if we are at the end of input */
+#endif
+#ifdef YYERRORSYMBOL
+  int yyerrorhit = 0;   /* True if yymajor has invoked an error */
+#endif
+  yyParser *yypParser = (yyParser*)yyp;  /* The parser */
+  ParseCTX_FETCH
+  ParseARG_STORE
+
+  assert( yypParser->yytos!=0 );
+#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
+  /* Token code 0 marks the end of input. */
+  yyendofinput = (yymajor==0);
+#endif
+
+  yyact = yypParser->yytos->stateno;
+#ifndef NDEBUG
+  if( yyTraceFILE ){
+    if( yyact < YY_MIN_REDUCE ){
+      fprintf(yyTraceFILE,"%sInput '%s' in state %d\n",
+              yyTracePrompt,yyTokenName[yymajor],yyact);
+    }else{
+      fprintf(yyTraceFILE,"%sInput '%s' with pending reduce %d\n",
+              yyTracePrompt,yyTokenName[yymajor],yyact-YY_MIN_REDUCE);
+    }
+  }
+#endif
+
+  /* Keep applying actions for this token: reductions loop back for another
+  ** lookup, while a shift, an accept or an error ends the processing of
+  ** the token. */
+  do{
+    assert( yyact==yypParser->yytos->stateno );
+    yyact = yy_find_shift_action((YYCODETYPE)yymajor,yyact);
+    if( yyact >= YY_MIN_REDUCE ){
+      yyact = yy_reduce(yypParser,yyact-YY_MIN_REDUCE,yymajor,
+                        yyminor ParseCTX_PARAM);
+    }else if( yyact <= YY_MAX_SHIFTREDUCE ){
+      yy_shift(yypParser,yyact,(YYCODETYPE)yymajor,yyminor);
+#ifndef YYNOERRORRECOVERY
+      /* One more token shifted successfully since the last error. */
+      yypParser->yyerrcnt--;
+#endif
+      break;
+    }else if( yyact==YY_ACCEPT_ACTION ){
+      yypParser->yytos--;
+      yy_accept(yypParser);
+      return;
+    }else{
+      assert( yyact == YY_ERROR_ACTION );
+      /* Wrap the token value so that it can be handed to yy_destructor(). */
+      yyminorunion.yy0 = yyminor;
+#ifdef YYERRORSYMBOL
+      int yymx;
+#endif
+#ifndef NDEBUG
+      if( yyTraceFILE ){
+        fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt);
+      }
+#endif
+#ifdef YYERRORSYMBOL
+      /* A syntax error has occurred.
+      ** The response to an error depends upon whether or not the
+      ** grammar defines an error token "ERROR".
+      **
+      ** This is what we do if the grammar does define ERROR:
+      **
+      **  * Call the %syntax_error function.
+      **
+      **  * Begin popping the stack until we enter a state where
+      **    it is legal to shift the error symbol, then shift
+      **    the error symbol.
+      **
+      **  * Set the error count to three.
+      **
+      **  * Begin accepting and shifting new tokens.  No new error
+      **    processing will occur until three tokens have been
+      **    shifted successfully.
+      **
+      */
+      if( yypParser->yyerrcnt<0 ){
+        yy_syntax_error(yypParser,yymajor,yyminor);
+      }
+      yymx = yypParser->yytos->major;
+      if( yymx==YYERRORSYMBOL || yyerrorhit ){
+#ifndef NDEBUG
+        if( yyTraceFILE ){
+          fprintf(yyTraceFILE,"%sDiscard input token %s\n",
+             yyTracePrompt,yyTokenName[yymajor]);
+        }
+#endif
+        yy_destructor(yypParser, (YYCODETYPE)yymajor, &yyminorunion);
+        yymajor = YYNOCODE;
+      }else{
+        while( yypParser->yytos >= yypParser->yystack
+            && (yyact = yy_find_reduce_action(
+                        yypParser->yytos->stateno,
+                        YYERRORSYMBOL)) > YY_MAX_SHIFTREDUCE
+        ){
+          yy_pop_parser_stack(yypParser);
+        }
+        if( yypParser->yytos < yypParser->yystack || yymajor==0 ){
+          yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);
+          yy_parse_failed(yypParser);
+#ifndef YYNOERRORRECOVERY
+          yypParser->yyerrcnt = -1;
+#endif
+          yymajor = YYNOCODE;
+        }else if( yymx!=YYERRORSYMBOL ){
+          yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor);
+        }
+      }
+      yypParser->yyerrcnt = 3;
+      yyerrorhit = 1;
+      if( yymajor==YYNOCODE ) break;
+      yyact = yypParser->yytos->stateno;
+#elif defined(YYNOERRORRECOVERY)
+      /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to
+      ** do any kind of error recovery.  Instead, simply invoke the syntax
+      ** error routine and continue going as if nothing had happened.
+      **
+      ** Applications can set this macro (for example inside %include) if
+      ** they intend to abandon the parse upon the first syntax error seen.
+      */
+      yy_syntax_error(yypParser,yymajor, yyminor);
+      yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);
+      break;
+#else  /* YYERRORSYMBOL is not defined */
+      /* This is what we do if the grammar does not define ERROR:
+      **
+      **  * Report an error message, and throw away the input token.
+      **
+      **  * If the input token is $, then fail the parse.
+      **
+      ** As before, subsequent error messages are suppressed until
+      ** three input tokens have been successfully shifted.
+      */
+      if( yypParser->yyerrcnt<=0 ){
+        yy_syntax_error(yypParser,yymajor, yyminor);
+      }
+      yypParser->yyerrcnt = 3;
+      yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);
+      if( yyendofinput ){
+        yy_parse_failed(yypParser);
+#ifndef YYNOERRORRECOVERY
+        yypParser->yyerrcnt = -1;
+#endif
+      }
+      break;
+#endif
+    }
+  }while( yypParser->yytos>yypParser->yystack );
+#ifndef NDEBUG
+  /* Dump the symbols currently on the stack, skipping the base entry. */
+  if( yyTraceFILE ){
+    yyStackEntry *i;
+    char cDiv = '[';
+    fprintf(yyTraceFILE,"%sReturn. Stack=",yyTracePrompt);
+    for(i=&yypParser->yystack[1]; i<=yypParser->yytos; i++){
+      fprintf(yyTraceFILE,"%c%s", cDiv, yyTokenName[i->major]);
+      cDiv = ' ';
+    }
+    fprintf(yyTraceFILE,"]\n");
+  }
+#endif
+  return;
+}
+
+/*
+** Return the fallback token corresponding to canonical token iToken, or
+** 0 if iToken has no fallback.
+*/
+int ParseFallback(int iToken){
+#ifndef YYFALLBACK
+  /* No fallback table is compiled in: every token reports "no fallback". */
+  (void)iToken;
+  return 0;
+#else
+  assert( iToken<(int)(sizeof(yyFallback)/sizeof(yyFallback[0])) );
+  return yyFallback[iToken];
+#endif
+}

+ 7 - 0
src/codec/grammar_nt.h

@@ -0,0 +1,7 @@
+/* Terminal token codes for the N-Triples grammar.
+ * NOTE(review): presumably emitted by the parser generator together with
+ * grammar_nt.c, in which case this file should not be edited by hand --
+ * confirm against the build rules. */
+#define T_EOF                              1
+#define T_DOT                              2
+#define T_IRIREF                           3
+#define T_BNODE                            4
+#define T_LITERAL                          5
+#define T_EOL                              6
+#define T_WS                               7

+ 0 - 0
src/codec/nt_grammar.y → src/codec/grammar_nt.y


+ 218 - 0
src/codec/grammar_ttl.y

@@ -0,0 +1,218 @@
+%include {
+
+/** @brief Lemon parser grammar for Turtle (TTL).
+ *
+ * The `lemon' parser generator executable must be in your PATH:
+ * https://sqlite.org/src/doc/trunk/doc/lemon.html
+ *
+ * To generate the parser, run: `lemon ${FILE}'
+ *
+ * TTL EBNF: https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar
+ */
+
+#include "graph.h"
+
+
+/** @brief List of predicates paired with one object list per predicate.
+ *
+ * The two arrays are parallel: the entry at a given index of `objList` holds
+ * the objects for the predicate at the same index of `predList`.
+ */
+typedef struct {
+    LSUP_Term **    predList;       ///< NULL-terminated array of term handles.
+    LSUP_Term ***   objList;        /**<
+                                      * NULL-terminated array of
+                                      * NULL-terminated arrays of term handles.
+                                      * The indices of the outer array are
+                                      * equal to the indices of the associated
+                                      * predicate in the predicate list.
+                                      */
+} PredObjList;
+
+}
+
+
+%token_type { LSUP_Term * }
+%token_prefix "T_"
+
+%type triple            { LSUP_Triple * }
+%destructor triple      { LSUP_triple_free ($$); }
+
+%type subject           { LSUP_Term * }
+%destructor subject     { LSUP_term_free ($$); }
+
+%type predicate         { LSUP_Term * }
+%destructor predicate   { LSUP_term_free ($$); }
+
+%type object            { LSUP_Term * }
+%destructor object      { LSUP_term_free ($$); }
+
+/* NULL-terminated array of object term handles. */
+%type objList           { LSUP_Term ** }
+/* Frees every term in the array up to the NULL terminator.
+ * NOTE(review): the array itself is not freed here -- confirm that it is
+ * released elsewhere. */
+%destructor objList     {
+    for (size_t i = 0; $$[i]; i++) {
+        LSUP_term_free ($$[i]);
+    }
+}
+
+/* NULL-terminated array of NULL-terminated arrays of term handles. */
+%type predObjList       { LSUP_Term *** }
+/* Frees every term of every inner array.
+ * NOTE(review): neither the inner arrays nor the outer array are freed
+ * here -- confirm that they are released elsewhere. */
+%destructor predObjList {
+    for (size_t i = 0; $$[i]; i++) {
+        for (size_t j = 0; $$[i][j]; j++) {
+            LSUP_term_free ($$[i][j]);
+        }
+    }
+}
+
+%default_type           { void * }
+
+%extra_argument         { LSUP_GraphIterator *it }
+
+
+// Rules.
+
+/*
+ * Productions follow the Turtle EBNF quoted at the end of this file.
+ *
+ * Lemon syntax reminders: every rule uses `::=` and must be terminated by a
+ * `.` placed before its optional `{ ... }` action block.
+ *
+ * All action blocks below are still empty placeholders.
+ */
+
+turtleDoc   ::= statements EOF .
+
+statements  ::= .
+statements  ::= statement .
+statements  ::= statements WS statement .
+
+statement(A) ::= directive ows EOS . {
+            }
+statement(A) ::= triples ows EOS . {
+            }
+
+directive 	::= prefixID .
+directive   ::= base .
+
+// A prefix declaration, with or without a prefix name before the colon.
+prefixID    ::= PREFIX WS PFX_NAME COLON IRIREF . {
+            }
+prefixID    ::= PREFIX WS COLON IRIREF . {
+            }
+
+base        ::= BASE WS IRIREF . {
+            }
+
+triples 	::= subject predObjList .
+
+predObjList ::= predObjList SEMICOLON .
+predObjList ::= predObjList SEMICOLON verb objectList .
+predObjList ::= verb objectList .
+
+objectList 	::= objectList COMMA object .
+objectList 	::= object .
+
+verb        ::= predicate .
+verb        ::= rdfType .
+
+comment 	::= COMMENT .
+
+subject 	::= resource .
+subject 	::= blank .
+
+predicate   ::= resource .
+
+object 	    ::= resource .
+object 	    ::= blank .
+object 	    ::= literal .
+
+literal(A)  ::= qString(D) . {
+            }
+literal(A)  ::= qString(D) LANGTAG(L) . {
+            }
+literal(A)  ::= qString(D) DT_MARKER resource(M) . {
+            }
+literal(A)  ::= INTEGER(D) . {
+            }
+literal(A)  ::= DOUBLE(D) . {
+            }
+literal(A)  ::= DECIMAL(D) . {
+            }
+literal(A)  ::= BOOLEAN(D) . {
+            }
+
+blank       ::= nodeID .
+blank       ::= LBRACKET RBRACKET .
+blank       ::= LBRACKET predObjList RBRACKET .
+blank       ::= collection .
+
+collection  ::= LPAREN itemList RPAREN .
+
+itemList    ::= itemList object .
+itemList    ::= object .
+itemList    ::= .
+
+resource    ::= iriref .
+resource    ::= qname .
+
+rdfType(A) ::= RDF_TYPE . {
+            }
+
+iriref(A) ::= IRIREF(D) . {
+            }
+qname(A)    ::= PFX_NAME(P) COLON NAME(D) . {
+            }
+qname(A)    ::= COLON NAME(D) . {
+            }
+
+// Named qString / nodeID to match the references in `literal` and `blank`.
+qString(A)  ::= STRING(D) . {
+            }
+
+nodeID(A)   ::= NODE_ID(D) . {
+            }
+
+ows         ::= WS.
+ows         ::=.
+
+opt_pfx     ::= PFX .
+opt_pfx     ::= .
+
+/*
+ * From https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar :
+
+
+[1]	turtleDoc 	::= 	statement*
+[2]	statement 	::= 	directive '.' | triples '.' | ws+
+[3]	directive 	::= 	prefixID | base
+[4]	prefixID 	::= 	'@prefix' ws+ prefixName? ':' uriref
+[5]	base 	::= 	'@base' ws+ uriref
+[6]	triples 	::= 	subject predicateObjectList
+[7]	predicateObjectList 	::= 	verb objectList ( ';' verb objectList )* ( ';')?
+[8]	objectList 	::= 	object ( ',' object)*
+[9]	verb 	::= 	predicate | 'a'
+[10]	comment 	::= 	'#' ( [^#xA#xD] )*
+[11]	subject 	::= 	resource | blank
+[12]	predicate 	::= 	resource
+[13]	object 	::= 	resource | blank | literal
+[14]	literal 	::= 	quotedString ( '@' language )? | datatypeString | integer | double | decimal | boolean
+[15]	datatypeString 	::= 	quotedString '^^' resource
+[16]	integer 	::= 	('-' | '+') ? [0-9]+
+[17]	double 	::= 	('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent )
+[18]	decimal 	::= 	('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ )
+[19]	exponent 	::= 	[eE] ('-' | '+')? [0-9]+
+[20]	boolean 	::= 	'true' | 'false'
+[21]	blank 	::= 	nodeID | '[]' | '[' predicateObjectList ']' | collection
+[22]	itemList 	::= 	object+
+[23]	collection 	::= 	'(' itemList? ')'
+[24]	ws 	::= 	#x9 | #xA | #xD | #x20 | comment
+[25]	resource 	::= 	uriref | qname
+[26]	nodeID 	::= 	'_:' name
+[27]	qname 	::= 	prefixName? ':' name?
+[28]	uriref 	::= 	'<' relativeURI '>'
+[29]	language 	::= 	[a-z]+ ('-' [a-z0-9]+ )*
+[30]	nameStartChar 	::= 	[A-Z] | "_" | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+[31]	nameChar 	::= 	nameStartChar | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
+[32]	name 	::= 	nameStartChar nameChar*
+[33]	prefixName 	::= 	( nameStartChar - '_' ) nameChar*
+[34]	relativeURI 	::= 	ucharacter*
+[35]	quotedString 	::= 	string | longString
+[36]	string 	::= 	#x22 scharacter* #x22
+[37]	longString 	::= 	#x22 #x22 #x22 lcharacter* #x22 #x22 #x22
+[38]	character 	::= 	'\u' hex hex hex hex |
+'\U' hex hex hex hex hex hex hex hex |
+'\\' |
+[#x20-#x5B] | [#x5D-#x10FFFF]
+[39]	echaracter 	::= 	character | '\t' | '\n' | '\r'
+[40]	hex 	::= 	[#x30-#x39] | [#x41-#x46]
+[41]	ucharacter 	::= 	( character - #x3E ) | '\>'
+[42]	scharacter 	::= 	( echaracter - #x22 ) | '\"'
+[43]	lcharacter 	::= 	echaracter | '\"' | #x9 | #xA | #xD 
+
+
+
+*/

+ 4 - 4
src/codec/ttl_lexer.re → src/codec/lexer_nt.re

@@ -1,5 +1,5 @@
-#include "nt_grammar.h"
-#include "nt_parser.h"
+#include "grammar_nt.h"
+#include "parser_nt.h"
 
 
 #define YYCTYPE     unsigned char
@@ -14,7 +14,7 @@
 #ifdef LSUP_RDF_STREAM_CHUNK_SIZE
 #define CHUNK_SIZE LSUP_RDF_STREAM_CHUNK_SIZE
 #else
-#define CHUNK_SIZE 8192
+#define CHUNK_SIZE 8191
 #endif
 
 
@@ -174,7 +174,7 @@ loop:
     _WS                 = [\x09\x20];
     WS                  = _WS+;
     EOL                 = [\x0D\x0A] (_WS | [\x0D\x0A])*;
-    DOT                 = [.];
+    DOT                 = ".";
     HEX                 = [0-9A-Fa-f];
     ECHAR               = [\\] [tbnrf"'\\];
     UCHAR               = "\\u" HEX{4} | "\\U" HEX{8};

+ 188 - 136
src/codec/nt_lexer.re → src/codec/lexer_ttl.re

@@ -1,5 +1,5 @@
-#include "nt_grammar.h"
-#include "nt_parser.h"
+#include "grammar_ttl.h"
+#include "parser_ttl.h"
 
 
 #define YYCTYPE     unsigned char
@@ -14,7 +14,7 @@
 #ifdef LSUP_RDF_STREAM_CHUNK_SIZE
 #define CHUNK_SIZE LSUP_RDF_STREAM_CHUNK_SIZE
 #else
-#define CHUNK_SIZE 8192
+#define CHUNK_SIZE 8191
 #endif
 
 
@@ -30,13 +30,19 @@ typedef struct {
             *       bol;                // Address of the beginning of the
                                         //   current line (for debugging).
     unsigned        line;               // Current line no. (for debugging).
+    unsigned        stmt;               // Current statement.
     unsigned        ct;                 // Number of parsed triples.
     bool            eof;                // if we have reached EOF.
     /*!stags:re2c format = "YYCTYPE *@@;"; */
 } ParseIterator;
 
+typedef struct {
+    YYCTYPE *       data;
+    size_t          size;
+} ParserToken;
+
 
-// TODO The opposite of this is in codec_nt.c. Find a better place for both.
+// TODO The opposite of this is in codec_ttl.c. Find a better place for both.
 static inline char unescape_char(const char c) {
     switch (c) {
         case 't': return '\t';
@@ -49,47 +55,12 @@ static inline char unescape_char(const char c) {
 }
 
 
-static int fill(ParseIterator *it)
-{
-    if (it->eof) {
-        return 1;
-    }
-    const size_t shift = it->tok - it->buf;
-    if (shift < 1) {
-        return 2;
-    }
-    log_debug ("Shifting bytes: %lu", shift);
-    memmove(it->buf, it->tok, it->lim - it->tok);
-    it->lim -= shift;
-    it->cur -= shift;
-    it->mar -= shift;
-    it->tok -= shift;
-    it->lim += fread(it->lim, 1, shift, it->fh);
-    /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
-    it->lim[0] = 0;
-    it->eof |= it->lim < it->buf + CHUNK_SIZE;
-    return 0;
-}
-
-
-static void parse_init(ParseIterator *it, FILE *fh)
-{
-    it->fh = fh;
-    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE;
-    it->line = 1;
-    it->bol = it->buf;
-    it->ct = 0;
-    it->eof = 0;
-    /*!stags:re2c format = "it->@@ = NULL; "; */
-    fill (it);
-}
-
-
 /** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
  */
-static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
+ParserToken *unescape_unicode (const YYCTYPE *esc_str, size_t size)
 {
-    YYCTYPE *uc_str = malloc (size + 1);
+    ParserToken *token = malloc (sizeof (*token));
+    token->data = malloc (size + 1);
 
     size_t j = 0;
     YYCTYPE tmp_chr[5];
@@ -112,8 +83,10 @@ static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
                 int nbytes = utf8_encode (tmp_val, tmp_chr);
 
                 // Copy bytes into destination.
-                memcpy (uc_str + j, tmp_chr, nbytes);
-                log_debug ("UC byte value: %x %x", uc_str[j], uc_str[j + 1]);
+                memcpy (token->data + j, tmp_chr, nbytes);
+                log_debug (
+                        "UC byte value: %x %x",
+                        token->data[j], token->data[j + 1]);
 
                 j += nbytes;
                 i += 4;
@@ -125,19 +98,57 @@ static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
                 return NULL; // TODO encode UTF-16
 
             // Unescape other escaped characters.
-            } else uc_str[j++] = unescape_char(esc_str[i++]);
+            } else token->data[j++] = unescape_char(esc_str[i++]);
         } else {
             // Copy ASCII char verbatim.
-            uc_str[j++] = esc_str[i++];
+            token->data[j++] = esc_str[i++];
         }
     }
 
-    YYCTYPE *tmp = realloc (uc_str, j + 1);
+    YYCTYPE *tmp = realloc (token->data, j + 1);
     if (UNLIKELY (!tmp)) return NULL;
-    uc_str = tmp;
-    uc_str[j] = '\0';
+    token->data = tmp;
+    token->data[j] = '\0';
+    token->size = strlen(token->data) + 1;
 
-    return uc_str;
+    return token;
+}
+
+
+/** @brief Refill the lexer buffer from the input stream.
+ *
+ * Moves the unconsumed data (from the start of the current token up to the
+ * limit) to the head of the buffer, rebases all cursors by the same amount,
+ * and reads from the file handle into the space freed at the tail.
+ *
+ * @param[in,out] it Parse iterator whose buffer is refilled.
+ *
+ * @return 0 if the buffer was shifted and refilled; 1 if EOF had already been
+ *  flagged; 2 if the current token starts at the head of the buffer, so no
+ *  space can be freed.
+ */
+static int fill(ParseIterator *it)
+{
+    if (it->eof) {
+        return 1;
+    }
+    const size_t shift = it->tok - it->buf;
+    if (shift < 1) {
+        return 2;
+    }
+    // size_t must be printed with %zu; %lu is only right where long happens
+    // to have the same width.
+    log_debug ("Shifting bytes: %zu", shift);
+    memmove(it->buf, it->tok, it->lim - it->tok);
+    it->lim -= shift;
+    it->cur -= shift;
+    it->mar -= shift;
+    it->tok -= shift;
+    it->lim += fread(it->lim, 1, shift, it->fh);
+    /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
+    // Terminate the data with the 0 sentinel that the lexer is configured
+    // with (re2c:eof = 0).
+    it->lim[0] = 0;
+    // Flag EOF when the buffer could not be filled up to CHUNK_SIZE.
+    it->eof |= it->lim < it->buf + CHUNK_SIZE;
+    return 0;
+}
+
+
+/** @brief Prepare a parse iterator for a new input stream.
+ *
+ * Resets counters and flags, parks every cursor at buf + CHUNK_SIZE and then
+ * calls fill() to load the first chunk of data.
+ *
+ * @param[out] it Parse iterator to initialize.
+ *
+ * @param[in] fh Open file handle to read from.
+ */
+static void parse_init (ParseIterator *it, FILE *fh)
+{
+    // Counters and flags.
+    it->ct = 0;
+    it->eof = false;
+    it->line = it->stmt = 1;
+
+    // Input source and buffer cursors.
+    it->fh = fh;
+    it->bol = it->buf;
+    it->lim = it->buf + CHUNK_SIZE;
+    it->tok = it->lim;
+    it->mar = it->tok;
+    it->cur = it->mar;
+
+    /*!stags:re2c format = "it->@@ = NULL; "; */
+    fill (it);
 }
 
 
@@ -150,7 +161,7 @@ void ParseFree();
 
 // Lexer.
 
-static int lex (ParseIterator *it, LSUP_Term **term)
+static int lex (ParseIterator *it, ParserToken **token_p)
 {
     const YYCTYPE *lit_data_e, *dtype_s, *lang_s;
 
@@ -158,7 +169,7 @@ loop:
 
     it->tok = it->cur;
 
-    *term = NULL;
+    *token_p = NULL;
 
     /*!re2c
     re2c:eof = 0;
@@ -169,116 +180,155 @@ loop:
     re2c:define:YYFILL:naked = 1;
 
 
-    // For unresolved and partially resolved inconsistencies of the spec, see
-    // https://lists.w3.org/Archives/Public/public-rdf-comments/2017Jun/0000.html
-    _WS                 = [\x09\x20];
-    WS                  = _WS+;
-    EOL                 = [\x0D\x0A] (_WS | [\x0D\x0A])*;
-    DOT                 = [.];
-    HEX                 = [0-9A-Fa-f];
-    ECHAR               = [\\] [tbnrf"'\\];
-    UCHAR               = "\\u" HEX{4} | "\\U" HEX{8};
-    PN_CHARS_BASE       = [A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF];
-    PN_CHARS_U          = PN_CHARS_BASE | '_' | ':';
-    PN_CHARS            = PN_CHARS_U | '-' | [0-9\u00B7\u0300-\u036F\u203F-\u2040];
-    IRI_CHARS           = ([^\x00-\x20<>"{}|^`\\] | UCHAR)*;
-    LITERAL_QUOTE       = ["] ([^\x22\x5C\x0A\x0D] | ECHAR|UCHAR)* ["];
-    LANGTAG             = [@] [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*;
-
-    IRIREF              = [<] IRI_CHARS [>];
-    LITERAL             = LITERAL_QUOTE @lit_data_e _WS* ("^^" _WS* @dtype_s IRIREF | @lang_s LANGTAG)?;
-    BNODE               = "_:" ((PN_CHARS_U | [0-9]) ((PN_CHARS | ".")* PN_CHARS)?);
-    COMMENT             = "#" .*;
+    // Character classes.
+    EOL             = [\x0A\x0D];
+    NCWS            = [\x09\x20] | EOL;
+    HEX             = [\x30-\x39\x41-\x46];
+    CHARACTER       = "\\u" HEX{4} | "\\U" HEX{8} | '\\' | [\x20-\x5B] | [\u005D-\U0010FFFF];
+    NSTART_CHAR     = [a-zA-Z_] | [\u00C0-\u00D6] | [\u00D8-\u00F6] | [\u00F8-\u02FF] | [\u0370-\u037D] | [\u037F-\u1FFF] | [\u200C-\u200D] | [\u2070-\u218F] | [\u2C00-\u2FEF] | [\u3001-\uD7FF] | [\uF900-\uFDCF] | [\uFDF0-\uFFFD] | [\U00010000-\U000EFFFF];
+    NAME_CHAR       = NAME_START_CHAR | '-' | [0-9\u00B7\u0300-\u036F\u203F-\u2040];
+    ECHAR           = CHARACTER | [\t\n\r];
+    UCHAR           = (CHARACTER \ [\x3E]) | '>';
+    SCHAR           = (CHARACTER \ [\x22]) | '"';
+    LCHAR           = ECHAR | ["\x09\x0A\x0D];
+
+    // Constructs.
+    COMMENT         = '#' ( [^\x0A\x0D] )*;
+    WS              = NCWS+ | COMMENT;
+    EXPONENT        = [eE] INTEGER;
+    LANGUAGE        = [a-z]+ ('-' [a-z0-9]+)*;
+    NODE_ID         = '_:' NAME;
+    REL_IRI         = UCHAR*;
+
+    // Token aliases.
+    IRIREF          = '<' REL_IRI '>';
+    PFX_NAME        = (NSTART_CHAR \ [_]) NAME_CHAR*;
+    NAME            = NSTART_CHAR NAME_CHAR*;
+    LSTRING         = \x22 \x22 \x22 LCHAR \x22 \x22 \x22;
+    STRING          = \x22 SCHAR \x22;
+    LANGTAG         = '@' LANGUAGE
+    INTEGER         = ('-' | '+')? [0-9]+;
+    DOUBLE          = ('-' | '+') ? ([0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT);
+    DECIMAL         = ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ );
+    BOOLEAN         = 'true' | 'false';
+    //RDF_TYPE        = NCWS 'a' / WS;
 
 
+    $ {
+        log_debug ("End of document.");
+        return T_EOF;
+    }
+
+    '.' {
+        log_debug ("End of statement #%u.", it->stmt);
+        it->stmt++;
+        return T_EOS;
+    }
+
     EOL {
         it->line ++;
         it->bol = YYCURSOR;
         log_debug ("New line: #%u.", it->line);
-        return T_EOL;
+        goto loop;
     }
 
-    $ {
-        log_debug ("End of buffer.");
-        return T_EOF;
+    IRIREF {
+        *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
+        log_debug ("URI data: %s", (*token_p)->data);
+
+        return T_IRIREF;
     }
 
-    IRIREF {
-        YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
+    PFX_NAME {
+        // The pattern has no trailing delimiter, so the whole match
+        // (YYCURSOR - it->tok bytes) is the prefix name.
+        *token_p = unescape_unicode (it->tok, YYCURSOR - it->tok);
+        log_debug ("Prefix name: %s", (*token_p)->data);
 
-        log_debug ("URI data: %s", data);
+        return T_PFX_NAME;
+    }
 
-        *term = LSUP_iriref_new ((char*)data, NULL);
-        free (data);
+    NAME {
+        // The pattern has no trailing delimiter, so the whole match
+        // (YYCURSOR - it->tok bytes) is the name.
+        *token_p = unescape_unicode (it->tok, YYCURSOR - it->tok);
+        log_debug ("name: %s", (*token_p)->data);
 
-        return T_IRIREF;
+        return T_NAME;
     }
 
-    LITERAL {
-        // Only unescape Unicode from data.
-        size_t size = lit_data_e - it->tok - 2;
-        YYCTYPE *data = unescape_unicode (it->tok + 1, size);
-        log_trace ("Literal data: %s", data);
-
-        char *metadata = NULL;
-        const YYCTYPE *md_marker;
-        LSUP_TermType type = LSUP_TERM_LITERAL;
-
-        if (dtype_s) {
-            md_marker = dtype_s;
-            size = YYCURSOR - md_marker - 1;
-        } else if (lang_s) {
-            type = LSUP_TERM_LT_LITERAL;
-            md_marker = lang_s;
-            size = YYCURSOR - md_marker;
-        } else md_marker = NULL;
-
-        if (md_marker) {
-            metadata = malloc (size);
-            memcpy (metadata, md_marker + 1, size);
-            metadata [size - 1] = '\0';
-            log_trace ("metadata: %s", metadata);
-        }
+    LSTRING {
+        // Strip the three opening and the three closing quotes: the payload
+        // starts at tok + 3 and is 6 bytes shorter than the match.
+        *token_p = unescape_unicode (it->tok + 3, YYCURSOR - it->tok - 6);
+        log_debug ("Long string: %s", (*token_p)->data);
 
-        if (type == LSUP_TERM_LITERAL) {
-            LSUP_Term *dtype;
-            dtype = (
-                metadata ? LSUP_iriref_new ((char *) metadata, NULL) : NULL);
+        return T_LSTRING;
+    }
 
-            *term = LSUP_literal_new ((char *) data, dtype);
+    STRING {
+        // Strip the opening and closing quote from the match.
+        *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
+        log_debug ("String: %s", (*token_p)->data);
 
-        } else *term = LSUP_lt_literal_new ((char *) data, (char *) metadata);
+        return T_STRING;
+    }
 
-        free (data);
-        free (metadata);
+    LANGTAG {
+        // *token_p was reset to NULL at the top of the loop, so the token
+        // has to be allocated here before its fields are written.
+        size_t size = YYCURSOR - it->tok - 1;   // Skip the leading '@'.
+        *token_p = malloc (sizeof (**token_p));
+        if (UNLIKELY (!*token_p)) return -1;
+        (*token_p)->data = (YYCTYPE *) strndup ((const char *) it->tok + 1, size);
+        if (UNLIKELY (!(*token_p)->data)) {
+            free (*token_p);
+            *token_p = NULL;
+            return -1;
+        }
+        (*token_p)->size = size + 1;
+        log_debug ("Lang tag: %s", (*token_p)->data);
 
-        return T_LITERAL;
+        return T_LANGTAG;
     }
 
-    BNODE {
-        YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
+    INTEGER {
+        // Normalize sign: a leading '+' is dropped from the payload.
+        size_t offset = *it->tok == '+' ? 1 : 0;
+        // Bytes left after the optional '+'. Subtracting a fixed 1 would
+        // cut the last digit off unsigned and negative numbers.
+        size_t size = YYCURSOR - it->tok - offset;
 
-        log_debug ("BNode data: %s", data);
+        // *token_p was reset to NULL at the top of the loop, so the token
+        // has to be allocated here before its fields are written.
+        *token_p = malloc (sizeof (**token_p));
+        if (UNLIKELY (!*token_p)) return -1;
+        (*token_p)->data = (YYCTYPE *) strndup ((const char *) it->tok + offset, size);
+        if (UNLIKELY (!(*token_p)->data)) {
+            free (*token_p);
+            *token_p = NULL;
+            return -1;
+        }
+        (*token_p)->size = size + 1;
+        log_debug ("Integer: %s", (*token_p)->data);
 
-        *term = LSUP_term_new (LSUP_TERM_BNODE, (char*)data, NULL);
-        free (data);
+        return T_INTEGER;
+    }
+
+    DOUBLE {
+        // Normalize sign: a leading '+' is dropped from the payload.
+        size_t offset = *it->tok == '+' ? 1 : 0;
+        // Bytes left after the optional '+'.
+        size_t size = YYCURSOR - it->tok - offset;
+
+        // *token_p was reset to NULL at the top of the loop, so the token
+        // has to be allocated here before its fields are written.
+        *token_p = malloc (sizeof (**token_p));
+        if (UNLIKELY (!*token_p)) return -1;
+        (*token_p)->data = (YYCTYPE *) strndup ((const char *) it->tok + offset, size);
+        if (UNLIKELY (!(*token_p)->data)) {
+            free (*token_p);
+            *token_p = NULL;
+            return -1;
+        }
+        (*token_p)->size = size + 1;
+        log_debug ("Double: %s", (*token_p)->data);
 
-        return T_BNODE;
+        return T_DOUBLE;
     }
 
-    DOT {
-        log_debug ("End of triple.");
-        it->ct ++;
+    DECIMAL {
+        // Normalize sign: a leading '+' is dropped from the payload.
+        const YYCTYPE *start = it->tok + (*it->tok == '+' ? 1 : 0);
+        const YYCTYPE *end = YYCURSOR;  // One past the last matched byte.
 
-        return T_DOT;
+        // Normalize trailing zeros in fractional part. The search for '.'
+        // is bounded to the matched token, and the scan stops at the first
+        // non-zero byte (at the latest, the '.' itself).
+        if (memchr (start, '.', end - start))
+            while (end > start && end[-1] == '0') end --;
+        size_t size = end - start;
+
+        // *token_p was reset to NULL at the top of the loop, so the token
+        // has to be allocated here before its fields are written.
+        *token_p = malloc (sizeof (**token_p));
+        if (UNLIKELY (!*token_p)) return -1;
+        (*token_p)->data = (YYCTYPE *) strndup ((const char *) start, size);
+        if (UNLIKELY (!(*token_p)->data)) {
+            free (*token_p);
+            *token_p = NULL;
+            return -1;
+        }
+        (*token_p)->size = size + 1;
+        log_debug ("Decimal: %s", (*token_p)->data);
+
+        return T_DECIMAL;
     }
 
-    WS {
-        log_debug ("Separator.");
+    BOOLEAN {
+        // The whole match is the payload; subtracting 1 would cut the last
+        // letter off ("tru", "fals").
+        size_t size = YYCURSOR - it->tok;
+        // *token_p was reset to NULL at the top of the loop, so the token
+        // has to be allocated here before its fields are written.
+        *token_p = malloc (sizeof (**token_p));
+        if (UNLIKELY (!*token_p)) return -1;
+        (*token_p)->data = (YYCTYPE *) strndup ((const char *) it->tok, size);
+        if (UNLIKELY (!(*token_p)->data)) {
+            free (*token_p);
+            *token_p = NULL;
+            return -1;
+        }
+        (*token_p)->size = size + 1;
+        log_debug ("Boolean: %s", (*token_p)->data);
 
-        return T_WS;
+        return T_BOOLEAN;
     }
 
+    '(' { return T_LPAREN; }
+    ')' { return T_RPAREN; }
+    '[' { return T_LBRACKET; }
+    ']' { return T_RBRACKET; }
+    ';' { return T_SEMICOLON; }
+    ',' { return T_COMMA; }
+    ':' { return T_COLON; }
+    'a' { return T_RDF_TYPE; }
+    '_:' { return T_BNODE_PFX; }
+    '^^' { return T_DTYPE_MARKER; }
+    '@base' {return T_BASE; }
+    '@prefix' {return T_PREFIX; }
+
     COMMENT {
         size_t size = YYCURSOR - it->tok + 1;
         YYCTYPE *data = malloc (size);
@@ -303,14 +353,15 @@ loop:
 
 
 LSUP_rc
-LSUP_nt_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
+LSUP_ttl_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
 {
     FILE *fh = fmemopen ((void *)rep, strlen (rep), "r");
 
     ParseIterator it;
     parse_init (&it, fh);
 
-    int ttype = lex (&it, term);
+    ParserToken **token_p;
+    int ttype = lex (&it, token_p);
 
     fclose (fh);
 
@@ -325,7 +376,7 @@ LSUP_nt_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
 }
 
 LSUP_rc
-LSUP_nt_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
+LSUP_ttl_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
 {
     *err_p = NULL;
     *gr_p = NULL;
@@ -337,8 +388,9 @@ LSUP_nt_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
 
     LSUP_rc rc;
 
+    LSUP_NSMap *nsm = LSUP_nsmap_new();
     LSUP_Graph *gr = LSUP_graph_new (
-            LSUP_iriref_new (NULL, NULL), LSUP_STORE_HTABLE, NULL, NULL, 0);
+            LSUP_iriref_new (NULL, NULL), LSUP_STORE_HTABLE, NULL, nsm, 0);
     if (UNLIKELY (!gr)) return LSUP_MEM_ERR;
 
     LSUP_GraphIterator *it = LSUP_graph_add_init (gr);
@@ -347,10 +399,10 @@ LSUP_nt_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
         return LSUP_MEM_ERR;
     }
 
-    LSUP_Term *term = NULL;
+    ParserToken **token_p;
 
     for (;;) {
-        int ttype = lex (&parse_it, &term);
+        int ttype = lex (&parse_it, token_p);
 
         if (ttype == -1) {
             char token[16] = {'\0'};
@@ -373,7 +425,7 @@ LSUP_nt_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
             goto finally;
         }
 
-        Parse (parser, ttype, term, it);
+        Parse (parser, ttype, token_p, it);
 
         if (ttype == T_EOF) break;
     };

+ 0 - 58
src/codec/ttl_grammar.y

@@ -1,58 +0,0 @@
-%include {
-
-/** @brief Lemon parser grammar for N-Triples.
- *
- * The `lemon' parser generator executable must be in your PATH:
- * https://sqlite.org/src/doc/trunk/doc/lemon.html
- *
- * To generate the parser, run: `lemon ${FILE}'
- */
-
-#include "graph.h"
-}
-
-
-%token_type { LSUP_Term * }
-%token_prefix "T_"
-
-%type triple            { LSUP_Triple * }
-%destructor triple      { LSUP_triple_free ($$); }
-%type subject           { LSUP_Term * }
-%destructor subject     { LSUP_term_free ($$); }
-%type predicate         { LSUP_Term * }
-%destructor predicate   { LSUP_term_free ($$); }
-%type object            { LSUP_Term * }
-%destructor object      { LSUP_term_free ($$); }
-%default_type           { void * }
-
-%extra_argument         { LSUP_GraphIterator *it }
-
-
-// Rules.
-
-ntriplesDoc ::= triples EOF.
-
-triples     ::= eol.
-triples     ::= triple eol.
-triples     ::= triples triple eol.
-
-triple(A)   ::= ws subject(S) ws predicate(P) ws object(O) ws DOT. {
-
-                A = LSUP_triple_new (S, P, O);
-                LSUP_graph_add_iter (it, A);
-            }
-
-subject     ::= IRIREF.
-subject     ::= BNODE.
-
-predicate   ::= IRIREF.
-
-object      ::= IRIREF.
-object      ::= BNODE.
-object      ::= LITERAL.
-
-eol         ::= EOL.
-eol         ::= eol EOL.
-
-ws          ::=.
-ws          ::= WS.

+ 1 - 1
src/codec_nt.c

@@ -1,5 +1,5 @@
 #include "codec_nt.h"
-#include "nt_parser.h"
+#include "parser_nt.h"
 
 /** @brief List of characters to be escaped in serialized literals.
  *

+ 21 - 21
src/codec_ttl.c

@@ -1,5 +1,5 @@
-#include "codec_nt.h"
-#include "nt_parser.h"
+#include "codec_ttl.h"
+#include "parser_ttl.h"
 
 /** @brief List of characters to be escaped in serialized literals.
  *
@@ -22,7 +22,7 @@ static LSUP_rc escape_lit (const char *in, char **out_p);
 /* * * Codec functions. * * */
 
 static LSUP_rc
-term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
+term_to_ttl (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
 {
     LSUP_rc rc;
     char *out = NULL, *tmp, *escaped;
@@ -117,17 +117,17 @@ term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
 
 
 static LSUP_CodecIterator *
-gr_to_nt_init (const LSUP_Graph *gr);
+gr_to_ttl_init (const LSUP_Graph *gr);
 
 
 static LSUP_rc
-gr_to_nt_iter (LSUP_CodecIterator *it, unsigned char **res) {
+gr_to_ttl_iter (LSUP_CodecIterator *it, unsigned char **res) {
     LSUP_rc rc = LSUP_graph_iter_next (it->gr_it, it->trp);
     if (rc != LSUP_OK) goto finally;
 
-    term_to_nt (it->trp->s, it->nsm, &it->str_s);
-    term_to_nt (it->trp->p, it->nsm, &it->str_p);
-    term_to_nt (it->trp->o, it->nsm, &it->str_o);
+    term_to_ttl (it->trp->s, it->nsm, &it->str_s);
+    term_to_ttl (it->trp->p, it->nsm, &it->str_p);
+    term_to_ttl (it->trp->o, it->nsm, &it->str_o);
 
     // 3 term separators + dot + newline + terminal = 6
     unsigned char *tmp = realloc (
@@ -154,7 +154,7 @@ finally:
 
 
 static void
-gr_to_nt_done (LSUP_CodecIterator *it)
+gr_to_ttl_done (LSUP_CodecIterator *it)
 {
     LSUP_graph_iter_free (it->gr_it);
     LSUP_triple_free (it->trp);
@@ -166,19 +166,19 @@ gr_to_nt_done (LSUP_CodecIterator *it)
 }
 
 
-const LSUP_Codec nt_codec = {
-    .name               = "N-Triples",
-    .mimetype           = "application/n-triples",
-    .extension          = "nt",
+const LSUP_Codec ttl_codec = {              // Codec descriptor for Turtle.
+    .name               = "Turtle",
+    .mimetype           = "text/turtle",
+    .extension          = "ttl",
 
-    .encode_term        = term_to_nt,
+    .encode_term        = term_to_ttl,      // Term encoder.
 
-    .encode_graph_init  = gr_to_nt_init,
-    .encode_graph_iter  = gr_to_nt_iter,
-    .encode_graph_done  = gr_to_nt_done,
+    .encode_graph_init  = gr_to_ttl_init,   // Graph encoding: create the codec iterator,
+    .encode_graph_iter  = gr_to_ttl_iter,   // encode the next triple,
+    .encode_graph_done  = gr_to_ttl_done,   // and release the iterator.
 
-    .decode_term        = LSUP_nt_parse_term,
-    .decode_graph       = LSUP_nt_parse_doc,
+    .decode_term        = LSUP_ttl_parse_term,  // Term decoder (takes a string representation).
+    .decode_graph       = LSUP_ttl_parse_doc,   // Document decoder (takes a FILE handle).
 };
 
 
@@ -201,12 +201,12 @@ static inline char replace_char(const char c) {
 
 
 static LSUP_CodecIterator *
-gr_to_nt_init (const LSUP_Graph *gr)
+gr_to_ttl_init (const LSUP_Graph *gr)
 {
     LSUP_CodecIterator *it;
     MALLOC_GUARD (it, NULL);
 
-    it->codec = &nt_codec;
+    it->codec = &ttl_codec;
     it->gr_it = LSUP_graph_lookup(gr, NULL, NULL, NULL, &it->cur);
     it->nsm = LSUP_graph_namespace (gr);
     it->cur = 0;

+ 5 - 0
src/graph.c

@@ -447,6 +447,11 @@ LSUP_graph_iter_next (LSUP_GraphIterator *it, LSUP_Triple *spo)
 }
 
 
+/** @brief Return the graph stored in a graph iterator.
+ *
+ * @param[in] it Graph iterator handle.
+ *
+ * @return The iterator's graph, as a read-only pointer.
+ */
+const LSUP_Graph *
+LSUP_graph_iter_graph (LSUP_GraphIterator *it)
+{
+    return it->graph;
+}
+
+
 void
 LSUP_graph_iter_free (LSUP_GraphIterator *it)
 {