ソースを参照

First pass at complete TTL codec.

Stefano Cossu 2 年 前
コミット
e16d772b6d
15 ファイル変更754 行追加1599 行削除
  1. 2 1
      Makefile
  2. 18 3
      include/buffer.h
  3. 169 0
      include/codec.h
  4. 1 1
      include/codec_nt.h
  5. 1 1
      include/codec_ttl.h
  6. 1 1
      include/core.h
  7. 19 20
      include/term.h
  8. 131 4
      src/buffer.c
  9. 198 0
      src/codec.c
  10. 0 1240
      src/codec/grammar_nt.c
  11. 128 83
      src/codec/grammar_ttl.y
  12. 8 85
      src/codec/lexer_nt.re
  13. 75 157
      src/codec/lexer_ttl.re
  14. 1 1
      src/core.c
  15. 2 2
      src/term.c

+ 2 - 1
Makefile

@@ -119,10 +119,11 @@ parsers: $(PARSER_SRC) ## Make intermediate parser sources for development.
 
 # Codecs.
 # Parser C sources.
-$(CODEC_DIR)/parser_%.c: $(CODEC_DIR)/lexer_%.re $(CODEC_DIR)/grammar_%.c $(CODEC_DIR)/grammar_%.h
+$(CODEC_DIR)/parser_%.c: $(CODEC_DIR)/lexer_%.re $(CODEC_DIR)/grammar_%.c $(CODEC_DIR)/grammar_%.h src/codec.c
 	$(LEXER) $< -o $@ -T --case-ranges
 
 
+.SECONDARY: $(CODEC_DIR)/grammar_%.c
 # Parser generators.
 $(CODEC_DIR)/grammar_%.c $(CODEC_DIR)/grammar_%.h: $(CODEC_DIR)/grammar_%.y
 	$(PARSER) $< -q -T$(CODEC_DIR)/lempar.c -d$(CODEC_DIR)

+ 18 - 3
include/buffer.h

@@ -19,7 +19,7 @@
  * without being freed between iterations, by using #LSUP_buffer_init.
  */
 typedef struct LSUP_Buffer {
-    /*@null@*/ void *addr;
+    /*@null@*/ unsigned char *addr;
     size_t size;
 } LSUP_Buffer;
 
@@ -61,7 +61,8 @@ typedef enum {
  *  existing data are preserved as with a normal realloc().
  */
 LSUP_rc
-LSUP_buffer_init (LSUP_Buffer *buf, const size_t size, const void *data);
+LSUP_buffer_init (
+        LSUP_Buffer *buf, const size_t size, const unsigned char *data);
 
 
 /** @brief Create a new buffer and optionally populate it with data.
@@ -79,7 +80,7 @@ LSUP_buffer_init (LSUP_Buffer *buf, const size_t size, const void *data);
  *  on error.
  */
 inline LSUP_Buffer *
-LSUP_buffer_new (const void *data, const size_t size)
+LSUP_buffer_new (const unsigned char *data, const size_t size)
 {
     LSUP_Buffer *buf;
     CALLOC_GUARD (buf, NULL);
@@ -109,6 +110,20 @@ void LSUP_buffer_done (LSUP_Buffer *buf);
 void LSUP_buffer_free (LSUP_Buffer *buf);
 
 
+/** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
+ *
+ * This can also be used as a generic method to generate a buffer from a
+ * string.
+ *
+ * @param[in] str ASCII- or UTF8-encoded string with escaped Unicode code
+ *  points.
+ *
+ * @return Byte buffer with unescaped Unicode points. Encoding is not
+ *  specified.
+ */
+LSUP_Buffer *LSUP_buffer_from_str (const char *str);
+
+
 /** @brief Hash a buffer.
  */
 inline LSUP_Key

+ 169 - 0
include/codec_base.h → include/codec.h

@@ -4,6 +4,16 @@
 #include "graph.h"
 
 
+/**
+ * Max data size passed to the scanner and parser at each iteration.
+ */
+#ifdef LSUP_RDF_STREAM_CHUNK_SIZE
+#define CHUNK_SIZE LSUP_RDF_STREAM_CHUNK_SIZE
+#else
+#define CHUNK_SIZE 8192
+#endif
+
+
 typedef struct codec_t LSUP_Codec;
 
 
@@ -29,6 +39,24 @@ typedef struct codec_iter_t {
          *              str_o;      // Temporary string.
 } LSUP_CodecIterator;
 
+/// Predicate and object list. Used for Turtle.
+typedef struct {
+    LSUP_Term **    p;              ///< NULL-terminated array of term handles.
+    LSUP_Term ***   o;              /**<
+                                      * NULL-terminated array of
+                                      * NULL-terminated arrays of term handles.
+                                      * The indices of the outer array are
+                                      * equal to the indices of the associated
+                                      * predicate in the predicate list.
+                                      */
+} LSUP_PredObjList;
+
+/// Parser state.
+typedef struct {
+    LSUP_GraphIterator *    it;     ///< Iterator used to build the graph.
+    LSUP_NSMap *            nsm;    ///< NS map used in the document.
+    LSUP_Term *             base;   ///< Base IRI used in the document.
+} LSUP_TTLParserState;
 
 /** @brief Parse error information.
  *
@@ -42,6 +70,10 @@ typedef struct parse_error_t {
 */
 
 
+/*
+ * Interface prototypes.
+ */
+
 /** @brief Term encoder callback type.
  *
  * @param[in] term Single term handle.
@@ -190,4 +222,141 @@ struct codec_t {
     gr_decode_fn_t      decode_graph;     // Graph decoder function.
 };
 
+
+/*
+ * Common utility functions.
+ */
+
+/** @brief strdup() for unsigned char.
+ *
+ * This is to be used with uint8_t sequences considered to be UTF-8 sequences,
+ * required by re2c (it won't work with byte sequences containing `NUL`).
+ */
+inline uint8_t
+*uint8_dup (const uint8_t *str)
+{ return (uint8_t *) strdup ((char *) str); }
+
+
+/** @brief strndup() for unsigned char.
+ *
+ * This is to be used with uint8_t sequences considered to be UTF-8 sequences,
+ * required by re2c (it won't work with byte sequences containing `NUL`).
+ */
+inline uint8_t
+*uint8_ndup (const uint8_t *str, size_t size)
+{ return (uint8_t *) strndup ((char *) str, size); }
+
+
+/** @brief Unescape a single character.
+ *
+ * Convert escaped special characters such as `\t`, `\n`, etc. into their
+ * corresponding code points.
+ *
+ * Non-special characters are returned unchanged.
+ *
+ * @param[in] c Character to unescape. Note that this is the single character
+ * after `\`.
+ * 
+ * @return Code point corresponding to the escaped character.
+ */
+inline char
+unescape_char (const char c)
+{
+    switch (c) {
+        case 't': return '\t';
+        case 'b': return '\b';
+        case 'n': return '\n';
+        case 'r': return '\r';
+        case 'f': return '\f';
+        default: return c;
+    }
+}
+
+/** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
+ *
+ * TODO This does not encode UTF-16 yet. An UTF-16 version should change
+ * signature because it cannot output a plain NUL-terminated string.
+ *
+ * @param[in] esc_str Escaped string.
+ *
+ * @return String with escape sequences replaced by Unicode bytes.
+ */
+uint8_t *unescape_unicode (const uint8_t *esc_str);
+
+
+/** @brief Add an object to an object list.
+ *
+ * @param[in] ol Array of object handles to be added to. On success, this
+ *  handle will be reallocated and the new address returned, so the passed
+ *  handle should no longer be used. On failure, it remains unchanged and may
+ *  be reused.
+ *
+ * @param[in] o Object to be added to the list.
+ *
+ * @return Reallocated list on success; NULL on failure.
+ */
+LSUP_Term **
+LSUP_obj_list_add (LSUP_Term **ol, LSUP_Term *o);
+
+
+/** @brief New predicate-object list.
+ *
+ * @return a new empty predicate-object list.
+ */
+LSUP_PredObjList *
+LSUP_pred_obj_list_new (void);
+
+
+/** @brief Free a predicate-object list.
+ *
+ * @param[in] pol Predicate-object list handle obtained with
+ *  #LSUP_pred_obj_list_new().
+ */
+void
+LSUP_pred_obj_list_free (LSUP_PredObjList *pol);
+
+
+/** @brief Add a predicate-object list pair to a PO list.
+ *
+ * @param[in] pol Predicate-object list handle obtained with
+ *  #LSUP_pred_obj_list_new().
+ *
+ *  @param[in] p Predicate to be associated with the given object list.
+ *
+ *  @param[in] o NULL-terminated array of object term handles to be associated
+ *   with the given predicate. 
+ *
+ * @return LSUP_OK on success; LSUP_MEM_ERR on allocation error.
+ */
+LSUP_rc
+LSUP_pred_obj_list_add (LSUP_PredObjList *pol, LSUP_Term *p, LSUP_Term **o);
+
+
+/** @brief Add triples for a subject and a PO list to a graph.
+ *
+ * @param[in] it Graph iterator to use for insertion.
+ *
+ * @param[in] s Subject of all the triples.
+ *
+ * @param[in] po Predicate-object list.
+ */
+LSUP_rc
+LSUP_spo_list_add_triples (
+        LSUP_GraphIterator *it, LSUP_Term *s, const LSUP_PredObjList *po);
+
+
+/** @brief Add triples for an anonymous collection to a graph.
+ *
+ * The `rdf:first`, `rdf:rest`, etc. terms are automatically added and the term
+ * for the first item in the list is returned.
+ *
+ * @param[in] it Graph iterator to use for insertion.
+ *
+ * @param[in] ol NULL-terminated term array.
+ *
+ * @return Blank node representing the first list item.
+ */
+LSUP_Term *
+LSUP_bnode_add_collection (LSUP_GraphIterator *it, LSUP_Term **ol);
+
 #endif

+ 1 - 1
include/codec_nt.h

@@ -1,7 +1,7 @@
 #ifndef _LSUP_CODEC_NT_H
 #define _LSUP_CODEC_NT_H
 
-#include "codec_base.h"
+#include "codec.h"
 
 /** @brief N-Triples codec.
  */

+ 1 - 1
include/codec_ttl.h

@@ -1,7 +1,7 @@
 #ifndef _LSUP_CODEC_TTL_H
 #define _LSUP_CODEC_TTL_H
 
-#include "codec_base.h"
+#include "codec.h"
 
 /** @brief Turtle codec.
  */

+ 1 - 1
include/core.h

@@ -235,7 +235,7 @@ LSUP_strerror (LSUP_rc rc);
  * @return number of bytes on success, 0 on failure (also produces U+FFFD,
  *  which uses 3 bytes)
  */
-inline int utf8_encode(const uint32_t utf, unsigned char *out)
+inline int utf8_encode (const uint32_t utf, unsigned char *out)
 {
   if (utf <= 0x7F) {
     // Plain ASCII

+ 19 - 20
include/term.h

@@ -9,22 +9,6 @@
 
 #define UUID4_URN_SIZE UUIDSTR_SIZE + 10
 
-/*
- * Term types.
- */
-/* Undefined placeholder or result of an error. Invalid for most operations. */
-#define LSUP_TERM_UNDEFINED     0
-/* IRI reference. */
-#define LSUP_TERM_IRIREF        1
-/* Namespace-prefixed IRI reference. */
-#define LSUP_TERM_NS_IRIREF     2
-/* Literal without language tag. */
-#define LSUP_TERM_LITERAL       3
-/* Language-tagged string literal. */
-#define LSUP_TERM_LT_LITERAL    4
-/* Blank node. */
-#define LSUP_TERM_BNODE         5
-
 /** @brief Default data type for untyped literals (prefixed IRI).
  */
 #define DEFAULT_DTYPE           "http://www.w3.org/2001/XMLSchema#string"
@@ -55,15 +39,29 @@
  * Data types.
  */
 
-typedef char LSUP_TermType;
+/// Language tag, currently restricted to 7 characters.
 typedef char LSUP_LangTag[8];
 
+/// Term type.
+typedef enum {
+    LSUP_TERM_UNDEFINED = 0,/**<
+                              * Undefined placeholder or result of an error.
+                              * Invalid for most operations.
+                              */
+    LSUP_TERM_IRIREF,       ///< IRI reference.
+    LSUP_TERM_NS_IRIREF,    ///< Namespace-prefixed IRI reference.
+    LSUP_TERM_LITERAL,      ///< Literal without language tag.
+    LSUP_TERM_LT_LITERAL,   ///< Language-tagged string literal.
+    LSUP_TERM_BNODE,        ///< Blank node.
+} LSUP_TermType;
+
 /** @brief IRI information.
  *
  * See regex matching group for #LSUP_URI_REGEX_STR for more information.
  */
 typedef struct iri_info_t LSUP_IRIInfo;
 
+/// RDF term.
 typedef struct term_t {
     char *              data;       // URI, literal value, or BNode label.
     union {
@@ -129,7 +127,7 @@ extern LSUP_Term *LSUP_default_datatype;
 
 
 /*
- * Function prototypes.
+ * API functions.
  */
 
 /** @brief Create a new term.
@@ -141,7 +139,8 @@ extern LSUP_Term *LSUP_default_datatype;
  * @param type[in] Term type. One of #LSUP_TermType.
  *
  * @param data[in] Term data: textual URI, literal value without data type
- *  or langtag, etc.
+ *  or langtag, etc. It may be NULL for IRI refs and BNodes, in which case a
+ *  random identifier is generated.
  *
  * @param metadata[in] Namespace map (LSUP_NSMap *) for IRI refs; language tag
  * (LSUP_LangTag *) for language-tagged literals; or data type (LSUP_Term *)
@@ -188,7 +187,7 @@ LSUP_iriref_new (const char *data, LSUP_NSMap *nsm)
  * If the provided IRI is already a fully qualified IRI (i.e. it has a prefix)
  * the result is semantically identical to the input.
  *
- * If the provided IRI begins with a '/', the resulting IRI is relative to the
+ * If the relative IRI begins with a '/', the resulting IRI is relative to the
  * web root of the root IRI. I.e. if a root IRI has a path after the webroot,
  * it is ignored.
  *

+ 131 - 4
src/buffer.c

@@ -2,20 +2,22 @@
 
 /* * * Inline extern prototypes * * */
 
-LSUP_Buffer *LSUP_buffer_new (const void *data, const size_t size);
+LSUP_Buffer *LSUP_buffer_new (const unsigned char *data, const size_t size);
 LSUP_rc LSUP_buffer_init (
-        LSUP_Buffer *buf, const size_t size, const void *data);
+        LSUP_Buffer *buf, const size_t size, const unsigned char *data);
 bool LSUP_buffer_eq (const LSUP_Buffer *buf1, const LSUP_Buffer *buf2);
 int LSUP_buffer_cmp (const LSUP_Buffer *buf1, const LSUP_Buffer *buf2);
+static inline char unescape_char (const char c);
 
 
 /* * * API * * */
 
 LSUP_rc
-LSUP_buffer_init (LSUP_Buffer *buf, const size_t size, const void *data)
+LSUP_buffer_init (
+        LSUP_Buffer *buf, const size_t size, const unsigned char *data)
 {
     // If size is zero, addr becomes NULL.
-    void *tmp = realloc (buf->addr, size);
+    unsigned char *tmp = realloc (buf->addr, size);
     if (UNLIKELY (size > 0 && tmp == NULL)) return LSUP_MEM_ERR;
 
     buf->addr = tmp;
@@ -27,6 +29,116 @@ LSUP_buffer_init (LSUP_Buffer *buf, const size_t size, const void *data)
 }
 
 
+int LSUP_utf8_encode (const uint32_t utf, unsigned char *out)
+{
+  if (utf <= 0x7F) {
+    // Plain ASCII
+    out[0] = (char) utf;
+    out[1] = 0;
+    return 1;
+  }
+  else if (utf <= 0x07FF) {
+    // 2-byte unicode
+    out[0] = (char) (((utf >> 6) & 0x1F) | 0xC0);
+    out[1] = (char) (((utf >> 0) & 0x3F) | 0x80);
+    out[2] = 0;
+    return 2;
+  }
+  else if (utf <= 0xFFFF) {
+    // 3-byte unicode
+    out[0] = (char) (((utf >> 12) & 0x0F) | 0xE0);
+    out[1] = (char) (((utf >>  6) & 0x3F) | 0x80);
+    out[2] = (char) (((utf >>  0) & 0x3F) | 0x80);
+    out[3] = 0;
+    return 3;
+  }
+  else if (utf <= 0x10FFFF) {
+    // 4-byte unicode
+    out[0] = (char) (((utf >> 18) & 0x07) | 0xF0);
+    out[1] = (char) (((utf >> 12) & 0x3F) | 0x80);
+    out[2] = (char) (((utf >>  6) & 0x3F) | 0x80);
+    out[3] = (char) (((utf >>  0) & 0x3F) | 0x80);
+    out[4] = 0;
+    return 4;
+  }
+  else {
+    // error - use replacement character
+    out[0] = (char) 0xEF;
+    out[1] = (char) 0xBF;
+    out[2] = (char) 0xBD;
+    out[3] = 0;
+    return 0;
+  }
+}
+
+
+/*
+ * Build a NUL-terminated byte buffer from a string, resolving `\uXXXX` and
+ * single-character escapes along the way.
+ *
+ * `buf->size` is the number of bytes written, including the terminating NUL.
+ *
+ * Returns NULL on allocation failure, on a `\u` sequence with fewer than four
+ * characters left in the input, or on the not yet supported `\UXXXXXXXX`
+ * form. Nothing is leaked in any of those cases.
+ */
+LSUP_Buffer *LSUP_buffer_from_str (const char *str)
+{
+    // Measure once; the length does not change while scanning.
+    const size_t len = strlen (str);
+
+    LSUP_Buffer *buf = malloc (sizeof (*buf));
+    if (UNLIKELY (!buf)) return NULL;
+
+    // Unescaping never grows the data (`\uXXXX` is 6 bytes in, at most 3 out;
+    // `\c` is 2 bytes in, 1 out), so input length + terminator is enough.
+    buf->addr = malloc (len + 1);
+    if (UNLIKELY (!buf->addr)) {
+        free (buf);
+        return NULL;
+    }
+
+    size_t j = 0; // Write position in buf->addr.
+    unsigned char tmp_chr[5];
+    for (size_t i = 0; i < len;) {
+        if (str[i] != '\\') {
+            // Copy unescaped byte verbatim.
+            buf->addr[j++] = str[i++];
+            continue;
+        }
+
+        i++; // Skip over '\\'
+
+        // 4-hex sequence.
+        if (str[i] == 'u') {
+            i++; // Skip over 'u'
+
+            // Never read past the end of the input.
+            if (len - i < 4) {
+                log_error ("Truncated \\u escape sequence.");
+                goto fail;
+            }
+
+            // Use tmp_chr to hold the hex string for the code point.
+            memcpy (tmp_chr, str + i, sizeof (tmp_chr) - 1);
+            tmp_chr[4] = '\0';
+
+            uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
+            log_debug ("tmp_val: %d", tmp_val);
+
+            // Reuse tmp_chr to hold the byte values for the code point.
+            int nbytes = utf8_encode (tmp_val, tmp_chr);
+            if (nbytes == 0)
+                log_error ("Error encoding code point: %u", tmp_val);
+
+            // Copy bytes into destination.
+            memcpy (buf->addr + j, tmp_chr, nbytes);
+
+            j += nbytes;
+            i += 4;
+
+        // 8-hex sequence.
+        } else if (str[i] == 'U') {
+            log_error ("UTF-16 sequence unescaping not yet implemented.");
+            goto fail; // TODO encode UTF-16
+
+        // Unescape other escaped characters. A lone trailing backslash ends
+        // up here with the string terminator as its "escaped" character.
+        } else buf->addr[j++] = unescape_char (str[i++]);
+    }
+
+    buf->addr[j] = '\0';
+    buf->size = j + 1; // Data bytes plus the terminating NUL.
+
+    // Compact data. If shrinking fails the larger allocation is still valid,
+    // so keep it rather than losing the result.
+    unsigned char *tmp = realloc (buf->addr, buf->size);
+    if (tmp) buf->addr = tmp;
+
+    return buf;
+
+fail:
+    free (buf->addr);
+    free (buf);
+    return NULL;
+}
+
+
+
+
 void LSUP_buffer_print (const LSUP_Buffer *buf)
 {
     for (size_t i = 0; i < buf->size; i++) {
@@ -153,6 +265,21 @@ LSUP_btriple_free_shallow (LSUP_BufferTriple *sspo)
 }
 
 
+/*
+ * Statics.
+ */
+
+static inline char unescape_char (const char c) {
+    switch (c) {
+        case 't': return '\t';
+        case 'b': return '\b';
+        case 'n': return '\n';
+        case 'r': return '\r';
+        case 'f': return '\f';
+        default: return c;
+    }
+}
+
 
 /* Extern inline prototypes. */
 

+ 198 - 0
src/codec.c

@@ -0,0 +1,198 @@
+#include "codec.h"
+
+/*
+ * Return a newly allocated copy of `esc_str` in which `\uXXXX` sequences are
+ * replaced by their UTF-8 bytes and single-character escapes (`\t`, `\n`, ...)
+ * by the characters they denote.
+ *
+ * Returns NULL on allocation failure, on a `\u` sequence with fewer than four
+ * characters left in the input, or on the not yet supported `\UXXXXXXXX`
+ * form. The caller owns the returned string and releases it with free().
+ */
+uint8_t *unescape_unicode (const uint8_t *esc_str)
+{
+    const size_t size = strlen ((const char *)esc_str);
+
+    // Unescaping never grows the data, so input length + NUL is enough.
+    uint8_t *data = malloc (size + 1);
+    if (UNLIKELY (!data)) return NULL;
+
+    size_t j = 0; // Write position in data.
+    uint8_t tmp_chr[5];
+    for (size_t i = 0; i < size;) {
+        if (esc_str[i] != '\\') {
+            // Copy unescaped byte verbatim.
+            data[j++] = esc_str[i++];
+            continue;
+        }
+
+        i++; // Skip over '\\'
+
+        // 4-hex sequence.
+        if (esc_str[i] == 'u') {
+            i++; // Skip over 'u'
+
+            // Never read past the end of the input.
+            if (size - i < 4) {
+                log_error ("Truncated \\u escape sequence.");
+                free (data);
+                return NULL;
+            }
+
+            // Use tmp_chr to hold the hex string for the code point.
+            memcpy (tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
+            tmp_chr[4] = '\0';
+
+            uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
+            log_debug ("tmp_val: %d", tmp_val);
+
+            // Reuse tmp_chr to hold the byte values for the code point. From
+            // here on the hex digits are gone, so report the numeric value.
+            int nbytes = utf8_encode (tmp_val, tmp_chr);
+            if (nbytes == 0)
+                log_error ("Error encoding code point: %u", tmp_val);
+
+            // Copy bytes into destination.
+            memcpy (data + j, tmp_chr, nbytes);
+            // Trace from tmp_chr: data[j + 1] may not have been written yet.
+            log_trace ("UC byte value: %x %x", tmp_chr[0], tmp_chr[1]);
+
+            j += nbytes;
+            i += 4;
+
+        // 8-hex sequence.
+        } else if (esc_str[i] == 'U') {
+            log_error ("4-byte sequence encoding is not yet implemented.");
+            free (data);
+            return NULL; // TODO encode 4-byte sequences
+
+        // Unescape other escaped characters.
+        } else data[j++] = unescape_char (esc_str[i++]);
+    }
+
+    data[j++] = '\0';
+
+    // Compact result. If shrinking fails the larger allocation is still
+    // valid, so return it instead of leaking it.
+    uint8_t *tmp = realloc (data, j);
+    if (tmp) data = tmp;
+
+    return data;
+}
+
+
+/*
+ * Append `o` to the NULL-terminated term array `ol`.
+ *
+ * The array is grown with realloc(); on success the (possibly moved) array is
+ * returned and `ol` must no longer be used. On failure NULL is returned and
+ * `ol` is left untouched. A NULL `ol` is treated as an empty list.
+ */
+LSUP_Term **
+LSUP_obj_list_add (LSUP_Term **ol, LSUP_Term *o)
+{
+    // Number of terms currently stored, not counting the NULL sentinel.
+    size_t ct = 0;
+    if (ol) {
+        while (ol[ct]) ct++;
+    }
+
+    // Room for the existing terms, the new term and the sentinel.
+    LSUP_Term **tmp = realloc (ol, sizeof (*tmp) * (ct + 2));
+    if (!tmp) return NULL;
+
+    // The new term overwrites the old sentinel; a fresh sentinel follows it.
+    tmp[ct] = o;
+    tmp[ct + 1] = NULL;
+
+    // Return the reallocated block: `ol` may have been moved.
+    return tmp;
+}
+
+
+/*
+ * Allocate an empty predicate-object list.
+ *
+ * Init state:
+ * {p: [NULL], o: [NULL]}
+ *
+ * Returns NULL if any allocation fails; partial allocations are released.
+ */
+LSUP_PredObjList *
+LSUP_pred_obj_list_new (void)
+{
+    LSUP_PredObjList *po = malloc (sizeof (*po));
+    if (!po) return NULL;
+
+    // Each array starts out holding only its zeroed sentinel slot.
+    po->p = calloc (1, sizeof (*po->p));
+    po->o = calloc (1, sizeof (*po->o));
+    if (!po->p || !po->o) {
+        // free (NULL) is a no-op, so whichever allocation failed is harmless.
+        free (po->p);
+        free (po->o);
+        free (po);
+        return NULL;
+    }
+
+    return po;
+}
+
+
+/*
+ * Release a predicate-object list together with everything it holds: the
+ * predicate terms, the object terms, each per-predicate object array, the two
+ * top-level arrays and the list structure itself.
+ *
+ * The object arrays are assumed to be heap blocks handed over through
+ * LSUP_pred_obj_list_add() (e.g. built with LSUP_obj_list_add()).
+ *
+ * A NULL handle is accepted and ignored.
+ */
+void
+LSUP_pred_obj_list_free (LSUP_PredObjList *po)
+{
+    if (!po) return;
+
+    if (po->p) {
+        for (size_t i = 0; po->p[i]; i++) {
+            LSUP_term_free (po->p[i]);
+        }
+        free (po->p);
+    }
+
+    if (po->o) {
+        for (size_t i = 0; po->o[i]; i++) {
+            for (size_t j = 0; po->o[i][j]; j++) {
+                LSUP_term_free (po->o[i][j]);
+            }
+            // The object array itself, not only the terms inside it.
+            free (po->o[i]);
+        }
+        free (po->o);
+    }
+
+    free (po);
+}
+
+
+/*
+ * Append a predicate and its object array to a predicate-object list.
+ *
+ * `p` and `o` are stored at the same index of `po->p` and `po->o`. Both
+ * arrays are grown before anything is written, so on LSUP_MEM_ERR the list
+ * still holds exactly the pairs it held before the call.
+ */
+LSUP_rc
+LSUP_pred_obj_list_add (LSUP_PredObjList *po, LSUP_Term *p, LSUP_Term **o)
+{
+    // Number of pairs already stored, not counting the NULL sentinel. The
+    // predicate and object arrays always have the same length.
+    size_t ct = 0;
+    while (po->p[ct]) ct++;
+
+    // Room for the existing entries, the new entry and the sentinel. After a
+    // successful realloc the old contents (sentinel included) are preserved,
+    // so storing the new pointer keeps the list valid even if the second
+    // allocation fails.
+    LSUP_Term **tmp_p = realloc (po->p, sizeof (*tmp_p) * (ct + 2));
+    if (!tmp_p) return LSUP_MEM_ERR;
+    po->p = tmp_p;
+
+    LSUP_Term ***tmp_o = realloc (po->o, sizeof (*tmp_o) * (ct + 2));
+    if (!tmp_o) return LSUP_MEM_ERR;
+    po->o = tmp_o;
+
+    // The new pair overwrites the old sentinels; fresh sentinels follow.
+    po->p[ct] = p;
+    po->p[ct + 1] = NULL;
+    po->o[ct] = o;
+    po->o[ct + 1] = NULL;
+
+    return LSUP_OK;
+}
+
+
+LSUP_rc
+LSUP_spo_list_add_triples (
+        LSUP_GraphIterator *it, LSUP_Term *s, const LSUP_PredObjList *po)
+{   /* Insert one triple (s, po->p[i], po->o[i][j]) through `it` for every
+     * object of every predicate in `po`. The predicate array and each object
+     * array are walked up to their NULL sentinel. LSUP_OK is returned when
+     * both loops run to completion.
+     */
+    LSUP_Triple *spo = LSUP_triple_new (s, NULL, NULL); /* Scratch triple
+     * reused for every insertion; only its p and o members change.
+     * NOTE(review): the result is dereferenced without a NULL check.
+     */
+    for (size_t i = 0; po->p[i]; i++) {
+        spo->p = po->p[i];
+        for (size_t j = 0; po->o[i][j]; j++) {
+            spo->o = po->o[i][j];
+
+            PRCCK (LSUP_graph_add_iter (it, spo)); /* NOTE(review): PRCCK is
+             * defined outside this view; if it returns early on a failed
+             * insertion, `spo` is never freed. Confirm against the macro.
+             */
+        }
+    }
+    free (spo); // Releases the scratch triple only; `s` and the terms in `po` are not touched here.
+
+    return LSUP_OK;
+}
+
+
+LSUP_Term *
+LSUP_bnode_add_collection (LSUP_GraphIterator *it, LSUP_Term **ol)
+{   /* Emit, through `it`, an rdf:first / rdf:rest chain for the terms in
+     * the NULL-terminated array `ol`, and return the blank node `s` created
+     * for the first link.
+     * NOTE(review): when `ol` is empty the loop never runs and `s` is
+     * returned without any triple attached to it; Turtle maps an empty
+     * collection to rdf:nil. Confirm that this is intended.
+     */
+    LSUP_NSMap *nsm = LSUP_graph_namespace (LSUP_graph_iter_graph (it));
+    LSUP_Term
+        *s = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL),
+        *rdf_first = LSUP_iriref_new ("rdf:first", nsm),
+        *rdf_rest = LSUP_iriref_new ("rdf:rest", nsm),
+        *rdf_nil = LSUP_iriref_new ("rdf:nil", nsm),
+        *link;
+
+    LSUP_Triple *spo = TRP_DUMMY; /* Scratch triple reused for every
+     * insertion. NOTE(review): TRP_DUMMY is defined outside this view; it is
+     * passed to free() below, so it presumably expands to a heap allocation.
+     */
+    link = s;
+    for (size_t i = 0; ol[i]; i++) {
+        spo->s = link;
+        spo->p = rdf_first;
+        spo->o = ol[i];
+        PRCNL (LSUP_graph_add_iter (it, spo)); /* NOTE(review): PRCNL is
+         * defined outside this view; if it returns early on failure, `s`,
+         * the rdf_* terms, the current link and `spo` are not released.
+         */
+
+        spo->p = rdf_rest;
+        spo->o = (
+                ol[i + 1] ? LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL)
+                : rdf_nil); // A fresh blank node links to the next item; the last item links to rdf:nil.
+
+        PRCNL (LSUP_graph_add_iter (it, spo));
+
+        if (link != s) LSUP_term_free (link); // Intermediate links are freed once used; the head `s` is kept for the caller.
+        // Current object becomes next subject. Irrelevant for last item,
+        // where it is rdf_nil, which is freed once after the loop.
+        link = spo->o;
+    }
+
+    LSUP_term_free (rdf_first);
+    LSUP_term_free (rdf_rest);
+    LSUP_term_free (rdf_nil);
+    free (spo);
+
+    return s;
+}
+
+
+/*
+ * Extern inline functions.
+ *
+ * Redeclaring the header's inline functions here WITHOUT the `inline`
+ * specifier makes this translation unit provide their external definitions,
+ * so calls that the compiler chooses not to inline still resolve at link
+ * time. A redeclaration that repeats `inline` would leave only inline
+ * definitions and emit nothing.
+ */
+
+char unescape_char (const char c);
+uint8_t *uint8_dup (const uint8_t *str);
+uint8_t *uint8_ndup (const uint8_t *str, size_t size);

+ 0 - 1240
src/codec/grammar_nt.c

@@ -1,1240 +0,0 @@
-/* This file is automatically generated by Lemon from input grammar
-** source file "src/codec/grammar_nt.y". */
-/** @brief Lemon parser grammar for N-Triples.
- *
- * The `lemon' parser generator executable must be in your PATH:
- * https://sqlite.org/src/doc/trunk/doc/lemon.html
- *
- * To generate the parser, run: `lemon ${FILE}'
- */
-
-#include "graph.h"
-#line 38 "src/codec/grammar_nt.c"
-#include "src/codec/grammar_nt.h"
-/**************** End of %include directives **********************************/
-/* These constants specify the various numeric values for terminal symbols.
-***************** Begin token definitions *************************************/
-#if INTERFACE
-#define T_EOF                             1
-#define T_DOT                             2
-#define T_IRIREF                          3
-#define T_BNODE                           4
-#define T_LITERAL                         5
-#define T_EOL                             6
-#define T_WS                              7
-#endif
-/**************** End token definitions ***************************************/
-
-/* The next sections is a series of control #defines.
-** various aspects of the generated parser.
-**    YYCODETYPE         is the data type used to store the integer codes
-**                       that represent terminal and non-terminal symbols.
-**                       "unsigned char" is used if there are fewer than
-**                       256 symbols.  Larger types otherwise.
-**    YYNOCODE           is a number of type YYCODETYPE that is not used for
-**                       any terminal or nonterminal symbol.
-**    YYFALLBACK         If defined, this indicates that one or more tokens
-**                       (also known as: "terminal symbols") have fall-back
-**                       values which should be used if the original symbol
-**                       would not parse.  This permits keywords to sometimes
-**                       be used as identifiers, for example.
-**    YYACTIONTYPE       is the data type used for "action codes" - numbers
-**                       that indicate what to do in response to the next
-**                       token.
-**    ParseTOKENTYPE     is the data type used for minor type for terminal
-**                       symbols.  Background: A "minor type" is a semantic
-**                       value associated with a terminal or non-terminal
-**                       symbols.  For example, for an "ID" terminal symbol,
-**                       the minor type might be the name of the identifier.
-**                       Each non-terminal can have a different minor type.
-**                       Terminal symbols all have the same minor type, though.
-**                       This macros defines the minor type for terminal 
-**                       symbols.
-**    YYMINORTYPE        is the data type used for all minor types.
-**                       This is typically a union of many types, one of
-**                       which is ParseTOKENTYPE.  The entry in the union
-**                       for terminal symbols is called "yy0".
-**    YYSTACKDEPTH       is the maximum depth of the parser's stack.  If
-**                       zero the stack is dynamically sized using realloc()
-**    ParseARG_SDECL     A static variable declaration for the %extra_argument
-**    ParseARG_PDECL     A parameter declaration for the %extra_argument
-**    ParseARG_PARAM     Code to pass %extra_argument as a subroutine parameter
-**    ParseARG_STORE     Code to store %extra_argument into yypParser
-**    ParseARG_FETCH     Code to extract %extra_argument from yypParser
-**    ParseCTX_*         As ParseARG_ except for %extra_context
-**    YYERRORSYMBOL      is the code number of the error symbol.  If not
-**                       defined, then do no error processing.
-**    YYNSTATE           the combined number of states.
-**    YYNRULE            the number of rules in the grammar
-**    YYNTOKEN           Number of terminal symbols
-**    YY_MAX_SHIFT       Maximum value for shift actions
-**    YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
-**    YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
-**    YY_ERROR_ACTION    The yy_action[] code for syntax error
-**    YY_ACCEPT_ACTION   The yy_action[] code for accept
-**    YY_NO_ACTION       The yy_action[] code for no-op
-**    YY_MIN_REDUCE      Minimum value for reduce actions
-**    YY_MAX_REDUCE      Maximum value for reduce actions
-*/
-#ifndef INTERFACE
-# define INTERFACE 1
-#endif
-/************* Begin control #defines *****************************************/
-#define YYCODETYPE unsigned char
-#define YYNOCODE 16
-#define YYACTIONTYPE unsigned char
-#if INTERFACE
-#define ParseTOKENTYPE  LSUP_Term * 
-#endif
-typedef union {
-  int yyinit;
-  ParseTOKENTYPE yy0;
-  LSUP_Triple * yy6;
-  LSUP_Term * yy10;
-  void * yy17;
-} YYMINORTYPE;
-#ifndef YYSTACKDEPTH
-#define YYSTACKDEPTH 100
-#endif
-#if INTERFACE
-#define ParseARG_SDECL  LSUP_GraphIterator *it ;
-#define ParseARG_PDECL , LSUP_GraphIterator *it 
-#define ParseARG_PARAM ,it 
-#define ParseARG_FETCH  LSUP_GraphIterator *it =yypParser->it ;
-#define ParseARG_STORE yypParser->it =it ;
-#define ParseCTX_SDECL
-#define ParseCTX_PDECL
-#define ParseCTX_PARAM
-#define ParseCTX_FETCH
-#define ParseCTX_STORE
-#endif
-#define YYNSTATE             15
-#define YYNRULE              15
-#define YYNRULE_WITH_ACTION  1
-#define YYNTOKEN             8
-#define YY_MAX_SHIFT         14
-#define YY_MIN_SHIFTREDUCE   25
-#define YY_MAX_SHIFTREDUCE   39
-#define YY_ERROR_ACTION      40
-#define YY_ACCEPT_ACTION     41
-#define YY_NO_ACTION         42
-#define YY_MIN_REDUCE        43
-#define YY_MAX_REDUCE        57
-/************* End control #defines *******************************************/
-#define YY_NLOOKAHEAD ((int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])))
-
-/* Define the yytestcase() macro to be a no-op if is not already defined
-** otherwise.
-**
-** Applications can choose to define yytestcase() in the %include section
-** to a macro that can assist in verifying code coverage.  For production
-** code the yytestcase() macro should be turned off.  But it is useful
-** for testing.
-*/
-#ifndef yytestcase
-# define yytestcase(X)
-#endif
-
-
-/* Next are the tables used to determine what action to take based on the
-** current state and lookahead token.  These tables are used to implement
-** functions that take a state number and lookahead value and return an
-** action integer.  
-**
-** Suppose the action integer is N.  Then the action is determined as
-** follows
-**
-**   0 <= N <= YY_MAX_SHIFT             Shift N.  That is, push the lookahead
-**                                      token onto the stack and goto state N.
-**
-**   N between YY_MIN_SHIFTREDUCE       Shift to an arbitrary state then
-**     and YY_MAX_SHIFTREDUCE           reduce by rule N-YY_MIN_SHIFTREDUCE.
-**
-**   N == YY_ERROR_ACTION               A syntax error has occurred.
-**
-**   N == YY_ACCEPT_ACTION              The parser accepts its input.
-**
-**   N == YY_NO_ACTION                  No such action.  Denotes unused
-**                                      slots in the yy_action[] table.
-**
-**   N between YY_MIN_REDUCE            Reduce by rule N-YY_MIN_REDUCE
-**     and YY_MAX_REDUCE
-**
-** The action table is constructed as a single large table named yy_action[].
-** Given state S and lookahead X, the action is computed as either:
-**
-**    (A)   N = yy_action[ yy_shift_ofst[S] + X ]
-**    (B)   N = yy_default[S]
-**
-** The (A) formula is preferred.  The B formula is used instead if
-** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X.
-**
-** The formulas above are for computing the action when the lookahead is
-** a terminal symbol.  If the lookahead is a non-terminal (as occurs after
-** a reduce action) then the yy_reduce_ofst[] array is used in place of
-** the yy_shift_ofst[] array.
-**
-** The following are the tables generated in this section:
-**
-**  yy_action[]        A single table containing all actions.
-**  yy_lookahead[]     A table containing the lookahead for each entry in
-**                     yy_action.  Used to detect hash collisions.
-**  yy_shift_ofst[]    For each state, the offset into yy_action for
-**                     shifting terminals.
-**  yy_reduce_ofst[]   For each state, the offset into yy_action for
-**                     shifting non-terminals after a reduce.
-**  yy_default[]       Default action for each state.
-**
-*********** Begin parsing tables **********************************************/
-#define YY_ACTTAB_COUNT (30)
-static const YYACTIONTYPE yy_action[] = {
- /*     0 */     4,    5,    8,   10,   41,    1,   11,    3,   33,   34,
- /*    10 */    35,   14,   36,   39,   36,    9,   39,   39,   12,   30,
- /*    20 */    31,   32,    3,    6,   25,   37,    2,    7,   13,   44,
-};
-static const YYCODETYPE yy_lookahead[] = {
- /*     0 */     8,   11,    9,   14,   12,   13,   14,   15,    3,    4,
- /*    10 */     5,    1,    6,    7,    6,    8,    7,    7,   15,    3,
- /*    20 */     4,    3,   15,   10,    2,    6,   15,   15,   14,    0,
- /*    30 */    16,   16,   16,   16,   16,   16,   16,   16,
-};
-#define YY_SHIFT_COUNT    (14)
-#define YY_SHIFT_MIN      (0)
-#define YY_SHIFT_MAX      (29)
-static const unsigned char yy_shift_ofst[] = {
- /*     0 */     6,   10,    5,   16,    8,    9,    9,   18,    9,    8,
- /*    10 */    19,   19,   22,   19,   29,
-};
-#define YY_REDUCE_COUNT (9)
-#define YY_REDUCE_MIN   (-11)
-#define YY_REDUCE_MAX   (14)
-static const signed char yy_reduce_ofst[] = {
- /*     0 */    -8,    7,  -10,   -7,  -11,    3,   11,   13,   12,   14,
-};
-static const YYACTIONTYPE yy_default[] = {
- /*     0 */    56,   56,   40,   40,   40,   56,   56,   40,   56,   40,
- /*    10 */    46,   45,   40,   47,   40,
-};
-/********** End of lemon-generated parsing tables *****************************/
-
-/* The next table maps tokens (terminal symbols) into fallback tokens.  
-** If a construct like the following:
-** 
-**      %fallback ID X Y Z.
-**
-** appears in the grammar, then ID becomes a fallback token for X, Y,
-** and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
-** but it does not parse, the type of the token is changed to ID and
-** the parse is retried before an error is thrown.
-**
-** This feature can be used, for example, to cause some keywords in a language
-** to revert to identifiers if they keyword does not apply in the context where
-** it appears.
-*/
-#ifdef YYFALLBACK
-static const YYCODETYPE yyFallback[] = {
-};
-#endif /* YYFALLBACK */
-
-/* The following structure represents a single element of the
-** parser's stack.  Information stored includes:
-**
-**   +  The state number for the parser at this level of the stack.
-**
-**   +  The value of the token stored at this level of the stack.
-**      (In other words, the "major" token.)
-**
-**   +  The semantic value stored at this level of the stack.  This is
-**      the information used by the action routines in the grammar.
-**      It is sometimes called the "minor" token.
-**
-** After the "shift" half of a SHIFTREDUCE action, the stateno field
-** actually contains the reduce action for the second half of the
-** SHIFTREDUCE.
-*/
-struct yyStackEntry {
-  YYACTIONTYPE stateno;  /* The state-number, or reduce action in SHIFTREDUCE */
-  YYCODETYPE major;      /* The major token value.  This is the code
-                         ** number for the token at this stack level */
-  YYMINORTYPE minor;     /* The user-supplied minor token value.  This
-                         ** is the value of the token  */
-};
-typedef struct yyStackEntry yyStackEntry;
-
-/* The state of the parser is completely contained in an instance of
-** the following structure */
-struct yyParser {
-  yyStackEntry *yytos;          /* Pointer to top element of the stack */
-#ifdef YYTRACKMAXSTACKDEPTH
-  int yyhwm;                    /* High-water mark of the stack */
-#endif
-#ifndef YYNOERRORRECOVERY
-  int yyerrcnt;                 /* Shifts left before out of the error */
-#endif
-  ParseARG_SDECL                /* A place to hold %extra_argument */
-  ParseCTX_SDECL                /* A place to hold %extra_context */
-#if YYSTACKDEPTH<=0
-  int yystksz;                  /* Current side of the stack */
-  yyStackEntry *yystack;        /* The parser's stack */
-  yyStackEntry yystk0;          /* First stack entry */
-#else
-  yyStackEntry yystack[YYSTACKDEPTH];  /* The parser's stack */
-  yyStackEntry *yystackEnd;            /* Last entry in the stack */
-#endif
-};
-typedef struct yyParser yyParser;
-
-#ifndef NDEBUG
-#include <stdio.h>
-#include <assert.h>
-static FILE *yyTraceFILE = 0;
-static char *yyTracePrompt = 0;
-#endif /* NDEBUG */
-
-#ifndef NDEBUG
-/* 
-** Turn parser tracing on by giving a stream to which to write the trace
-** and a prompt to preface each trace message.  Tracing is turned off
-** by making either argument NULL 
-**
-** Inputs:
-** <ul>
-** <li> A FILE* to which trace output should be written.
-**      If NULL, then tracing is turned off.
-** <li> A prefix string written at the beginning of every
-**      line of trace output.  If NULL, then tracing is
-**      turned off.
-** </ul>
-**
-** Outputs:
-** None.
-*/
-void ParseTrace(FILE *TraceFILE, char *zTracePrompt){
-  yyTraceFILE = TraceFILE;
-  yyTracePrompt = zTracePrompt;
-  if( yyTraceFILE==0 ) yyTracePrompt = 0;
-  else if( yyTracePrompt==0 ) yyTraceFILE = 0;
-}
-#endif /* NDEBUG */
-
-#if defined(YYCOVERAGE) || !defined(NDEBUG)
-/* For tracing shifts, the names of all terminals and nonterminals
-** are required.  The following table supplies these names */
-static const char *const yyTokenName[] = { 
-  /*    0 */ "$",
-  /*    1 */ "EOF",
-  /*    2 */ "DOT",
-  /*    3 */ "IRIREF",
-  /*    4 */ "BNODE",
-  /*    5 */ "LITERAL",
-  /*    6 */ "EOL",
-  /*    7 */ "WS",
-  /*    8 */ "triple",
-  /*    9 */ "subject",
-  /*   10 */ "predicate",
-  /*   11 */ "object",
-  /*   12 */ "ntriplesDoc",
-  /*   13 */ "triples",
-  /*   14 */ "eol",
-  /*   15 */ "ws",
-};
-#endif /* defined(YYCOVERAGE) || !defined(NDEBUG) */
-
-#ifndef NDEBUG
-/* For tracing reduce actions, the names of all rules are required.
-*/
-static const char *const yyRuleName[] = {
- /*   0 */ "triple ::= ws subject ws predicate ws object ws DOT",
- /*   1 */ "ntriplesDoc ::= triples EOF",
- /*   2 */ "triples ::= eol",
- /*   3 */ "triples ::= triple eol",
- /*   4 */ "triples ::= triples triple eol",
- /*   5 */ "subject ::= IRIREF",
- /*   6 */ "subject ::= BNODE",
- /*   7 */ "predicate ::= IRIREF",
- /*   8 */ "object ::= IRIREF",
- /*   9 */ "object ::= BNODE",
- /*  10 */ "object ::= LITERAL",
- /*  11 */ "eol ::= EOL",
- /*  12 */ "eol ::= eol EOL",
- /*  13 */ "ws ::=",
- /*  14 */ "ws ::= WS",
-};
-#endif /* NDEBUG */
-
-
-#if YYSTACKDEPTH<=0
-/*
-** Try to increase the size of the parser stack.  Return the number
-** of errors.  Return 0 on success.
-*/
-static int yyGrowStack(yyParser *p){
-  int newSize;
-  int idx;
-  yyStackEntry *pNew;
-
-  newSize = p->yystksz*2 + 100;
-  idx = p->yytos ? (int)(p->yytos - p->yystack) : 0;
-  if( p->yystack==&p->yystk0 ){
-    pNew = malloc(newSize*sizeof(pNew[0]));
-    if( pNew ) pNew[0] = p->yystk0;
-  }else{
-    pNew = realloc(p->yystack, newSize*sizeof(pNew[0]));
-  }
-  if( pNew ){
-    p->yystack = pNew;
-    p->yytos = &p->yystack[idx];
-#ifndef NDEBUG
-    if( yyTraceFILE ){
-      fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n",
-              yyTracePrompt, p->yystksz, newSize);
-    }
-#endif
-    p->yystksz = newSize;
-  }
-  return pNew==0; 
-}
-#endif
-
-/* Datatype of the argument to the memory allocated passed as the
-** second argument to ParseAlloc() below.  This can be changed by
-** putting an appropriate #define in the %include section of the input
-** grammar.
-*/
-#ifndef YYMALLOCARGTYPE
-# define YYMALLOCARGTYPE size_t
-#endif
-
-/* Initialize a new parser that has already been allocated.
-*/
-void ParseInit(void *yypRawParser ParseCTX_PDECL){
-  yyParser *yypParser = (yyParser*)yypRawParser;
-  ParseCTX_STORE
-#ifdef YYTRACKMAXSTACKDEPTH
-  yypParser->yyhwm = 0;
-#endif
-#if YYSTACKDEPTH<=0
-  yypParser->yytos = NULL;
-  yypParser->yystack = NULL;
-  yypParser->yystksz = 0;
-  if( yyGrowStack(yypParser) ){
-    yypParser->yystack = &yypParser->yystk0;
-    yypParser->yystksz = 1;
-  }
-#endif
-#ifndef YYNOERRORRECOVERY
-  yypParser->yyerrcnt = -1;
-#endif
-  yypParser->yytos = yypParser->yystack;
-  yypParser->yystack[0].stateno = 0;
-  yypParser->yystack[0].major = 0;
-#if YYSTACKDEPTH>0
-  yypParser->yystackEnd = &yypParser->yystack[YYSTACKDEPTH-1];
-#endif
-}
-
-#ifndef Parse_ENGINEALWAYSONSTACK
-/* 
-** This function allocates a new parser.
-** The only argument is a pointer to a function which works like
-** malloc.
-**
-** Inputs:
-** A pointer to the function used to allocate memory.
-**
-** Outputs:
-** A pointer to a parser.  This pointer is used in subsequent calls
-** to Parse and ParseFree.
-*/
-void *ParseAlloc(void *(*mallocProc)(YYMALLOCARGTYPE) ParseCTX_PDECL){
-  yyParser *yypParser;
-  yypParser = (yyParser*)(*mallocProc)( (YYMALLOCARGTYPE)sizeof(yyParser) );
-  if( yypParser ){
-    ParseCTX_STORE
-    ParseInit(yypParser ParseCTX_PARAM);
-  }
-  return (void*)yypParser;
-}
-#endif /* Parse_ENGINEALWAYSONSTACK */
-
-
-/* The following function deletes the "minor type" or semantic value
-** associated with a symbol.  The symbol can be either a terminal
-** or nonterminal. "yymajor" is the symbol code, and "yypminor" is
-** a pointer to the value to be deleted.  The code used to do the 
-** deletions is derived from the %destructor and/or %token_destructor
-** directives of the input grammar.
-*/
-static void yy_destructor(
-  yyParser *yypParser,    /* The parser */
-  YYCODETYPE yymajor,     /* Type code for object to destroy */
-  YYMINORTYPE *yypminor   /* The object to be destroyed */
-){
-  ParseARG_FETCH
-  ParseCTX_FETCH
-  switch( yymajor ){
-    /* Here is inserted the actions which take place when a
-    ** terminal or non-terminal is destroyed.  This can happen
-    ** when the symbol is popped from the stack during a
-    ** reduce or during error processing or when a parser is 
-    ** being destroyed before it is finished parsing.
-    **
-    ** Note: during a reduce, the only symbols destroyed are those
-    ** which appear on the RHS of the rule, but which are *not* used
-    ** inside the C code.
-    */
-/********* Begin destructor definitions ***************************************/
-    case 8: /* triple */
-{
-#line 19 "src/codec/grammar_nt.y"
- LSUP_triple_free ((yypminor->yy6)); 
-#line 517 "src/codec/grammar_nt.c"
-}
-      break;
-    case 9: /* subject */
-    case 10: /* predicate */
-    case 11: /* object */
-{
-#line 21 "src/codec/grammar_nt.y"
- LSUP_term_free ((yypminor->yy10)); 
-#line 526 "src/codec/grammar_nt.c"
-}
-      break;
-/********* End destructor definitions *****************************************/
-    default:  break;   /* If no destructor action specified: do nothing */
-  }
-}
-
-/*
-** Pop the parser's stack once.
-**
-** If there is a destructor routine associated with the token which
-** is popped from the stack, then call it.
-*/
-static void yy_pop_parser_stack(yyParser *pParser){
-  yyStackEntry *yytos;
-  assert( pParser->yytos!=0 );
-  assert( pParser->yytos > pParser->yystack );
-  yytos = pParser->yytos--;
-#ifndef NDEBUG
-  if( yyTraceFILE ){
-    fprintf(yyTraceFILE,"%sPopping %s\n",
-      yyTracePrompt,
-      yyTokenName[yytos->major]);
-  }
-#endif
-  yy_destructor(pParser, yytos->major, &yytos->minor);
-}
-
-/*
-** Clear all secondary memory allocations from the parser
-*/
-void ParseFinalize(void *p){
-  yyParser *pParser = (yyParser*)p;
-  while( pParser->yytos>pParser->yystack ) yy_pop_parser_stack(pParser);
-#if YYSTACKDEPTH<=0
-  if( pParser->yystack!=&pParser->yystk0 ) free(pParser->yystack);
-#endif
-}
-
-#ifndef Parse_ENGINEALWAYSONSTACK
-/* 
-** Deallocate and destroy a parser.  Destructors are called for
-** all stack elements before shutting the parser down.
-**
-** If the YYPARSEFREENEVERNULL macro exists (for example because it
-** is defined in a %include section of the input grammar) then it is
-** assumed that the input pointer is never NULL.
-*/
-void ParseFree(
-  void *p,                    /* The parser to be deleted */
-  void (*freeProc)(void*)     /* Function used to reclaim memory */
-){
-#ifndef YYPARSEFREENEVERNULL
-  if( p==0 ) return;
-#endif
-  ParseFinalize(p);
-  (*freeProc)(p);
-}
-#endif /* Parse_ENGINEALWAYSONSTACK */
-
-/*
-** Return the peak depth of the stack for a parser.
-*/
-#ifdef YYTRACKMAXSTACKDEPTH
-int ParseStackPeak(void *p){
-  yyParser *pParser = (yyParser*)p;
-  return pParser->yyhwm;
-}
-#endif
-
-/* This array of booleans keeps track of the parser statement
-** coverage.  The element yycoverage[X][Y] is set when the parser
-** is in state X and has a lookahead token Y.  In a well-tested
-** systems, every element of this matrix should end up being set.
-*/
-#if defined(YYCOVERAGE)
-static unsigned char yycoverage[YYNSTATE][YYNTOKEN];
-#endif
-
-/*
-** Write into out a description of every state/lookahead combination that
-**
-**   (1)  has not been used by the parser, and
-**   (2)  is not a syntax error.
-**
-** Return the number of missed state/lookahead combinations.
-*/
-#if defined(YYCOVERAGE)
-int ParseCoverage(FILE *out){
-  int stateno, iLookAhead, i;
-  int nMissed = 0;
-  for(stateno=0; stateno<YYNSTATE; stateno++){
-    i = yy_shift_ofst[stateno];
-    for(iLookAhead=0; iLookAhead<YYNTOKEN; iLookAhead++){
-      if( yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
-      if( yycoverage[stateno][iLookAhead]==0 ) nMissed++;
-      if( out ){
-        fprintf(out,"State %d lookahead %s %s\n", stateno,
-                yyTokenName[iLookAhead],
-                yycoverage[stateno][iLookAhead] ? "ok" : "missed");
-      }
-    }
-  }
-  return nMissed;
-}
-#endif
-
-/*
-** Find the appropriate action for a parser given the terminal
-** look-ahead token iLookAhead.
-*/
-static YYACTIONTYPE yy_find_shift_action(
-  YYCODETYPE iLookAhead,    /* The look-ahead token */
-  YYACTIONTYPE stateno      /* Current state number */
-){
-  int i;
-
-  if( stateno>YY_MAX_SHIFT ) return stateno;
-  assert( stateno <= YY_SHIFT_COUNT );
-#if defined(YYCOVERAGE)
-  yycoverage[stateno][iLookAhead] = 1;
-#endif
-  do{
-    i = yy_shift_ofst[stateno];
-    assert( i>=0 );
-    assert( i<=YY_ACTTAB_COUNT );
-    assert( i+YYNTOKEN<=(int)YY_NLOOKAHEAD );
-    assert( iLookAhead!=YYNOCODE );
-    assert( iLookAhead < YYNTOKEN );
-    i += iLookAhead;
-    assert( i<(int)YY_NLOOKAHEAD );
-    if( yy_lookahead[i]!=iLookAhead ){
-#ifdef YYFALLBACK
-      YYCODETYPE iFallback;            /* Fallback token */
-      assert( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0]) );
-      iFallback = yyFallback[iLookAhead];
-      if( iFallback!=0 ){
-#ifndef NDEBUG
-        if( yyTraceFILE ){
-          fprintf(yyTraceFILE, "%sFALLBACK %s => %s\n",
-             yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[iFallback]);
-        }
-#endif
-        assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
-        iLookAhead = iFallback;
-        continue;
-      }
-#endif
-#ifdef YYWILDCARD
-      {
-        int j = i - iLookAhead + YYWILDCARD;
-        assert( j<(int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])) );
-        if( yy_lookahead[j]==YYWILDCARD && iLookAhead>0 ){
-#ifndef NDEBUG
-          if( yyTraceFILE ){
-            fprintf(yyTraceFILE, "%sWILDCARD %s => %s\n",
-               yyTracePrompt, yyTokenName[iLookAhead],
-               yyTokenName[YYWILDCARD]);
-          }
-#endif /* NDEBUG */
-          return yy_action[j];
-        }
-      }
-#endif /* YYWILDCARD */
-      return yy_default[stateno];
-    }else{
-      assert( i>=0 && i<(int)(sizeof(yy_action)/sizeof(yy_action[0])) );
-      return yy_action[i];
-    }
-  }while(1);
-}
-
-/*
-** Find the appropriate action for a parser given the non-terminal
-** look-ahead token iLookAhead.
-*/
-static YYACTIONTYPE yy_find_reduce_action(
-  YYACTIONTYPE stateno,     /* Current state number */
-  YYCODETYPE iLookAhead     /* The look-ahead token */
-){
-  int i;
-#ifdef YYERRORSYMBOL
-  if( stateno>YY_REDUCE_COUNT ){
-    return yy_default[stateno];
-  }
-#else
-  assert( stateno<=YY_REDUCE_COUNT );
-#endif
-  i = yy_reduce_ofst[stateno];
-  assert( iLookAhead!=YYNOCODE );
-  i += iLookAhead;
-#ifdef YYERRORSYMBOL
-  if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){
-    return yy_default[stateno];
-  }
-#else
-  assert( i>=0 && i<YY_ACTTAB_COUNT );
-  assert( yy_lookahead[i]==iLookAhead );
-#endif
-  return yy_action[i];
-}
-
-/*
-** The following routine is called if the stack overflows.
-*/
-static void yyStackOverflow(yyParser *yypParser){
-   ParseARG_FETCH
-   ParseCTX_FETCH
-#ifndef NDEBUG
-   if( yyTraceFILE ){
-     fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt);
-   }
-#endif
-   while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser);
-   /* Here code is inserted which will execute if the parser
-   ** stack every overflows */
-/******** Begin %stack_overflow code ******************************************/
-/******** End %stack_overflow code ********************************************/
-   ParseARG_STORE /* Suppress warning about unused %extra_argument var */
-   ParseCTX_STORE
-}
-
-/*
-** Print tracing information for a SHIFT action
-*/
-#ifndef NDEBUG
-static void yyTraceShift(yyParser *yypParser, int yyNewState, const char *zTag){
-  if( yyTraceFILE ){
-    if( yyNewState<YYNSTATE ){
-      fprintf(yyTraceFILE,"%s%s '%s', go to state %d\n",
-         yyTracePrompt, zTag, yyTokenName[yypParser->yytos->major],
-         yyNewState);
-    }else{
-      fprintf(yyTraceFILE,"%s%s '%s', pending reduce %d\n",
-         yyTracePrompt, zTag, yyTokenName[yypParser->yytos->major],
-         yyNewState - YY_MIN_REDUCE);
-    }
-  }
-}
-#else
-# define yyTraceShift(X,Y,Z)
-#endif
-
-/*
-** Perform a shift action.
-*/
-static void yy_shift(
-  yyParser *yypParser,          /* The parser to be shifted */
-  YYACTIONTYPE yyNewState,      /* The new state to shift in */
-  YYCODETYPE yyMajor,           /* The major token to shift in */
-  ParseTOKENTYPE yyMinor        /* The minor token to shift in */
-){
-  yyStackEntry *yytos;
-  yypParser->yytos++;
-#ifdef YYTRACKMAXSTACKDEPTH
-  if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
-    yypParser->yyhwm++;
-    assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) );
-  }
-#endif
-#if YYSTACKDEPTH>0 
-  if( yypParser->yytos>yypParser->yystackEnd ){
-    yypParser->yytos--;
-    yyStackOverflow(yypParser);
-    return;
-  }
-#else
-  if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz] ){
-    if( yyGrowStack(yypParser) ){
-      yypParser->yytos--;
-      yyStackOverflow(yypParser);
-      return;
-    }
-  }
-#endif
-  if( yyNewState > YY_MAX_SHIFT ){
-    yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE;
-  }
-  yytos = yypParser->yytos;
-  yytos->stateno = yyNewState;
-  yytos->major = yyMajor;
-  yytos->minor.yy0 = yyMinor;
-  yyTraceShift(yypParser, yyNewState, "Shift");
-}
-
-/* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side
-** of that rule */
-static const YYCODETYPE yyRuleInfoLhs[] = {
-     8,  /* (0) triple ::= ws subject ws predicate ws object ws DOT */
-    12,  /* (1) ntriplesDoc ::= triples EOF */
-    13,  /* (2) triples ::= eol */
-    13,  /* (3) triples ::= triple eol */
-    13,  /* (4) triples ::= triples triple eol */
-     9,  /* (5) subject ::= IRIREF */
-     9,  /* (6) subject ::= BNODE */
-    10,  /* (7) predicate ::= IRIREF */
-    11,  /* (8) object ::= IRIREF */
-    11,  /* (9) object ::= BNODE */
-    11,  /* (10) object ::= LITERAL */
-    14,  /* (11) eol ::= EOL */
-    14,  /* (12) eol ::= eol EOL */
-    15,  /* (13) ws ::= */
-    15,  /* (14) ws ::= WS */
-};
-
-/* For rule J, yyRuleInfoNRhs[J] contains the negative of the number
-** of symbols on the right-hand side of that rule. */
-static const signed char yyRuleInfoNRhs[] = {
-   -8,  /* (0) triple ::= ws subject ws predicate ws object ws DOT */
-   -2,  /* (1) ntriplesDoc ::= triples EOF */
-   -1,  /* (2) triples ::= eol */
-   -2,  /* (3) triples ::= triple eol */
-   -3,  /* (4) triples ::= triples triple eol */
-   -1,  /* (5) subject ::= IRIREF */
-   -1,  /* (6) subject ::= BNODE */
-   -1,  /* (7) predicate ::= IRIREF */
-   -1,  /* (8) object ::= IRIREF */
-   -1,  /* (9) object ::= BNODE */
-   -1,  /* (10) object ::= LITERAL */
-   -1,  /* (11) eol ::= EOL */
-   -2,  /* (12) eol ::= eol EOL */
-    0,  /* (13) ws ::= */
-   -1,  /* (14) ws ::= WS */
-};
-
-static void yy_accept(yyParser*);  /* Forward Declaration */
-
-/*
-** Perform a reduce action and the shift that must immediately
-** follow the reduce.
-**
-** The yyLookahead and yyLookaheadToken parameters provide reduce actions
-** access to the lookahead token (if any).  The yyLookahead will be YYNOCODE
-** if the lookahead token has already been consumed.  As this procedure is
-** only called from one place, optimizing compilers will in-line it, which
-** means that the extra parameters have no performance impact.
-*/
-static YYACTIONTYPE yy_reduce(
-  yyParser *yypParser,         /* The parser */
-  unsigned int yyruleno,       /* Number of the rule by which to reduce */
-  int yyLookahead,             /* Lookahead token, or YYNOCODE if none */
-  ParseTOKENTYPE yyLookaheadToken  /* Value of the lookahead token */
-  ParseCTX_PDECL                   /* %extra_context */
-){
-  int yygoto;                     /* The next state */
-  YYACTIONTYPE yyact;             /* The next action */
-  yyStackEntry *yymsp;            /* The top of the parser's stack */
-  int yysize;                     /* Amount to pop the stack */
-  ParseARG_FETCH
-  (void)yyLookahead;
-  (void)yyLookaheadToken;
-  yymsp = yypParser->yytos;
-  assert( yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) );
-#ifndef NDEBUG
-  if( yyTraceFILE ){
-    yysize = yyRuleInfoNRhs[yyruleno];
-    if( yysize ){
-      fprintf(yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n",
-        yyTracePrompt,
-        yyruleno, yyRuleName[yyruleno],
-        yyruleno<YYNRULE_WITH_ACTION ? "" : " without external action",
-        yymsp[yysize].stateno);
-    }else{
-      fprintf(yyTraceFILE, "%sReduce %d [%s]%s.\n",
-        yyTracePrompt, yyruleno, yyRuleName[yyruleno],
-        yyruleno<YYNRULE_WITH_ACTION ? "" : " without external action");
-    }
-  }
-#endif /* NDEBUG */
-
-  /* Check that the stack is large enough to grow by a single entry
-  ** if the RHS of the rule is empty.  This ensures that there is room
-  ** enough on the stack to push the LHS value */
-  if( yyRuleInfoNRhs[yyruleno]==0 ){
-#ifdef YYTRACKMAXSTACKDEPTH
-    if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
-      yypParser->yyhwm++;
-      assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack));
-    }
-#endif
-#if YYSTACKDEPTH>0 
-    if( yypParser->yytos>=yypParser->yystackEnd ){
-      yyStackOverflow(yypParser);
-      /* The call to yyStackOverflow() above pops the stack until it is
-      ** empty, causing the main parser loop to exit.  So the return value
-      ** is never used and does not matter. */
-      return 0;
-    }
-#else
-    if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){
-      if( yyGrowStack(yypParser) ){
-        yyStackOverflow(yypParser);
-        /* The call to yyStackOverflow() above pops the stack until it is
-        ** empty, causing the main parser loop to exit.  So the return value
-        ** is never used and does not matter. */
-        return 0;
-      }
-      yymsp = yypParser->yytos;
-    }
-#endif
-  }
-
-  switch( yyruleno ){
-  /* Beginning here are the reduction cases.  A typical example
-  ** follows:
-  **   case 0:
-  **  #line <lineno> <grammarfile>
-  **     { ... }           // User supplied code
-  **  #line <lineno> <thisfile>
-  **     break;
-  */
-/********** Begin reduce actions **********************************************/
-      case 0: /* triple ::= ws subject ws predicate ws object ws DOT */
-#line 39 "src/codec/grammar_nt.y"
-{
-
-                yymsp[-7].minor.yy6 = LSUP_triple_new (yymsp[-6].minor.yy10, yymsp[-4].minor.yy10, yymsp[-2].minor.yy10);
-                LSUP_graph_add_iter (it, yymsp[-7].minor.yy6);
-            }
-#line 946 "src/codec/grammar_nt.c"
-        break;
-      case 3: /* triples ::= triple eol */
-{  yy_destructor(yypParser,8,&yymsp[-1].minor);
-#line 36 "src/codec/grammar_nt.y"
-{
-}
-#line 953 "src/codec/grammar_nt.c"
-}
-        break;
-      case 4: /* triples ::= triples triple eol */
-#line 37 "src/codec/grammar_nt.y"
-{
-}
-#line 960 "src/codec/grammar_nt.c"
-  yy_destructor(yypParser,8,&yymsp[-1].minor);
-        break;
-      default:
-      /* (1) ntriplesDoc ::= triples EOF */ yytestcase(yyruleno==1);
-      /* (2) triples ::= eol */ yytestcase(yyruleno==2);
-      /* (5) subject ::= IRIREF */ yytestcase(yyruleno==5);
-      /* (6) subject ::= BNODE */ yytestcase(yyruleno==6);
-      /* (7) predicate ::= IRIREF */ yytestcase(yyruleno==7);
-      /* (8) object ::= IRIREF */ yytestcase(yyruleno==8);
-      /* (9) object ::= BNODE */ yytestcase(yyruleno==9);
-      /* (10) object ::= LITERAL */ yytestcase(yyruleno==10);
-      /* (11) eol ::= EOL */ yytestcase(yyruleno==11);
-      /* (12) eol ::= eol EOL */ yytestcase(yyruleno==12);
-      /* (13) ws ::= */ yytestcase(yyruleno==13);
-      /* (14) ws ::= WS */ yytestcase(yyruleno==14);
-        break;
-/********** End reduce actions ************************************************/
-  };
-  assert( yyruleno<sizeof(yyRuleInfoLhs)/sizeof(yyRuleInfoLhs[0]) );
-  yygoto = yyRuleInfoLhs[yyruleno];
-  yysize = yyRuleInfoNRhs[yyruleno];
-  yyact = yy_find_reduce_action(yymsp[yysize].stateno,(YYCODETYPE)yygoto);
-
-  /* There are no SHIFTREDUCE actions on nonterminals because the table
-  ** generator has simplified them to pure REDUCE actions. */
-  assert( !(yyact>YY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) );
-
-  /* It is not possible for a REDUCE to be followed by an error */
-  assert( yyact!=YY_ERROR_ACTION );
-
-  yymsp += yysize+1;
-  yypParser->yytos = yymsp;
-  yymsp->stateno = (YYACTIONTYPE)yyact;
-  yymsp->major = (YYCODETYPE)yygoto;
-  yyTraceShift(yypParser, yyact, "... then shift");
-  return yyact;
-}
-
-/*
-** The following code executes when the parse fails
-*/
-#ifndef YYNOERRORRECOVERY
-static void yy_parse_failed(
-  yyParser *yypParser           /* The parser */
-){
-  ParseARG_FETCH
-  ParseCTX_FETCH
-#ifndef NDEBUG
-  if( yyTraceFILE ){
-    fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt);
-  }
-#endif
-  while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser);
-  /* Here code is inserted which will be executed whenever the
-  ** parser fails */
-/************ Begin %parse_failure code ***************************************/
-/************ End %parse_failure code *****************************************/
-  ParseARG_STORE /* Suppress warning about unused %extra_argument variable */
-  ParseCTX_STORE
-}
-#endif /* YYNOERRORRECOVERY */
-
-/*
-** The following code executes when a syntax error first occurs.
-*/
-static void yy_syntax_error(
-  yyParser *yypParser,           /* The parser */
-  int yymajor,                   /* The major type of the error token */
-  ParseTOKENTYPE yyminor         /* The minor type of the error token */
-){
-  ParseARG_FETCH
-  ParseCTX_FETCH
-#define TOKEN yyminor
-/************ Begin %syntax_error code ****************************************/
-/************ End %syntax_error code ******************************************/
-  ParseARG_STORE /* Suppress warning about unused %extra_argument variable */
-  ParseCTX_STORE
-}
-
-/*
-** The following is executed when the parser accepts
-*/
-static void yy_accept(
-  yyParser *yypParser           /* The parser */
-){
-  ParseARG_FETCH
-  ParseCTX_FETCH
-#ifndef NDEBUG
-  if( yyTraceFILE ){
-    fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt);
-  }
-#endif
-#ifndef YYNOERRORRECOVERY
-  yypParser->yyerrcnt = -1;
-#endif
-  assert( yypParser->yytos==yypParser->yystack );
-  /* Here code is inserted which will be executed whenever the
-  ** parser accepts */
-/*********** Begin %parse_accept code *****************************************/
-/*********** End %parse_accept code *******************************************/
-  ParseARG_STORE /* Suppress warning about unused %extra_argument variable */
-  ParseCTX_STORE
-}
-
-/* The main parser program.
-** The first argument is a pointer to a structure obtained from
-** "ParseAlloc" which describes the current state of the parser.
-** The second argument is the major token number.  The third is
-** the minor token.  The fourth optional argument is whatever the
-** user wants (and specified in the grammar) and is available for
-** use by the action routines.
-**
-** Inputs:
-** <ul>
-** <li> A pointer to the parser (an opaque structure.)
-** <li> The major token number.
-** <li> The minor token number.
-** <li> An option argument of a grammar-specified type.
-** </ul>
-**
-** Outputs:
-** None.
-*/
-void Parse(
-  void *yyp,                   /* The parser */
-  int yymajor,                 /* The major token code number */
-  ParseTOKENTYPE yyminor       /* The value for the token */
-  ParseARG_PDECL               /* Optional %extra_argument parameter */
-){
-  YYMINORTYPE yyminorunion;
-  YYACTIONTYPE yyact;   /* The parser action. */
-#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
-  int yyendofinput;     /* True if we are at the end of input */
-#endif
-#ifdef YYERRORSYMBOL
-  int yyerrorhit = 0;   /* True if yymajor has invoked an error */
-#endif
-  yyParser *yypParser = (yyParser*)yyp;  /* The parser */
-  ParseCTX_FETCH
-  ParseARG_STORE
-
-  assert( yypParser->yytos!=0 );
-#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
-  yyendofinput = (yymajor==0);
-#endif
-
-  yyact = yypParser->yytos->stateno;
-#ifndef NDEBUG
-  if( yyTraceFILE ){
-    if( yyact < YY_MIN_REDUCE ){
-      fprintf(yyTraceFILE,"%sInput '%s' in state %d\n",
-              yyTracePrompt,yyTokenName[yymajor],yyact);
-    }else{
-      fprintf(yyTraceFILE,"%sInput '%s' with pending reduce %d\n",
-              yyTracePrompt,yyTokenName[yymajor],yyact-YY_MIN_REDUCE);
-    }
-  }
-#endif
-
-  do{
-    assert( yyact==yypParser->yytos->stateno );
-    yyact = yy_find_shift_action((YYCODETYPE)yymajor,yyact);
-    if( yyact >= YY_MIN_REDUCE ){
-      yyact = yy_reduce(yypParser,yyact-YY_MIN_REDUCE,yymajor,
-                        yyminor ParseCTX_PARAM);
-    }else if( yyact <= YY_MAX_SHIFTREDUCE ){
-      yy_shift(yypParser,yyact,(YYCODETYPE)yymajor,yyminor);
-#ifndef YYNOERRORRECOVERY
-      yypParser->yyerrcnt--;
-#endif
-      break;
-    }else if( yyact==YY_ACCEPT_ACTION ){
-      yypParser->yytos--;
-      yy_accept(yypParser);
-      return;
-    }else{
-      assert( yyact == YY_ERROR_ACTION );
-      yyminorunion.yy0 = yyminor;
-#ifdef YYERRORSYMBOL
-      int yymx;
-#endif
-#ifndef NDEBUG
-      if( yyTraceFILE ){
-        fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt);
-      }
-#endif
-#ifdef YYERRORSYMBOL
-      /* A syntax error has occurred.
-      ** The response to an error depends upon whether or not the
-      ** grammar defines an error token "ERROR".  
-      **
-      ** This is what we do if the grammar does define ERROR:
-      **
-      **  * Call the %syntax_error function.
-      **
-      **  * Begin popping the stack until we enter a state where
-      **    it is legal to shift the error symbol, then shift
-      **    the error symbol.
-      **
-      **  * Set the error count to three.
-      **
-      **  * Begin accepting and shifting new tokens.  No new error
-      **    processing will occur until three tokens have been
-      **    shifted successfully.
-      **
-      */
-      if( yypParser->yyerrcnt<0 ){
-        yy_syntax_error(yypParser,yymajor,yyminor);
-      }
-      yymx = yypParser->yytos->major;
-      if( yymx==YYERRORSYMBOL || yyerrorhit ){
-#ifndef NDEBUG
-        if( yyTraceFILE ){
-          fprintf(yyTraceFILE,"%sDiscard input token %s\n",
-             yyTracePrompt,yyTokenName[yymajor]);
-        }
-#endif
-        yy_destructor(yypParser, (YYCODETYPE)yymajor, &yyminorunion);
-        yymajor = YYNOCODE;
-      }else{
-        while( yypParser->yytos >= yypParser->yystack
-            && (yyact = yy_find_reduce_action(
-                        yypParser->yytos->stateno,
-                        YYERRORSYMBOL)) > YY_MAX_SHIFTREDUCE
-        ){
-          yy_pop_parser_stack(yypParser);
-        }
-        if( yypParser->yytos < yypParser->yystack || yymajor==0 ){
-          yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);
-          yy_parse_failed(yypParser);
-#ifndef YYNOERRORRECOVERY
-          yypParser->yyerrcnt = -1;
-#endif
-          yymajor = YYNOCODE;
-        }else if( yymx!=YYERRORSYMBOL ){
-          yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor);
-        }
-      }
-      yypParser->yyerrcnt = 3;
-      yyerrorhit = 1;
-      if( yymajor==YYNOCODE ) break;
-      yyact = yypParser->yytos->stateno;
-#elif defined(YYNOERRORRECOVERY)
-      /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to
-      ** do any kind of error recovery.  Instead, simply invoke the syntax
-      ** error routine and continue going as if nothing had happened.
-      **
-      ** Applications can set this macro (for example inside %include) if
-      ** they intend to abandon the parse upon the first syntax error seen.
-      */
-      yy_syntax_error(yypParser,yymajor, yyminor);
-      yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);
-      break;
-#else  /* YYERRORSYMBOL is not defined */
-      /* This is what we do if the grammar does not define ERROR:
-      **
-      **  * Report an error message, and throw away the input token.
-      **
-      **  * If the input token is $, then fail the parse.
-      **
-      ** As before, subsequent error messages are suppressed until
-      ** three input tokens have been successfully shifted.
-      */
-      if( yypParser->yyerrcnt<=0 ){
-        yy_syntax_error(yypParser,yymajor, yyminor);
-      }
-      yypParser->yyerrcnt = 3;
-      yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);
-      if( yyendofinput ){
-        yy_parse_failed(yypParser);
-#ifndef YYNOERRORRECOVERY
-        yypParser->yyerrcnt = -1;
-#endif
-      }
-      break;
-#endif
-    }
-  }while( yypParser->yytos>yypParser->yystack );
-#ifndef NDEBUG
-  if( yyTraceFILE ){
-    yyStackEntry *i;
-    char cDiv = '[';
-    fprintf(yyTraceFILE,"%sReturn. Stack=",yyTracePrompt);
-    for(i=&yypParser->yystack[1]; i<=yypParser->yytos; i++){
-      fprintf(yyTraceFILE,"%c%s", cDiv, yyTokenName[i->major]);
-      cDiv = ' ';
-    }
-    fprintf(yyTraceFILE,"]\n");
-  }
-#endif
-  return;
-}
-
-/*
-** Return the fallback token corresponding to canonical token iToken, or
-** 0 if iToken has no fallback.
-*/
-int ParseFallback(int iToken){
-#ifdef YYFALLBACK
-  assert( iToken<(int)(sizeof(yyFallback)/sizeof(yyFallback[0])) );
-  return yyFallback[iToken];
-#else
-  (void)iToken;
-  return 0;
-#endif
-}

+ 128 - 83
src/codec/grammar_ttl.y

@@ -1,46 +1,40 @@
 %include {
 
-/** @brief Lemon parser grammar for N-Triples.
+/** @brief Lemon parser grammar for Turtle.
  *
  * The `lemon' parser generator executable must be in your PATH:
  * https://sqlite.org/src/doc/trunk/doc/lemon.html
  *
- * To generate the parser, run: `lemon ${FILE}'
+ * To generate the parser, run: `make parsers'
  *
  * TTL EBNF: https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar
  */
 
-#include "graph.h"
+#include "codec.h"
 
 
-typedef struct {
-    LSUP_Term **    predList;       ///< NULL-terminated array of term handles.
-    LSUP_Term ***   objList;        /**<
-                                      * NULL-terminated array of
-                                      * NULL-terminated arrays of term handles.
-                                      * The indices of the outer array are
-                                      * equal to the indices of the associated
-                                      * predicate in the predicate list.
-                                      */
-} PredObjList;
-
+/* Namespace IRI for XML Schema datatypes. */
+#define XSD_PFX "http://www.w3.org/2001/XMLSchema#"
 }
 
+/* Invoked by the generated parser when its stack is exhausted. */
+%stack_overflow {
+    log_error ("Stack overflow in TTL parsing. Please jettison the parser.");
+}
 
-%token_type { LSUP_Term * }
-%token_prefix "T_"
-
-%type triple            { LSUP_Triple * }
-%destructor triple      { LSUP_triple_free ($$); }
+%parse_failure {
+    log_error ("TTL parse error. Cannot continue.");
+}
 
-%type subject           { LSUP_Term * }
-%destructor subject     { LSUP_term_free ($$); }
+%stack_size CHUNK_SIZE
 
-%type predicate         { LSUP_Term * }
-%destructor predicate   { LSUP_term_free ($$); }
+/*
+ * Report a syntax error.
+ *
+ * TOKEN is the value of the offending token. Per %token_type it is a plain
+ * byte pointer (expected to be a NUL-terminated string produced by the
+ * lexer), and it may be NULL when the parser is flushed at end of input.
+ */
+%syntax_error {
+    //UNUSED_PARAMETER (yymajor);  /* Silence some compiler warnings */
+    if (TOKEN && TOKEN[0]) {
+        log_error ("near \"%s\": syntax error", (char *) TOKEN);
+    } else {
+        log_error ("incomplete input");
+    }
+}
 
-%type object            { LSUP_Term * }
-%destructor object      { LSUP_term_free ($$); }
+%token_prefix "T_"
+%token_type { uint8_t * }
+%token_destructor { free ($$); }
 
 /* NULL-terminated array of object term handles. */
 %type objList           { LSUP_Term ** }
@@ -50,120 +44,171 @@ typedef struct {
     }
 }
 
-%type predObjList       { LSUP_Term *** }
-%destructor predObjList {
-    for (size_t i = 0; $$[i]; i++) {
-        for (size_t j = 0; $$[i][j]; j++) {
-            LSUP_term_free ($$[i][j]);
-        }
-    }
-}
-
-%default_type           { void * }
+%default_type           { uint8_t * }
 
-%extra_argument         { LSUP_GraphIterator *it }
+%extra_argument         { LSUP_TTLParserState *state }
 
 
-// Rules.
+/*
+ * Rules.
+ */
 
 turtleDoc   ::= statements EOF .
 
+statements  ::= statements statement .
 statements  ::= .
-statements  ::= statement .
-statements  ::= statements WS statement .
 
-statement(A) ::= directive ows EOS . {
-            }
-statement(A) ::= triples ows EOS {
-            }
+statement   ::= directive ows EOS .
+statement   ::= triples ows EOS .
 
 directive 	::= prefixID .
 directive   ::= base .
 
-prefixID    :== PREFIX WS PFX_NAME COLON IRIREF {
+prefixID    ::= PREFIX WS PFX_NAME(P) COLON IRIREF(N) . {
+                LSUP_nsmap_add (state->nsm, P, N);
             }
-prefixID    :== PREFIX WS COLON IRIREF {
+prefixID    ::= PREFIX WS COLON IRIREF(N) . {
+                LSUP_nsmap_add (state->nsm, "", N);
             }
 
-base        ::= BASE WS IRIREF . {
+base        ::= BASE WS IRIREF(D) . {
+                state->base = LSUP_iriref_new (D, NULL);
             }
 
-triples 	::= subject predObjList .
+triples 	::= subject(S) predObjList(L) . {
+                LSUP_spo_list_add_triples (state->it, S, L);
+                LSUP_term_free (S);
+                LSUP_pred_obj_list_free (L);
+            }
 
+%type predObjList       { PredObjList * }  // Predicate/object list accumulated by the rules below.
+%destructor predObjList { LSUP_pred_obj_list_free ($$); }
+predObjList(A) ::= predObjList(A) SEMICOLON predicate(P) objectList(O) . {
+                return LSUP_pred_obj_list_add (A, P, O);  // NOTE(review): `return` exits the generated reduce routine from inside the action - confirm this is intended.
+            }
+predObjList(A) ::= predicate(P) objectList(O) . {
+                A = LSUP_pred_ob_list_new();  // NOTE(review): spelled "pred_ob" while sibling calls use "pred_obj" - confirm the declared name.
+                return LSUP_pred_obj_list_add (A, P, O);  // NOTE(review): same early `return` concern as the rule above.
+            }
 predObjList ::= predObjList SEMICOLON .
-predObjList ::= predObjList SEMICOLON verb objectList .
-predObjList ::= verb objectList .
-
-objectList 	::= objectList COMMA object .
-objectList 	::= object .
 
-verb        ::= predicate .
-verb        ::= rdfType .
-
-comment 	::= COMMENT .
+%type objectList { LSUP_Term ** }
+objectList(A) ::= objectList(L) COMMA object(O) . {
+                A = LSUP_obj_list_add (L, O);
+            }
+// Start a new object list holding the first object.
+objectList(A) ::= object(O) . {
+                // Two zeroed slots: the object itself plus a NULL terminator.
+                A = calloc (2, sizeof (*A));
+                if (UNLIKELY (!A)) return LSUP_MEM_ERR; // TODO error handling
+                A[0] = O;
+            }
 
 subject 	::= resource .
 subject 	::= blank .
 
 predicate   ::= resource .
+predicate(A)::= RDF_TYPE . { A = LSUP_iriref_new ("rdf:type", state->nsm); }
 
 object 	    ::= resource .
 object 	    ::= blank .
 object 	    ::= literal .
 
-literal(A)  ::= qString(D) . {
+%type literal { LSUP_Term * }
+//%destructor literal { LSUP_term_free ($$); } // Destroyed with PO list.
+literal(A)  ::= STRING(D) . {
+                A = LSUP_term_new (LSUP_TERM_LITERAL, D, NULL);
             }
-literal(A)  ::= qString(D) LANGTAG(L) . {
+literal(A)  ::= STRING(D) LANGTAG(L) . {
+                A = LSUP_term_new (LSUP_TERM_LT_LITERAL, D, L);
             }
-literal(A)  ::= qString(D) DT_MARKER resource(M) . {
+literal(A)  ::= STRING(D) DTYPE_MARKER resource(M) . {
+                A = LSUP_term_new (LSUP_TERM_LITERAL, D, M);
             }
 literal(A)  ::= INTEGER(D) . {
+                A = LSUP_term_new (
+                    LSUP_TERM_LITERAL, D,
+                    LSUP_iriref_new ("xsd:integer", state->nsm));
             }
 literal(A)  ::= DOUBLE(D) . {
+                A = LSUP_term_new (
+                    LSUP_TERM_LITERAL, D,
+                    LSUP_iriref_new ("xsd:double", state->nsm));
             }
 literal(A)  ::= DECIMAL(D) . {
+                A = LSUP_term_new (
+                    LSUP_TERM_LITERAL, D,
+                    LSUP_iriref_new ("xsd:decimal", state->nsm));
             }
 literal(A)  ::= BOOLEAN(D) . {
+                A = LSUP_term_new (
+                    LSUP_TERM_LITERAL, D,
+                    LSUP_iriref_new ("xsd:boolean", state->nsm));
             }
 
-blank       :== nodeID .
-blank       :== LBRACKET RBRACKET .
-blank       :== LBRACKET predObjList RBRACKET .
-blank       :== collection .
-
-collection  ::= LPAREN itemList RPAREN .
-
-itemList    ::= itemList object .
-itemList    ::= object .
-itemList    ::= .
-
-resource    ::= iriref .
-resource    ::= qname .
-
-rdfType(A) ::= RDF_TYPE . {
+blank(A)    ::= nodeID(D) . {
+                A = LSUP_term_new (LSUP_TERM_BNODE, D, NULL);
+            }
+blank(A)    ::= LBRACKET ows RBRACKET . {
+                A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
             }
+// Anonymous blank node carrying a predicate/object list: `[ pred obj ; ... ]`.
+blank(A)    ::= LBRACKET predObjList(L) RBRACKET . {
+                A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
+                // Same call shape as the `triples` rule: iterator, subject, list.
+                LSUP_spo_list_add_triples (state->it, A, L);
+
+                LSUP_pred_obj_list_free (L);
             }
-qname(A)    ::= PFX_NAME(P) COLON NAME(D) . {
+blank       ::= collection .
+blank(A)    ::= LPAREN ows RPAREN . {
+                A = LSUP_iriref_new ("rdf:nil", state->nsm);
+            }
+
+// "collection" is the subject of the first collection item.
+%type collection { LSUP_Term * }
+// Collection triples are added here to the graph.
+collection(A) ::= LPAREN ows itemList(L) ows RPAREN . {
+                A = LSUP_bnode_add_collection (state->it, L);
             }
-qname(A)    ::= COLON NAME(D) . {
+
+%type itemList { LSUP_Term ** }
+// Freed when the item list in the collection gets added to the graph.
+%destructor itemList {}
+itemList(A) ::= itemList(L) WS object(O) . { A = LSUP_obj_list_add (L, O); }
+// Start a new collection item list holding the first object.
+itemList(A) ::= object(O) . {
+                // Two zeroed slots: the object itself plus a NULL terminator.
+                A = calloc (2, sizeof (*A));
+                if (UNLIKELY (!A)) return LSUP_MEM_ERR; // TODO error handling
+                A[0] = O;
             }
 
-qstring(A)  ::= STRING(D) {
+%type resource { LSUP_Term * }
+%destructor resource { LSUP_term_free ($$); }
+// Build an IRI term from an IRIREF token, resolving it against the document
+// base when one has been set by a previous `base` directive.
+resource(A) ::= IRIREF(D) . {
+                // LSUP_iriref_new() yields a term handle, so hold it by pointer.
+                LSUP_Term *rel_iri = LSUP_iriref_new (D, NULL);
+                free (D);  // Token text is released right after the term is built.
+                if (state->base) {
+                    A = LSUP_iriref_absolute (rel_iri, state->base);
+                    LSUP_term_free (rel_iri);
+                } else {
+                    A = rel_iri;
+                }
             }
+resource(A) ::= qname(D) . { A = LSUP_iriref_new (D, state->nsm); }
 
-node_id(A)    ::= NODE_ID(D) . {
+qname(A)    ::= PFX_NAME(P) COLON IDNAME(D) . {
+                A = malloc (strlen (P) + strlen (D) + 2);
+                sprintf (A, "%s:%s", P, D);
+            }
+qname(A)    ::= COLON IDNAME(D) . {
+                A = malloc (strlen (D) + 2);
+                sprintf (A, ":%s", D);
             }
 
+nodeID(A)   ::= BNODE_PFX IDNAME(D) . { A = D; }
+
 ows         ::= WS.
 ows         ::=.
 
-opt_pfx     ::= PFX .
-opt_pfx     ::= .
-
 /*
- * From https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar :
+ * This has been adapted from
+ * https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar :
 
 
 [1]	turtleDoc 	::= 	statement*

+ 8 - 85
src/codec/lexer_nt.re

@@ -1,26 +1,18 @@
+#include "codec.h"
 #include "grammar_nt.h"
 #include "parser_nt.h"
 
 
-#define YYCTYPE     unsigned char
+#define YYCTYPE     uint8_t
 #define YYCURSOR    it->cur
 #define YYMARKER    it->mar
 #define YYLIMIT     it->lim
 #define YYFILL      fill(it) == 0
 
-/**
- * Max chunk size passed to scanner at each iteration.
- */
-#ifdef LSUP_RDF_STREAM_CHUNK_SIZE
-#define CHUNK_SIZE LSUP_RDF_STREAM_CHUNK_SIZE
-#else
-#define CHUNK_SIZE 8191
-#endif
-
 
 typedef struct {
     FILE *          fh;                 // Input file handle.
-    YYCTYPE         buf[CHUNK_SIZE + 1],// Start of buffer.
+    YYCTYPE         buf[CHUNK_SIZE],    // Start of buffer.
             *       lim,                // Position after the last available
                                         //   input character (YYLIMIT).
             *       cur,                // Next input character to be read
@@ -36,19 +28,6 @@ typedef struct {
 } ParseIterator;
 
 
-// TODO The opposite of this is in codec_nt.c. Find a better place for both.
-static inline char unescape_char(const char c) {
-    switch (c) {
-        case 't': return '\t';
-        case 'b': return '\b';
-        case 'n': return '\n';
-        case 'r': return '\r';
-        case 'f': return '\f';
-        default: return c;
-    }
-}
-
-
 static int fill(ParseIterator *it)
 {
     if (it->eof) {
@@ -67,7 +46,7 @@ static int fill(ParseIterator *it)
     it->lim += fread(it->lim, 1, shift, it->fh);
     /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
     it->lim[0] = 0;
-    it->eof |= it->lim < it->buf + CHUNK_SIZE;
+    it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
     return 0;
 }
 
@@ -75,7 +54,7 @@ static int fill(ParseIterator *it)
 static void parse_init(ParseIterator *it, FILE *fh)
 {
     it->fh = fh;
-    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE;
+    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE - 1;
     it->line = 1;
     it->bol = it->buf;
     it->ct = 0;
@@ -85,62 +64,6 @@ static void parse_init(ParseIterator *it, FILE *fh)
 }
 
 
-/** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
- */
-static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
-{
-    YYCTYPE *uc_str = malloc (size + 1);
-
-    size_t j = 0;
-    YYCTYPE tmp_chr[5];
-    for (size_t i = 0; i < size;) {
-        if (esc_str[i] == '\\') {
-            i++; // Skip over '\\'
-
-            // 4-hex sequence.
-            if (esc_str[i] == 'u') {
-                i ++; // Skip over 'u'
-
-                // Use tmp_chr to hold the hex string for the code point.
-                memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
-                tmp_chr[4] = '\0';
-
-                uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
-                log_debug ("tmp_val: %d", tmp_val);
-
-                // Reuse tmp_chr to hold the byte values for the code point.
-                int nbytes = utf8_encode (tmp_val, tmp_chr);
-
-                // Copy bytes into destination.
-                memcpy (uc_str + j, tmp_chr, nbytes);
-                log_debug ("UC byte value: %x %x", uc_str[j], uc_str[j + 1]);
-
-                j += nbytes;
-                i += 4;
-
-            // 8-hex sequence.
-            } else if (esc_str[i] == 'U') {
-                i ++; // Skip over 'U'
-                log_error ("UTF-16 sequence unescaping not yet implemented.");
-                return NULL; // TODO encode UTF-16
-
-            // Unescape other escaped characters.
-            } else uc_str[j++] = unescape_char(esc_str[i++]);
-        } else {
-            // Copy ASCII char verbatim.
-            uc_str[j++] = esc_str[i++];
-        }
-    }
-
-    YYCTYPE *tmp = realloc (uc_str, j + 1);
-    if (UNLIKELY (!tmp)) return NULL;
-    uc_str = tmp;
-    uc_str[j] = '\0';
-
-    return uc_str;
-}
-
-
 // Parser interface.
 
 void *ParseAlloc();
@@ -204,7 +127,7 @@ loop:
     }
 
     IRIREF {
-        YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
+        YYCTYPE *data = unescape_unicode (it->tok + 1);
 
         log_debug ("URI data: %s", data);
 
@@ -217,7 +140,7 @@ loop:
     LITERAL {
         // Only unescape Unicode from data.
         size_t size = lit_data_e - it->tok - 2;
-        YYCTYPE *data = unescape_unicode (it->tok + 1, size);
+        YYCTYPE *data = unescape_unicode (it->tok + 1);
         log_trace ("Literal data: %s", data);
 
         char *metadata = NULL;
@@ -256,7 +179,7 @@ loop:
     }
 
     BNODE {
-        YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
+        YYCTYPE *data = unescape_unicode (it->tok + 2);
 
         log_debug ("BNode data: %s", data);
 

+ 75 - 157
src/codec/lexer_ttl.re

@@ -1,26 +1,25 @@
+#include "codec.h"
 #include "grammar_ttl.h"
 #include "parser_ttl.h"
 
 
-#define YYCTYPE     unsigned char
+/** @brief TTL is UTF-8 encoded.
+ *
+ * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar
+ *
+ * `char` should be considered to be UTF-8 throughout this library, however,
+ * setting YYCTYPE to char generates case labels outside of the char range.
+ */
+#define YYCTYPE     uint8_t
 #define YYCURSOR    it->cur
 #define YYMARKER    it->mar
 #define YYLIMIT     it->lim
 #define YYFILL      fill(it) == 0
 
-/**
- * Max chunk size passed to scanner at each iteration.
- */
-#ifdef LSUP_RDF_STREAM_CHUNK_SIZE
-#define CHUNK_SIZE LSUP_RDF_STREAM_CHUNK_SIZE
-#else
-#define CHUNK_SIZE 8191
-#endif
-
 
 typedef struct {
     FILE *          fh;                 // Input file handle.
-    YYCTYPE         buf[CHUNK_SIZE + 1],// Start of buffer.
+    YYCTYPE         buf[CHUNK_SIZE],    // Start of buffer.
             *       lim,                // Position after the last available
                                         //   input character (YYLIMIT).
             *       cur,                // Next input character to be read
@@ -42,79 +41,6 @@ typedef struct {
 } ParserToken;
 
 
-// TODO The opposite of this is in codec_ttl.c. Find a better place for both.
-static inline char unescape_char(const char c) {
-    switch (c) {
-        case 't': return '\t';
-        case 'b': return '\b';
-        case 'n': return '\n';
-        case 'r': return '\r';
-        case 'f': return '\f';
-        default: return c;
-    }
-}
-
-
-/** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
- */
-ParserToken *unescape_unicode (const YYCTYPE *esc_str, size_t size)
-{
-    ParserToken *token = malloc (sizeof (*token));
-    token->data = malloc (size + 1);
-
-    size_t j = 0;
-    YYCTYPE tmp_chr[5];
-    for (size_t i = 0; i < size;) {
-        if (esc_str[i] == '\\') {
-            i++; // Skip over '\\'
-
-            // 4-hex sequence.
-            if (esc_str[i] == 'u') {
-                i ++; // Skip over 'u'
-
-                // Use tmp_chr to hold the hex string for the code point.
-                memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
-                tmp_chr[4] = '\0';
-
-                uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
-                log_debug ("tmp_val: %d", tmp_val);
-
-                // Reuse tmp_chr to hold the byte values for the code point.
-                int nbytes = utf8_encode (tmp_val, tmp_chr);
-
-                // Copy bytes into destination.
-                memcpy (token->data + j, tmp_chr, nbytes);
-                log_debug (
-                        "UC byte value: %x %x",
-                        token->data[j], token->data[j + 1]);
-
-                j += nbytes;
-                i += 4;
-
-            // 8-hex sequence.
-            } else if (esc_str[i] == 'U') {
-                i ++; // Skip over 'U'
-                log_error ("UTF-16 sequence unescaping not yet implemented.");
-                return NULL; // TODO encode UTF-16
-
-            // Unescape other escaped characters.
-            } else token->data[j++] = unescape_char(esc_str[i++]);
-        } else {
-            // Copy ASCII char verbatim.
-            token->data[j++] = esc_str[i++];
-        }
-    }
-
-    YYCTYPE *tmp = realloc (token->data, j + 1);
-    if (UNLIKELY (!tmp)) return NULL;
-    token->data = tmp;
-    token->data[j] = '\0';
-    token->size = strlen(token->data) + 1;
-
-    return token;
-}
-
-
 static int fill(ParseIterator *it)
 {
     if (it->eof) {
@@ -133,7 +59,7 @@ static int fill(ParseIterator *it)
     it->lim += fread(it->lim, 1, shift, it->fh);
     /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
     it->lim[0] = 0;
-    it->eof |= it->lim < it->buf + CHUNK_SIZE;
+    it->eof |= it->lim < it->buf + CHUNK_SIZE - 1;
     return 0;
 }
 
@@ -141,7 +67,7 @@ static int fill(ParseIterator *it)
 static void parse_init (ParseIterator *it, FILE *fh)
 {
     it->fh = fh;
-    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE;
+    it->cur = it->mar = it->tok = it->lim = it->buf + CHUNK_SIZE - 1;
     it->line = 1;
     it->stmt = 1;
     it->bol = it->buf;
@@ -161,9 +87,8 @@ void ParseFree();
 
 // Lexer.
 
-static int lex (ParseIterator *it, ParserToken **token_p)
+static int lex (ParseIterator *it, YYCTYPE **token_p)
 {
-    const YYCTYPE *lit_data_e, *dtype_s, *lang_s;
 
 loop:
 
@@ -184,32 +109,40 @@ loop:
     EOL             = [\x0A\x0D];
     NCWS            = [\x09\x20] | EOL;
     HEX             = [\x30-\x39\x41-\x46];
-    CHARACTER       = "\\u" HEX{4} | "\\U" HEX{8} | '\\' | [\x20-\x5B] | [\u005D-\U0010FFFF];
-    NSTART_CHAR     = [a-zA-Z_] | [\u00C0-\u00D6] | [\u00D8-\u00F6] | [\u00F8-\u02FF] | [\u0370-\u037D] | [\u037F-\u1FFF] | [\u200C-\u200D] | [\u2070-\u218F] | [\u2C00-\u2FEF] | [\u3001-\uD7FF] | [\uF900-\uFDCF] | [\uFDF0-\uFFFD] | [\U00010000-\U000EFFFF];
-    NAME_CHAR       = NAME_START_CHAR | '-' | [0-9\u00B7\u0300-\u036F\u203F-\u2040];
+    CHAR_BASE       = "\\u" HEX{4} | "\\U" HEX{8} | '\\'
+                    | [\U0000005D-\U0010FFFF];
+    CHARACTER       = CHAR_BASE | [\x20-\x5B];
+    NSTART_CHAR     = [a-zA-Z_] | [\u00C0-\u00D6] | [\u00D8-\u00F6]
+                    | [\u00F8-\u02FF] | [\u0370-\u037D] | [\u037F-\u1FFF]
+                    | [\u200C-\u200D] | [\u2070-\u218F] | [\u2C00-\u2FEF]
+                    | [\u3001-\uD7FF] | [\uF900-\uFDCF] | [\uFDF0-\uFFFD]
+                    | [\U00010000-\U000EFFFF];
+    NAME_CHAR       = NSTART_CHAR | '-'
+                    | [0-9\u00B7\u0300-\u036F\u203F-\u2040];
     ECHAR           = CHARACTER | [\t\n\r];
-    UCHAR           = (CHARACTER \ [\x3E]) | '>';
-    SCHAR           = (CHARACTER \ [\x22]) | '"';
+    UCHAR           = (CHAR_BASE | ([\x20-\x5B] \ [\x3E])) | '>';
+    SCHAR           = (CHAR_BASE | ([\x20-\x5B] \ [\x22])) | '"';
     LCHAR           = ECHAR | ["\x09\x0A\x0D];
 
     // Constructs.
     COMMENT         = '#' ( [^\x0A\x0D] )*;
     WS              = NCWS+ | COMMENT;
+    INTEGER         = ('-' | '+')? [0-9]+;
     EXPONENT        = [eE] INTEGER;
     LANGUAGE        = [a-z]+ ('-' [a-z0-9]+)*;
-    NODE_ID         = '_:' NAME;
     REL_IRI         = UCHAR*;
 
     // Token aliases.
     IRIREF          = '<' REL_IRI '>';
     PFX_NAME        = (NSTART_CHAR \ [_]) NAME_CHAR*;
     NAME            = NSTART_CHAR NAME_CHAR*;
-    LSTRING         = \x22 \x22 \x22 LCHAR \x22 \x22 \x22;
-    STRING          = \x22 SCHAR \x22;
-    LANGTAG         = '@' LANGUAGE
-    INTEGER         = ('-' | '+')? [0-9]+;
-    DOUBLE          = ('-' | '+') ? ([0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT);
-    DECIMAL         = ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ );
+    LSTRING         = [\x22]{3} LCHAR [\x22]{3};
+    STRING          = [\x22] SCHAR [\x22];
+    LANGTAG         = '@' LANGUAGE;
+    DOUBLE          = ('-' | '+') ? ([0-9]+ '.' [0-9]* EXPONENT
+                    | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT);
+    DECIMAL         = ('-' | '+')?
+                    ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ );
     BOOLEAN         = 'true' | 'false';
     //RDF_TYPE        = NCWS 'a' / WS;
 
@@ -233,43 +166,43 @@ loop:
     }
 
     IRIREF {
-        *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
-        log_debug ("URI data: %s", (*token_p)->data);
+        *token_p = unescape_unicode (it->tok + 1);
+        log_debug ("URI data: %s", *token_p);
 
         return T_IRIREF;
     }
 
     PFX_NAME {
-        *token_p = unescape_unicode (it->tok, YYCURSOR - it->tok - 1);
-        log_debug ("Prefix name: %s", (*token_p)->data);
+        *token_p = unescape_unicode (it->tok);
+        log_debug ("Prefix name: %s", *token_p);
 
         return T_PFX_NAME;
     }
 
     NAME {
-        *token_p = unescape_unicode (it->tok, YYCURSOR - it->tok - 1);
-        log_debug ("name: %s", (*token_p)->data);
+        *token_p = unescape_unicode (it->tok);
+        log_debug ("name: %s", *token_p);
 
-        return T_NAME;
+        return T_IDNAME;
     }
 
     LSTRING {
-        *token_p = unescape_unicode (it->tok + 3, YYCURSOR - it->tok - 4);
-        log_debug ("Long string: %s", (*token_p)->data);
+        *token_p = unescape_unicode (it->tok + 3);
+        log_debug ("Long string: %s", *token_p);
 
-        return T_LSTRING;
+        return T_STRING;
     }
 
     STRING {
-        *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
-        log_debug ("Long string: %s", (*token_p)->data);
+        *token_p = unescape_unicode (it->tok + 1);
+        log_debug ("Long string: %s", *token_p);
 
         return T_STRING;
     }
 
     LANGTAG {
-        (*token_p)->data = strndup (it->tok + 1, YYCURSOR - it->tok - 1);
-        log_debug ("Lang tag: %s", (*token_p)->data);
+        *token_p = uint8_ndup (it->tok + 1, YYCURSOR - it->tok - 1);
+        log_debug ("Lang tag: %s", *token_p);
 
         return T_LANGTAG;
     }
@@ -278,8 +211,8 @@ loop:
         // Normalize sign.
         size_t offset = *it->tok == '+' ? 1 : 0;
 
-        (*token_p)->data = strndup (it->tok + offset, YYCURSOR - it->tok - 1);
-        log_debug ("Integer: %s", (*token_p)->data);
+        *token_p = uint8_ndup (it->tok + offset, YYCURSOR - it->tok - 1);
+        log_debug ("Integer: %s", *token_p);
 
         return T_INTEGER;
     }
@@ -288,8 +221,8 @@ loop:
         // Normalize sign.
         size_t offset = *it->tok == '+' ? 1 : 0;
 
-        (*token_p)->data = strndup (it->tok + offset, YYCURSOR - it->tok - 1);
-        log_debug ("Integer: %s", (*token_p)->data);
+        *token_p = uint8_ndup (it->tok + offset, YYCURSOR - it->tok - 1);
+        log_debug ("Integer: %s", *token_p);
 
         return T_DOUBLE;
     }
@@ -300,18 +233,18 @@ loop:
 
         // Normalize trailing zeros in fractional part.
         size_t size = YYCURSOR - it->tok - 1;
-        if (strchr (it->tok, '.'))
-            for (YYCTYPE i = YYCURSOR; *i == '0'; i--) limit --;
+        if (strchr ((char *)it->tok, '.'))
+            for (YYCTYPE *i = YYCURSOR; *i == '0'; i--) *i = '\0';
 
-        (*token_p)->data = strndup (it->tok + offset, size);
-        log_debug ("Integer: %s", (*token_p)->data);
+        *token_p = uint8_ndup (it->tok + offset, size);
+        log_debug ("Integer: %s", *token_p);
 
         return T_DECIMAL;
     }
 
     BOOLEAN {
-        (*token_p)->data = strndup (it->tok, YYCURSOR - it->tok - 1);
-        log_debug ("Boolean: %s", (*token_p)->data);
+        *token_p = uint8_ndup (it->tok, YYCURSOR - it->tok - 1);
+        log_debug ("Boolean: %s", *token_p);
 
         return T_BOOLEAN;
     }
@@ -352,29 +285,6 @@ loop:
 }
 
 
-LSUP_rc
-LSUP_ttl_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
-{
-    FILE *fh = fmemopen ((void *)rep, strlen (rep), "r");
-
-    ParseIterator it;
-    parse_init (&it, fh);
-
-    ParserToken **token_p;
-    int ttype = lex (&it, token_p);
-
-    fclose (fh);
-
-    switch (ttype) {
-        case T_IRIREF:
-        case T_LITERAL:
-        case T_BNODE:
-            return LSUP_OK;
-        default:
-            return LSUP_VALUE_ERR;
-    }
-}
-
 LSUP_rc
 LSUP_ttl_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
 {
@@ -388,21 +298,29 @@ LSUP_ttl_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
 
     LSUP_rc rc;
 
-    LSUP_NSMap *nsm = LSUP_nsmap_new();
+    LSUP_TTLParserState *state = malloc (sizeof (*state));
+    if (UNLIKELY (!state)) {
+        rc = LSUP_MEM_ERR;
+        goto finally;
+    }
+    state->base = NULL;
+
+    state->nsm = LSUP_nsmap_new();
+    // TODO add basic NS, critically xsd: and rdf:
     LSUP_Graph *gr = LSUP_graph_new (
-            LSUP_iriref_new (NULL, NULL), LSUP_STORE_HTABLE, NULL, nsm, 0);
+            LSUP_iriref_new (NULL, NULL), LSUP_STORE_HTABLE, NULL, state->nsm, 0);
     if (UNLIKELY (!gr)) return LSUP_MEM_ERR;
 
-    LSUP_GraphIterator *it = LSUP_graph_add_init (gr);
-    if (UNLIKELY (!it)) {
+    state->it = LSUP_graph_add_init (gr);
+    if (UNLIKELY (!state->it)) {
         LSUP_graph_free (gr);
         return LSUP_MEM_ERR;
     }
 
-    ParserToken **token_p;
+    YYCTYPE *token;
 
     for (;;) {
-        int ttype = lex (&parse_it, token_p);
+        int ttype = lex (&parse_it, &token);
 
         if (ttype == -1) {
             char token[16] = {'\0'};
@@ -425,7 +343,7 @@ LSUP_ttl_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
             goto finally;
         }
 
-        Parse (parser, ttype, token_p, it);
+        Parse (parser, ttype, token, state);
 
         if (ttype == T_EOF) break;
     };
@@ -439,11 +357,11 @@ LSUP_ttl_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
     *gr_p = gr;
 
 finally:
-    Parse (parser, 0, NULL, it);
+    Parse (parser, 0, NULL, state);
     ParseFree (parser, free);
 
-    LSUP_graph_add_done (it);
-    LSUP_term_free (term);
+    LSUP_graph_add_done (state->it);
+    free (state);
 
     if (rc < 0) LSUP_graph_free (gr);
 

+ 1 - 1
src/core.c

@@ -118,4 +118,4 @@ LSUP_strerror (LSUP_rc rc)
 
 /* Inline extern functions. */
 
-int utf8_encode(const uint32_t utf, unsigned char *out);
+int utf8_encode (const uint32_t utf, unsigned char *out);

+ 2 - 2
src/term.c

@@ -458,7 +458,7 @@ LSUP_tcache_get (LSUP_Key key)
 
 
 /*
- * Internal functions.
+ * Static functions.
  */
 
 static LSUP_rc
@@ -471,7 +471,7 @@ term_init (
         return LSUP_ERROR;
     }
     // This can never be LSUP_TERM_UNDEFINED.
-    if (type <= LSUP_TERM_UNDEFINED || type > MAX_VALID_TERM_TYPE) {
+    if (type == LSUP_TERM_UNDEFINED) {
         log_error ("%d is not a valid term type.", type);
         return LSUP_VALUE_ERR;
     }