3 years ago · 0cfac367a5
--- a/TODO.md
+++ b/TODO.md
@@ -24,7 +24,7 @@
 
				 - *D* Relative IRIs
			
 
				 - *D* Flexible store interface
			
 
				 - *D* Transaction control
			
 
				-- *P* Turtle serialization / deserialization
			
 
				+- *W* Turtle serialization / deserialization
			
 
				 - *P* Full UTF-8 support
			
 
				 - *P* Extended tests
			
 
				     - *P* C API
			
--- a/include/codec_ttl.h
+++ b/include/codec_ttl.h
@@ -0,0 +1,10 @@
 
				+#ifndef _LSUP_CODEC_TTL_H
				+#define _LSUP_CODEC_TTL_H
				+
				+#include "codec_base.h"
				+
				+/** @brief N-Triples codec.
				+ *
				+ * NOTE(review): this header is codec_ttl.h but it still declares the
				+ * N-Triples codec symbol. Only the include guard is specific to this file,
				+ * so that it cannot be silently skipped by (or silently skip) another header
				+ * that uses an NT-named guard. Confirm the intended symbol name.
				+ */
				+extern const LSUP_Codec nt_codec;
				+
				+#endif /* _LSUP_CODEC_TTL_H */
			
--- a/src/codec/ttl_grammar.y
+++ b/src/codec/ttl_grammar.y
@@ -0,0 +1,58 @@
 
				+%include {
				+
				+/** @brief Lemon parser grammar for N-Triples.
				+ *
				+ * The `lemon' parser generator executable must be in your PATH:
				+ * https://sqlite.org/src/doc/trunk/doc/lemon.html
				+ *
				+ * To generate the parser, run: `lemon ${FILE}'
				+ */
				+
				+#include "graph.h"
				+}
				+
				+
				+// Every terminal carries a term handle as its semantic value.
				+%token_type { LSUP_Term * }
				+// Token codes are generated with this prefix (T_IRIREF, T_EOL, ...).
				+%token_prefix "T_"
				+
				+// Value type of each non-terminal, paired with the destructor releasing it.
				+%type triple            { LSUP_Triple * }
				+%destructor triple      { LSUP_triple_free ($$); }
				+%type subject           { LSUP_Term * }
				+%destructor subject     { LSUP_term_free ($$); }
				+%type predicate         { LSUP_Term * }
				+%destructor predicate   { LSUP_term_free ($$); }
				+%type object            { LSUP_Term * }
				+%destructor object      { LSUP_term_free ($$); }
				+%default_type           { void * }
				+
				+// Extra argument handed to Parse(): the iterator triples are added through.
				+%extra_argument         { LSUP_GraphIterator *it }
				+
				+
				+// Rules.
				+
				+// A document is a list of triples closed by an explicit EOF token.
				+ntriplesDoc ::= triples EOF.
				+
				+// The list is line-based: blank line(s), or one triple per line.
				+triples     ::= eol.
				+triples     ::= triple eol.
				+triples     ::= triples triple eol.
				+
				+// Build a triple from the three terms and add it to the graph right away.
				+triple(A)   ::= ws subject(S) ws predicate(P) ws object(O) ws DOT. {
				+
				+                A = LSUP_triple_new (S, P, O);
				+                LSUP_graph_add_iter (it, A);
				+            }
				+
				+// Allowed term kinds per position.
				+subject     ::= IRIREF.
				+subject     ::= BNODE.
				+
				+predicate   ::= IRIREF.
				+
				+object      ::= IRIREF.
				+object      ::= BNODE.
				+object      ::= LITERAL.
				+
				+// One or more consecutive EOL tokens.
				+eol         ::= EOL.
				+eol         ::= eol EOL.
				+
				+// Optional whitespace.
				+ws          ::=.
				+ws          ::= WS.
			
--- a/src/codec/ttl_lexer.re
+++ b/src/codec/ttl_lexer.re
@@ -0,0 +1,400 @@
 
				+#include "nt_grammar.h"
			
 
				+#include "nt_parser.h"
			
 
				+
			
 
				+
			
 
				+#define YYCTYPE     unsigned char
			
 
				+#define YYCURSOR    it->cur
			
 
				+#define YYMARKER    it->mar
			
 
				+#define YYLIMIT     it->lim
			
 
				+#define YYFILL      fill(it) == 0
			
 
				+
			
 
				+/**
			
 
				+ * Max chunk size passed to scanner at each iteration.
			
 
				+ */
			
 
				+#ifdef LSUP_RDF_STREAM_CHUNK_SIZE
			
 
				+#define CHUNK_SIZE LSUP_RDF_STREAM_CHUNK_SIZE
			
 
				+#else
			
 
				+#define CHUNK_SIZE 8192
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+/** @brief Scanner state shared by the lexer and its buffer refill routine.
				+ */
				+typedef struct {
				+    FILE *          fh;                 // Input file handle.
				+    YYCTYPE         buf[CHUNK_SIZE + 1];// Scan buffer (one spare byte at the
				+                                        //   end for the terminator).
				+    YYCTYPE *       lim;                // One past the last available input
				+                                        //   character (YYLIMIT).
				+    YYCTYPE *       cur;                // Next input character to be read
				+                                        //   (YYCURSOR).
				+    YYCTYPE *       mar;                // Most recent match (YYMARKER).
				+    YYCTYPE *       tok;                // Start of the current token.
				+    YYCTYPE *       bol;                // Beginning of the current line
				+                                        //   (for debugging).
				+    unsigned        line;               // Current line no. (for debugging).
				+    unsigned        ct;                 // Number of parsed triples.
				+    bool            eof;                // Whether EOF has been reached.
				+    /*!stags:re2c format = "YYCTYPE *@@;"; */
				+} ParseIterator;
			
 
				+
			
 
				+
			
 
				+// TODO The opposite of this is in codec_nt.c. Find a better place for both.
				+/** @brief Map the letter that follows a backslash to the character it names.
				+ *
				+ * Letters t, b, n, r and f yield the matching control character; any other
				+ * input is returned unchanged.
				+ */
				+static inline char unescape_char(const char c) {
				+    static const char letters[] = "tbnrf";
				+    static const char controls[] = "\t\b\n\r\f";
				+
				+    // Exclude the terminator so that a NUL input falls through unchanged.
				+    for (unsigned k = 0; k < sizeof (letters) - 1; k++) {
				+        if (c == letters[k]) return controls[k];
				+    }
				+
				+    return c;
				+}
			
 
				+
			
 
				+
			
 
				+/** @brief Refill the scan buffer (used as the YYFILL handler).
				+ *
				+ * Drops everything that precedes the current token, slides the remaining
				+ * bytes to the start of the buffer and reads from the file handle into the
				+ * space freed at the end.
				+ *
				+ * @param[in,out] it Scanner state.
				+ *
				+ * @return 0 on success; 1 if EOF was already reached; 2 if the current token
				+ *  starts at the very beginning of the buffer, so that no room can be freed.
				+ */
				+static int fill(ParseIterator *it)
				+{
				+    if (it->eof) {
				+        return 1;
				+    }
				+    const size_t shift = it->tok - it->buf;
				+    if (shift < 1) {
				+        return 2;
				+    }
				+    // size_t needs %zu; %lu is only right where size_t is unsigned long.
				+    log_debug ("Shifting bytes: %zu", shift);
				+    memmove(it->buf, it->tok, it->lim - it->tok);
				+    // Every cursor into the buffer moves back by the same amount.
				+    // NOTE(review): `bol` is not adjusted here, so a column computed as
				+    // cur - bol after a refill may be off — confirm whether that matters.
				+    it->lim -= shift;
				+    it->cur -= shift;
				+    it->mar -= shift;
				+    it->tok -= shift;
				+    it->lim += fread(it->lim, 1, shift, it->fh);
				+    /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
				+    // Terminate the data; a short read means the input is exhausted.
				+    it->lim[0] = 0;
				+    it->eof |= it->lim < it->buf + CHUNK_SIZE;
				+    return 0;
				+}
			
 
				+
			
 
				+
			
 
				+/** @brief Prepare a scanner state for reading from a file handle.
				+ *
				+ * @param[out] it Scanner state to initialize.
				+ * @param[in] fh Open input stream.
				+ */
				+static void parse_init(ParseIterator *it, FILE *fh)
				+{
				+    YYCTYPE *const buf_end = it->buf + CHUNK_SIZE;
				+
				+    it->fh = fh;
				+    it->eof = 0;
				+    it->ct = 0;
				+    it->line = 1;
				+    it->bol = it->buf;
				+
				+    // All cursors start at the buffer end, so that the first fill() shifts
				+    // by a whole chunk and reads that many bytes from the stream.
				+    it->lim = buf_end;
				+    it->tok = buf_end;
				+    it->mar = buf_end;
				+    it->cur = buf_end;
				+    /*!stags:re2c format = "it->@@ = NULL; "; */
				+    fill (it);
				+}
			
 
				+
			
 
				+
			
 
				+/** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
				+ *
				+ * Any other backslash escape is resolved through unescape_char().
				+ *
				+ * @param[in] esc_str Escaped input. It does not need to be NUL-terminated.
				+ *
				+ * @param[in] size Number of bytes of esc_str to process.
				+ *
				+ * @return Newly allocated, NUL-terminated string that the caller must free;
				+ *  NULL on allocation failure or when a \U sequence is found (unsupported).
				+ */
				+static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
				+{
				+    // Zero-filled so that the debug log below never reads indeterminate
				+    // bytes. Presumably the output is never longer than the input (a 6-byte
				+    // \uXXXX escape should encode to at most 3 bytes) — TODO confirm against
				+    // utf8_encode().
				+    YYCTYPE *uc_str = calloc (1, size + 1);
				+    if (UNLIKELY (!uc_str)) return NULL;
				+
				+    size_t j = 0;
				+    YYCTYPE tmp_chr[5];
				+    for (size_t i = 0; i < size;) {
				+        if (esc_str[i] == '\\') {
				+            i++; // Skip over '\\'
				+
				+            // 4-hex sequence.
				+            if (esc_str[i] == 'u') {
				+                i ++; // Skip over 'u'
				+
				+                // Use tmp_chr to hold the hex string for the code point.
				+                memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
				+                tmp_chr[4] = '\0';
				+
				+                uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
				+                log_debug ("tmp_val: %d", tmp_val);
				+
				+                // Reuse tmp_chr to hold the byte values for the code point.
				+                int nbytes = utf8_encode (tmp_val, tmp_chr);
				+
				+                // Copy bytes into destination.
				+                memcpy (uc_str + j, tmp_chr, nbytes);
				+                log_debug ("UC byte value: %x %x", uc_str[j], uc_str[j + 1]);
				+
				+                j += nbytes;
				+                i += 4;
				+
				+            // 8-hex sequence.
				+            } else if (esc_str[i] == 'U') {
				+                log_error ("UTF-16 sequence unescaping not yet implemented.");
				+                // Do not leak the partially filled output buffer.
				+                free (uc_str);
				+                return NULL; // TODO encode UTF-16
				+
				+            // Unescape other escaped characters.
				+            } else uc_str[j++] = unescape_char(esc_str[i++]);
				+        } else {
				+            // Copy ASCII char verbatim.
				+            uc_str[j++] = esc_str[i++];
				+        }
				+    }
				+
				+    // Shrink to the size actually used.
				+    YYCTYPE *tmp = realloc (uc_str, j + 1);
				+    if (UNLIKELY (!tmp)) {
				+        // realloc() leaves the original block allocated on failure.
				+        free (uc_str);
				+        return NULL;
				+    }
				+    uc_str = tmp;
				+    uc_str[j] = '\0';
				+
				+    return uc_str;
				+}
			
 
				+
			
 
				+
			
 
				+// Parser interface.
				+//
				+// Entry points of the parser generated by Lemon from the grammar file. They
				+// are declared here without parameter lists; see the calls in
				+// LSUP_nt_parse_doc() for the arguments actually passed.
				+
				+void *ParseAlloc();
				+void Parse();
				+void ParseFree();
			
 
				+
			
 
				+
			
 
				+// Lexer.
				+
				+/** @brief Scan the next token from the input.
				+ *
				+ * Comments are skipped. For IRIREF, LITERAL and BNODE tokens a new term is
				+ * created and stored in *term; for every other token *term is set to NULL.
				+ *
				+ * @param[in,out] it Scanner state.
				+ *
				+ * @param[out] term Handle receiving the term created for the token, if any.
				+ *
				+ * @return One of the T_* token codes, or -1 if the input cannot be tokenized
				+ *  or the payload of a term could not be built.
				+ */
				+static int lex (ParseIterator *it, LSUP_Term **term)
				+{
				+    const YYCTYPE *lit_data_e, *dtype_s, *lang_s;
				+
				+loop:
				+
				+    it->tok = it->cur;
				+
				+    *term = NULL;
				+
				+    /*!re2c
				+    re2c:eof = 0;
				+    re2c:flags:8 = 1;
				+    re2c:flags:tags = 1;
				+    re2c:tags:expression = "it->@@";
				+    re2c:api:style = functions;
				+    re2c:define:YYFILL:naked = 1;
				+
				+
				+    // For unresolved and partially resolved inconsistencies of the spec, see
				+    // https://lists.w3.org/Archives/Public/public-rdf-comments/2017Jun/0000.html
				+    _WS                 = [\x09\x20];
				+    WS                  = _WS+;
				+    EOL                 = [\x0D\x0A] (_WS | [\x0D\x0A])*;
				+    DOT                 = [.];
				+    HEX                 = [0-9A-Fa-f];
				+    ECHAR               = [\\] [tbnrf"'\\];
				+    UCHAR               = "\\u" HEX{4} | "\\U" HEX{8};
				+    PN_CHARS_BASE       = [A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF];
				+    PN_CHARS_U          = PN_CHARS_BASE | '_' | ':';
				+    PN_CHARS            = PN_CHARS_U | '-' | [0-9\u00B7\u0300-\u036F\u203F-\u2040];
				+    IRI_CHARS           = ([^\x00-\x20<>"{}|^`\\] | UCHAR)*;
				+    LITERAL_QUOTE       = ["] ([^\x22\x5C\x0A\x0D] | ECHAR|UCHAR)* ["];
				+    LANGTAG             = [@] [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*;
				+
				+    IRIREF              = [<] IRI_CHARS [>];
				+    LITERAL             = LITERAL_QUOTE @lit_data_e _WS* ("^^" _WS* @dtype_s IRIREF | @lang_s LANGTAG)?;
				+    BNODE               = "_:" ((PN_CHARS_U | [0-9]) ((PN_CHARS | ".")* PN_CHARS)?);
				+    COMMENT             = "#" .*;
				+
				+
				+    EOL {
				+        // One EOL token may span several physical lines (blank lines, CRLF
				+        // pairs): count every line break rather than every token. The byte
				+        // at YYCURSOR is readable and, the match being greedy, not an LF.
				+        for (const YYCTYPE *p = it->tok; p < YYCURSOR; p++) {
				+            if (*p == '\n' || (*p == '\r' && p[1] != '\n')) {
				+                it->line ++;
				+            }
				+        }
				+        it->bol = YYCURSOR;
				+        log_debug ("New line: #%u.", it->line);
				+        return T_EOL;
				+    }
				+
				+    $ {
				+        log_debug ("End of buffer.");
				+        return T_EOF;
				+    }
				+
				+    IRIREF {
				+        // Strip the enclosing angle brackets.
				+        YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
				+        // Unescaping returns NULL on failure; do not build a term from it.
				+        if (UNLIKELY (!data)) return -1;
				+
				+        log_debug ("URI data: %s", data);
				+
				+        *term = LSUP_iriref_new ((char*)data, NULL);
				+        free (data);
				+
				+        return T_IRIREF;
				+    }
				+
				+    LITERAL {
				+        // Only unescape Unicode from data.
				+        size_t size = lit_data_e - it->tok - 2;
				+        YYCTYPE *data = unescape_unicode (it->tok + 1, size);
				+        if (UNLIKELY (!data)) return -1;
				+        log_trace ("Literal data: %s", data);
				+
				+        char *metadata = NULL;
				+        const YYCTYPE *md_marker;
				+        LSUP_TermType type = LSUP_TERM_LITERAL;
				+
				+        // `size` becomes the metadata length plus one for the terminator.
				+        if (dtype_s) {
				+            // Marker is the opening `<'; the closing `>' is left out.
				+            md_marker = dtype_s;
				+            size = YYCURSOR - md_marker - 1;
				+        } else if (lang_s) {
				+            // Marker is the `@' sign.
				+            type = LSUP_TERM_LT_LITERAL;
				+            md_marker = lang_s;
				+            size = YYCURSOR - md_marker;
				+        } else md_marker = NULL;
				+
				+        if (md_marker) {
				+            metadata = malloc (size);
				+            if (UNLIKELY (!metadata)) {
				+                free (data);
				+                return -1;
				+            }
				+            // Copy the payload only; the last byte is the terminator.
				+            memcpy (metadata, md_marker + 1, size - 1);
				+            metadata [size - 1] = '\0';
				+            log_trace ("metadata: %s", metadata);
				+        }
				+
				+        if (type == LSUP_TERM_LITERAL) {
				+            LSUP_Term *dtype;
				+            dtype = (
				+                metadata ? LSUP_iriref_new ((char *) metadata, NULL) : NULL);
				+
				+            *term = LSUP_literal_new ((char *) data, dtype);
				+
				+        } else *term = LSUP_lt_literal_new ((char *) data, (char *) metadata);
				+
				+        free (data);
				+        free (metadata);
				+
				+        return T_LITERAL;
				+    }
				+
				+    BNODE {
				+        // Skip the leading `_:'.
				+        YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
				+        if (UNLIKELY (!data)) return -1;
				+
				+        log_debug ("BNode data: %s", data);
				+
				+        *term = LSUP_term_new (LSUP_TERM_BNODE, (char*)data, NULL);
				+        free (data);
				+
				+        return T_BNODE;
				+    }
				+
				+    DOT {
				+        log_debug ("End of triple.");
				+        it->ct ++;
				+
				+        return T_DOT;
				+    }
				+
				+    WS {
				+        log_debug ("Separator.");
				+
				+        return T_WS;
				+    }
				+
				+    COMMENT {
				+        // Log straight from the scan buffer; no temporary copy is needed.
				+        log_debug (
				+            "Comment: `%.*s`",
				+            (int) (YYCURSOR - it->tok), (const char *) it->tok);
				+
				+        goto loop;
				+    }
				+
				+    * {
				+        // The offset is a pointer difference: convert it to match %lu.
				+        log_debug (
				+            "Invalid token @ %lu: %s (\\x%x)",
				+            (unsigned long) (YYCURSOR - it->buf - 1), it->tok, *it->tok);
				+
				+        return -1;
				+    }
				+
				+    */
				+}
			
 
				+
			
 
				+
			
 
				+/** @brief Parse a single serialized term from a string.
				+ *
				+ * Only the first token of the string is examined.
				+ *
				+ * @param[in] rep NUL-terminated serialized term.
				+ *
				+ * @param[in] map Namespace map. Not used by this function.
				+ *
				+ * @param[out] term Handle receiving the new term. Set to NULL if no term
				+ *  was produced.
				+ *
				+ * @return LSUP_OK if an IRI ref, literal or blank node was parsed;
				+ *  LSUP_VALUE_ERR if the string is empty or starts with any other token;
				+ *  LSUP_MEM_ERR if the in-memory stream could not be opened.
				+ */
				+LSUP_rc
				+LSUP_nt_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
				+{
				+    (void) map;
				+    *term = NULL;
				+
				+    // fmemopen() may reject a zero-length buffer, and an empty string cannot
				+    // hold a term anyway.
				+    const size_t rep_len = strlen (rep);
				+    if (rep_len == 0) return LSUP_VALUE_ERR;
				+
				+    FILE *fh = fmemopen ((void *)rep, rep_len, "r");
				+    if (UNLIKELY (!fh)) return LSUP_MEM_ERR;
				+
				+    ParseIterator it;
				+    parse_init (&it, fh);
				+
				+    int ttype = lex (&it, term);
				+
				+    fclose (fh);
				+
				+    switch (ttype) {
				+        case T_IRIREF:
				+        case T_LITERAL:
				+        case T_BNODE:
				+            return LSUP_OK;
				+        default:
				+            return LSUP_VALUE_ERR;
				+    }
				+}
			
 
				+
			
 
				+/** @brief Parse a whole document from a stream into a new graph.
				+ *
				+ * @param[in] fh Open input stream.
				+ *
				+ * @param[out] gr_p Handle receiving the new graph. Set to NULL on error.
				+ *
				+ * @param[out] ct If not NULL, receives the scanner's triple counter, which
				+ *  is incremented on every DOT token. Not written if parsing fails.
				+ *
				+ * @param[out] err_p Receives an allocated message describing a tokenizing
				+ *  error, to be freed by the caller; NULL if there was none or if the
				+ *  message could not be allocated.
				+ *
				+ * @return LSUP_OK if at least one triple was counted; LSUP_NORESULT if none
				+ *  was; LSUP_VALUE_ERR on a tokenizing error; LSUP_MEM_ERR if the parser,
				+ *  the graph or its iterator could not be created.
				+ */
				+LSUP_rc
				+LSUP_nt_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
				+{
				+    *err_p = NULL;
				+    *gr_p = NULL;
				+
				+    ParseIterator parse_it;
				+    parse_init (&parse_it, fh);
				+
				+    void *parser = ParseAlloc (malloc);
				+    if (UNLIKELY (!parser)) return LSUP_MEM_ERR;
				+
				+    LSUP_rc rc;
				+
				+    LSUP_Graph *gr = LSUP_graph_new (
				+            LSUP_iriref_new (NULL, NULL), LSUP_STORE_HTABLE, NULL, NULL, 0);
				+    if (UNLIKELY (!gr)) {
				+        // Do not leak the parser on early exits.
				+        ParseFree (parser, free);
				+        return LSUP_MEM_ERR;
				+    }
				+
				+    LSUP_GraphIterator *it = LSUP_graph_add_init (gr);
				+    if (UNLIKELY (!it)) {
				+        LSUP_graph_free (gr);
				+        ParseFree (parser, free);
				+        return LSUP_MEM_ERR;
				+    }
				+
				+    LSUP_Term *term = NULL;
				+
				+    for (;;) {
				+        int ttype = lex (&parse_it, &term);
				+
				+        if (ttype == -1) {
				+            // At most 15 characters of the offending input are quoted.
				+            static const char err_fmt[] =
				+                "Parse error near token `%.15s[...]' "
				+                "at line %u, character %ld.\n";
				+            const char *near = (const char *) parse_it.tok;
				+            const long col = (long) (parse_it.cur - parse_it.bol);
				+
				+            // Measure first, then format into an exactly sized buffer.
				+            int err_len = snprintf (
				+                    NULL, 0, err_fmt, near, parse_it.line, col);
				+            char *err_str = (
				+                    err_len < 0 ? NULL : malloc ((size_t) err_len + 1));
				+            if (err_str) snprintf (
				+                    err_str, (size_t) err_len + 1, err_fmt,
				+                    near, parse_it.line, col);
				+
				+            rc = LSUP_VALUE_ERR;
				+            *err_p = err_str;
				+
				+            goto finally;
				+        }
				+
				+        Parse (parser, ttype, term, it);
				+
				+        if (ttype == T_EOF) break;
				+    }
				+
				+    if (ct) *ct = parse_it.ct;
				+
				+    log_info ("Parsed %u triples.", parse_it.ct);
				+    log_debug ("Graph size: %lu", LSUP_graph_size (gr));
				+
				+    rc = parse_it.ct > 0 ? LSUP_OK : LSUP_NORESULT;
				+    *gr_p = gr;
				+
				+finally:
				+    // Token 0 tells the parser that the input is over.
				+    Parse (parser, 0, NULL, it);
				+    ParseFree (parser, free);
				+
				+    LSUP_graph_add_done (it);
				+    LSUP_term_free (term);
				+
				+    if (rc < 0) {
				+        LSUP_graph_free (gr);
				+        // Never hand out a handle to a graph that has just been freed.
				+        *gr_p = NULL;
				+    }
				+
				+    return rc;
				+}
			
 
				+
			
--- a/src/codec_ttl.c
+++ b/src/codec_ttl.c
@@ -0,0 +1,257 @@
 
				+#include "codec_nt.h"
			
 
				+#include "nt_parser.h"
			
 
				+
			
 
				+/** @brief List of characters to be escaped in serialized literals.
			
 
				+ *
			
 
				+ * @sa https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
			
 
				+ */
			
 
				+#define LIT_ECHAR "\t\b\n\r\f\"\'\\"
			
 
				+
			
 
				+/** @brief Regex of characters to be escaped in serialized IRIs.
				+ *
				+ * The control character range starts at 0x01: a NUL byte embedded in the
				+ * pattern would terminate the C string right after the opening bracket, and
				+ * NUL cannot occur inside a NUL-terminated subject string anyway.
				+ *
				+ * @sa https://www.w3.org/TR/n-triples/#grammar-production-IRIREF
				+ */
				+#define IRI_ECHAR_PTN "[\x01-\x20<>\"\\{\\}\\|\\^`\\\\]"
			
 
				+
			
 
				+
			
 
				+/* * * Static prototypes. * * */
			
 
				+
			
 
				+static LSUP_rc escape_lit (const char *in, char **out_p);
			
 
				+
			
 
				+
			
 
				+/* * * Codec functions. * * */
			
 
				+
			
 
				+/** @brief Serialize a term.
				+ *
				+ * @param[in] term Term to serialize.
				+ *
				+ * @param[in] nsm Namespace map. Not used by this function.
				+ *
				+ * @param[in,out] out_p Pointer to the output string. If it points to a
				+ *  non-NULL buffer, that buffer is resized and reused. On LSUP_OK it holds
				+ *  the serialized term; on LSUP_VALUE_ERR the buffer is freed and the
				+ *  pointer set to NULL; on any other error it is left untouched and valid.
				+ *
				+ * @return LSUP_OK on success; LSUP_VALUE_ERR for an unsupported term type;
				+ *  LSUP_ERROR if the literal could not be escaped; LSUP_MEM_ERR if the
				+ *  output buffer could not be allocated.
				+ */
				+static LSUP_rc
				+term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
				+{
				+    LSUP_rc rc;
				+    // Resize the caller's buffer directly. Releasing it first through
				+    // realloc (ptr, 0) is implementation-defined and would leave *out_p
				+    // dangling on the early error returns below.
				+    char *out = *out_p, *tmp, *escaped;
				+    const char *metadata = NULL;
				+    size_t buf_len;
				+
				+    (void) nsm;
				+
				+    switch (term->type) {
				+        case LSUP_TERM_IRIREF:
				+            // Room for <> and terminator.
				+            tmp = realloc (out, strlen (term->data) + 3);
				+            if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
				+            out = tmp;
				+
				+            sprintf (out, "<%s>", term->data);
				+            rc = LSUP_OK;
				+            break;
				+
				+        case LSUP_TERM_LITERAL:
				+            // Calculate string length.
				+            if (escape_lit (term->data, &escaped) != LSUP_OK)
				+                return LSUP_ERROR;
				+            buf_len = strlen (escaped) + 3; // Room for "" and terminator
				+
				+            // The default data type is implied and not written out.
				+            if (
				+                term->datatype != NULL
				+                && term->datatype != LSUP_default_datatype
				+            ) {
				+                metadata = term->datatype->data;
				+                buf_len += strlen (metadata) + 4; // Room for ^^<>
				+            }
				+
				+            tmp = realloc (out, buf_len);
				+            if (UNLIKELY (!tmp)) {
				+                free (escaped);
				+                return LSUP_MEM_ERR;
				+            }
				+            out = tmp;
				+
				+            // Add datatype, if any.
				+            if (metadata) sprintf (out, "\"%s\"^^<%s>", escaped, metadata);
				+            else sprintf (out, "\"%s\"", escaped);
				+            free (escaped);
				+
				+            rc = LSUP_OK;
				+
				+            break;
				+
				+        case LSUP_TERM_LT_LITERAL:
				+            // Calculate string length.
				+            if (escape_lit (term->data, &escaped) != LSUP_OK)
				+                return LSUP_ERROR;
				+            buf_len = strlen (escaped) + 3; // Room for "" and terminator
				+
				+            if (term->lang != 0) {
				+                metadata = term->lang;
				+                buf_len += strlen (metadata) + 1; // Room for @
				+            }
				+
				+            tmp = realloc (out, buf_len);
				+            if (UNLIKELY (!tmp)) {
				+                free (escaped);
				+                return LSUP_MEM_ERR;
				+            }
				+            out = tmp;
				+
				+            // Add lang, if any.
				+            if (metadata) sprintf (out, "\"%s\"@%s", escaped, metadata);
				+            else sprintf (out, "\"%s\"", escaped);
				+            free (escaped);
				+
				+            rc = LSUP_OK;
				+
				+            break;
				+
				+        case LSUP_TERM_BNODE:
				+            // Room for _: and terminator.
				+            tmp = realloc (out, strlen (term->data) + 3);
				+            if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
				+            out = tmp;
				+
				+            sprintf (out, "_:%s", term->data);
				+            rc = LSUP_OK;
				+
				+            break;
				+
				+        default:
				+            // Nothing to return: release any buffer passed in.
				+            free (out);
				+            out = NULL;
				+            rc = LSUP_VALUE_ERR;
				+    }
				+
				+    *out_p = out;
				+    return rc;
				+}
			
 
				+
			
 
				+
			
 
				+static LSUP_CodecIterator *
			
 
				+gr_to_nt_init (const LSUP_Graph *gr);
			
 
				+
			
 
				+
			
 
				+/** @brief Serialize the next triple of a graph as one line.
				+ *
				+ * @param[in,out] it Codec iterator obtained from gr_to_nt_init().
				+ *
				+ * @param[in,out] res Pointer to the output buffer, which is resized as
				+ *  needed. It is freed and set to NULL if it cannot be resized, and left
				+ *  untouched on any other failure.
				+ *
				+ * @return LSUP_OK if a line was written; the return code of
				+ *  LSUP_graph_iter_next() if that is not LSUP_OK; the return code of the
				+ *  first term serialization that fails; LSUP_MEM_ERR if the output buffer
				+ *  could not be resized.
				+ */
				+static LSUP_rc
				+gr_to_nt_iter (LSUP_CodecIterator *it, unsigned char **res) {
				+    unsigned char *tmp;
				+    size_t line_size;
				+
				+    LSUP_rc rc = LSUP_graph_iter_next (it->gr_it, it->trp);
				+    if (rc != LSUP_OK) goto finally;
				+
				+    // Stop at the first term that cannot be serialized: its string may be
				+    // NULL or stale and must not be measured or printed.
				+    if (
				+        (rc = term_to_nt (it->trp->s, it->nsm, &it->str_s)) != LSUP_OK
				+        || (rc = term_to_nt (it->trp->p, it->nsm, &it->str_p)) != LSUP_OK
				+        || (rc = term_to_nt (it->trp->o, it->nsm, &it->str_o)) != LSUP_OK
				+    ) goto finally;
				+
				+    // 3 term separators + dot + newline + terminal = 6
				+    line_size = (
				+            strlen (it->str_s) + strlen (it->str_p)
				+            + strlen (it->str_o) + 6);
				+    tmp = realloc (*res, line_size);
				+    if (UNLIKELY (!tmp)) {
				+        // realloc() keeps the old block on failure: release it before
				+        // clearing the handle.
				+        free (*res);
				+        *res = NULL;
				+        rc = LSUP_MEM_ERR;
				+        goto finally;
				+    }
				+
				+    sprintf ((char*)tmp, "%s %s %s .\n", it->str_s, it->str_p, it->str_o);
				+    *res = tmp;
				+
				+    it->cur++;
				+
				+finally:
				+    // The terms are released after every call, whatever the outcome.
				+    LSUP_term_free (it->trp->s); it->trp->s = NULL;
				+    LSUP_term_free (it->trp->p); it->trp->p = NULL;
				+    LSUP_term_free (it->trp->o); it->trp->o = NULL;
				+
				+    return rc;
				+}
			
 
				+
			
 
				+
			
 
				+/** @brief Release a codec iterator and everything it holds.
				+ *
				+ * @param[in] it Iterator obtained from gr_to_nt_init(). NULL is accepted and
				+ *  does nothing, as with free().
				+ */
				+static void
				+gr_to_nt_done (LSUP_CodecIterator *it)
				+{
				+    if (!it) return;
				+
				+    LSUP_graph_iter_free (it->gr_it);
				+    LSUP_triple_free (it->trp);
				+    free (it->rep);
				+    free (it->str_s);
				+    free (it->str_p);
				+    free (it->str_o);
				+    free (it);
				+}
			
 
				+
			
 
				+
			
 
				+/** @brief Codec descriptor tying together the functions of this file.
				+ *
				+ * NOTE(review): this file is codec_ttl.c, yet the descriptor is still named
				+ * nt_codec and advertises the N-Triples name, MIME type and extension —
				+ * confirm whether these are placeholders to be replaced with Turtle values.
				+ */
				+const LSUP_Codec nt_codec = {
				+    .name               = "N-Triples",
				+    .mimetype           = "application/n-triples",
				+    .extension          = "nt",
				+
				+    // Single term serialization.
				+    .encode_term        = term_to_nt,
				+
				+    // Graph serialization: set up, one call per triple, tear down.
				+    .encode_graph_init  = gr_to_nt_init,
				+    .encode_graph_iter  = gr_to_nt_iter,
				+    .encode_graph_done  = gr_to_nt_done,
				+
				+    // Parsing entry points.
				+    .decode_term        = LSUP_nt_parse_term,
				+    .decode_graph       = LSUP_nt_parse_doc,
				+};
			
 
				+
			
 
				+
			
 
				+/* * * Other internal functions. * * */
			
 
				+
			
 
				+/** Map a control character to the letter that names it in an escape.
				+ *
				+ *  Tab, backspace, line feed, carriage return and form feed yield t, b, n,
				+ *  r and f respectively; any other character is returned unchanged.
				+ *
				+ *  The escaping backslash is to be added separately by the caller.
				+ */
				+static inline char replace_char(const char c) {
				+    static const char controls[] = "\t\b\n\r\f";
				+    static const char letters[] = "tbnrf";
				+
				+    // Exclude the terminator so that a NUL input falls through unchanged.
				+    for (unsigned k = 0; k < sizeof (controls) - 1; k++) {
				+        if (c == controls[k]) return letters[k];
				+    }
				+
				+    return c;
				+}
			
 
				+
			
 
				+
			
 
				+/** @brief Set up a codec iterator for serializing a whole graph.
				+ *
				+ * @param[in] gr Graph to serialize.
				+ *
				+ * @return New iterator whose lookup is unrestricted (all three terms are
				+ *  NULL), to be released with gr_to_nt_done().
				+ */
				+static LSUP_CodecIterator *
				+gr_to_nt_init (const LSUP_Graph *gr)
				+{
				+    LSUP_CodecIterator *it;
				+    MALLOC_GUARD (it, NULL);
				+
				+    // Output buffers start empty; they are allocated on first use.
				+    it->rep = NULL;
				+    it->str_s = it->str_p = it->str_o = NULL;
				+
				+    it->codec = &nt_codec;
				+
				+    // The lookup writes into `cur', which is then reset to zero.
				+    it->gr_it = LSUP_graph_lookup(gr, NULL, NULL, NULL, &it->cur);
				+    it->cur = 0;
				+
				+    it->nsm = LSUP_graph_namespace (gr);
				+    it->trp = TRP_DUMMY;
				+
				+    return it;
				+}
			
 
				+
			
 
				+
			
 
				+/** @brief Add escape character (backslash) to illegal literal characters.
				+ *
				+ * Every character listed in LIT_ECHAR is replaced by a backslash followed by
				+ * the result of replace_char(); all other characters are copied verbatim.
				+ *
				+ * @param[in] in NUL-terminated string to escape.
				+ *
				+ * @param[out] out_p Receives a newly allocated, NUL-terminated escaped
				+ *  string that the caller must free. Not written on failure.
				+ *
				+ * @return LSUP_OK on success; LSUP_MEM_ERR on allocation failure.
				+ */
				+static LSUP_rc
				+escape_lit (const char *in, char **out_p)
				+{
				+    // Measure the input once rather than on every loop iteration.
				+    const size_t in_len = strlen (in);
				+
				+    // One more byte (the backslash) for each character to be escaped. NUL
				+    // never reaches strchr() here, so its terminator match cannot trigger.
				+    size_t out_size = in_len + 1;
				+    for (size_t i = 0; i < in_len; i++) {
				+        if (strchr (LIT_ECHAR, in[i])) out_size ++;
				+    }
				+
				+    char *out = malloc (out_size);
				+    if (UNLIKELY (!out)) return LSUP_MEM_ERR;
				+
				+    size_t j = 0;
				+    for (size_t i = 0; i < in_len; i++) {
				+        if (strchr (LIT_ECHAR, in[i])) {
				+            out[j++] = '\\';
				+            out[j++] = replace_char (in[i]);
				+        } else out[j++] = in[i];
				+    }
				+    out[j] = '\0';
				+
				+    *out_p = out;
				+    return LSUP_OK;
				+}