3 years ago · 0cfac367a5
--- a/TODO.md
+++ b/TODO.md
@@ -24,7 +24,7 @@
 
															 - *D* Relative IRIs
														
 
															 - *D* Flexible store interface
														
 
															 - *D* Transaction control
														
 
															-- *P* Turtle serialization / deserialization
														
 
															+- *W* Turtle serialization / deserialization
														
 
															 - *P* Full UTF-8 support
														
 
															 - *P* Extended tests
														
 
															     - *P* C API
														
--- a/include/codec_ttl.h
+++ b/include/codec_ttl.h
@@ -0,0 +1,10 @@
 
															+#ifndef _LSUP_CODEC_TTL_H
															+#define _LSUP_CODEC_TTL_H
															+
															+#include "codec_base.h"
															+
															+/** @brief N-Triples codec.
															+ *
															+ * The include guard is named after this file so that it cannot shadow, or
															+ * be shadowed by, the guard of another codec header.
															+ *
															+ * NOTE(review): this header still declares the N-Triples codec object;
															+ * presumably a Turtle codec declaration will replace it - TODO confirm.
															+ */
															+extern const LSUP_Codec nt_codec;
															+
															+#endif
														
--- a/src/codec/ttl_grammar.y
+++ b/src/codec/ttl_grammar.y
@@ -0,0 +1,58 @@
 
															+%include {
															+
															+/** @brief Lemon parser grammar for N-Triples.
															+ *
															+ * The `lemon' parser generator executable must be in your PATH:
															+ * https://sqlite.org/src/doc/trunk/doc/lemon.html
															+ *
															+ * To generate the parser, run: `lemon ${FILE}'
															+ */
															+
															+#include "graph.h"
															+}
															+
															+
															+// Every terminal carries a term pointer as its value; generated token
															+// codes are prefixed with `T_'.
															+%token_type { LSUP_Term * }
															+%token_prefix "T_"
															+
															+// Value types of the non-terminals that carry data, each paired with the
															+// code used to release such a value.
															+%type triple            { LSUP_Triple * }
															+%destructor triple      { LSUP_triple_free ($$); }
															+%type subject           { LSUP_Term * }
															+%destructor subject     { LSUP_term_free ($$); }
															+%type predicate         { LSUP_Term * }
															+%destructor predicate   { LSUP_term_free ($$); }
															+%type object            { LSUP_Term * }
															+%destructor object      { LSUP_term_free ($$); }
															+%default_type           { void * }
															+
															+// Graph iterator made available to rule actions under the name `it'.
															+%extra_argument         { LSUP_GraphIterator *it }
															+
															+
															+// Rules.
															+
															+// A document is a list of triples terminated by the EOF token.
															+ntriplesDoc ::= triples EOF.
															+
															+// The list is either a bare run of line ends, or one or more triples each
															+// followed by a run of line ends.
															+triples     ::= eol.
															+triples     ::= triple eol.
															+triples     ::= triples triple eol.
															+
															+// Subject, predicate and object, each optionally preceded by whitespace,
															+// closed by a dot. The action builds the triple from the three terms and
															+// adds it through the iterator `it'.
															+triple(A)   ::= ws subject(S) ws predicate(P) ws object(O) ws DOT. {
															+
															+                A = LSUP_triple_new (S, P, O);
															+                LSUP_graph_add_iter (it, A);
															+            }
															+
															+// Terminals accepted in each position of a triple.
															+subject     ::= IRIREF.
															+subject     ::= BNODE.
															+
															+predicate   ::= IRIREF.
															+
															+object      ::= IRIREF.
															+object      ::= BNODE.
															+object      ::= LITERAL.
															+
															+// One or more EOL tokens.
															+eol         ::= EOL.
															+eol         ::= eol EOL.
															+
															+// Optional whitespace: empty, or a single WS token.
															+ws          ::=.
															+ws          ::= WS.
														
--- a/src/codec/ttl_lexer.re
+++ b/src/codec/ttl_lexer.re
@@ -0,0 +1,400 @@
 
															+#include "nt_grammar.h"
														
 
															+#include "nt_parser.h"
														
 
															+
														
 
															+
														
 
															+// Scanner glue: each macro below expands to a member of a ParseIterator
															+// that must be in scope under the name `it' at the point of use.
															+#define YYCTYPE     unsigned char
															+#define YYCURSOR    it->cur
															+#define YYMARKER    it->mar
															+#define YYLIMIT     it->lim
															+// Expands to a comparison, not to a bare call: true when fill() returned 0.
															+#define YYFILL      fill(it) == 0
															+
															+/**
															+ * Max chunk size passed to scanner at each iteration.
															+ *
															+ * Defaults to 8192 unless LSUP_RDF_STREAM_CHUNK_SIZE is defined.
															+ */
															+#ifdef LSUP_RDF_STREAM_CHUNK_SIZE
															+#define CHUNK_SIZE LSUP_RDF_STREAM_CHUNK_SIZE
															+#else
															+#define CHUNK_SIZE 8192
															+#endif
														
 
															+
														
 
															+
														
 
															+/** @brief Scanner state: input stream, scan buffer and positions in it.
															+ */
															+typedef struct {
															+    /* Input file handle. */
															+    FILE           *fh;
															+    /* Scan buffer; the extra byte leaves room for a terminator written
															+     * right after the last byte read. */
															+    YYCTYPE         buf[CHUNK_SIZE + 1];
															+    /* Position after the last available input character (YYLIMIT). */
															+    YYCTYPE        *lim;
															+    /* Next input character to be read (YYCURSOR). */
															+    YYCTYPE        *cur;
															+    /* Most recent match (YYMARKER). */
															+    YYCTYPE        *mar;
															+    /* Start of current token. */
															+    YYCTYPE        *tok;
															+    /* Address of the beginning of the current line (for debugging). */
															+    YYCTYPE        *bol;
															+    /* Current line no. (for debugging). */
															+    unsigned        line;
															+    /* Number of parsed triples. */
															+    unsigned        ct;
															+    /* Whether EOF has been reached. */
															+    bool            eof;
															+    /*!stags:re2c format = "YYCTYPE *@@;"; */
															+} ParseIterator;
														
 
															+
														
 
															+
														
 
															+// TODO The opposite of this is in codec_nt.c. Find a better place for both.
															+/** @brief Resolve the character that follows a backslash in an escape.
															+ *
															+ * @param[in] c Character found right after the backslash.
															+ *
															+ * @return The control character for t, b, n, r and f; any other character
															+ *  is returned unchanged.
															+ */
															+static inline char unescape_char(const char c) {
															+    if (c == 't') return '\t';
															+    if (c == 'b') return '\b';
															+    if (c == 'n') return '\n';
															+    if (c == 'r') return '\r';
															+    if (c == 'f') return '\f';
															+
															+    // Everything else stands for itself.
															+    return c;
															+}
														
 
															+
														
 
															+
														
 
															+/** @brief Discard consumed input and top the scan buffer up from the file.
															+ *
															+ * Everything before the start of the current token is dropped, the rest is
															+ * moved to the front of the buffer, and the freed space is refilled from
															+ * the input stream.
															+ *
															+ * @param[in,out] it Scanner state.
															+ *
															+ * @return 0 if the buffer was shifted and refilled; 1 if EOF had already
															+ *  been reached; 2 if the current token starts at the beginning of the
															+ *  buffer, so that no room can be made.
															+ */
															+static int fill(ParseIterator *it)
															+{
															+    if (it->eof) {
															+        return 1;
															+    }
															+    const size_t shift = it->tok - it->buf;
															+    if (shift < 1) {
															+        return 2;
															+    }
															+    // `shift` is a size_t: use the matching conversion specifier.
															+    log_debug ("Shifting bytes: %zu", shift);
															+    memmove(it->buf, it->tok, it->lim - it->tok);
															+    it->lim -= shift;
															+    it->cur -= shift;
															+    it->mar -= shift;
															+    it->tok -= shift;
															+    // NOTE(review): it->bol is not rebased here, so it keeps pointing at the
															+    // pre-shift position - confirm whether that is intended.
															+    it->lim += fread(it->lim, 1, shift, it->fh);
															+    /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
															+    // Terminate the data; buf has one byte more than CHUNK_SIZE for this.
															+    it->lim[0] = 0;
															+    // A buffer that could not be filled completely marks the end of input.
															+    it->eof |= it->lim < it->buf + CHUNK_SIZE;
															+    return 0;
															+}
														
 
															+
														
 
															+
														
 
															+/** @brief Prepare scanner state for a new input stream and load the first
															+ * chunk.
															+ *
															+ * @param[out] it Scanner state to initialize.
															+ *
															+ * @param[in] fh Open stream to read from.
															+ */
															+static void parse_init(ParseIterator *it, FILE *fh)
															+{
															+    YYCTYPE *const buf_end = it->buf + CHUNK_SIZE;
															+
															+    it->fh = fh;
															+    it->eof = false;
															+    it->ct = 0;
															+    it->line = 1;
															+    it->bol = it->buf;
															+
															+    // All positions start at the end of the buffer, so that the first
															+    // fill() shifts by a whole chunk and reads a whole chunk.
															+    it->lim = buf_end;
															+    it->tok = buf_end;
															+    it->mar = buf_end;
															+    it->cur = buf_end;
															+    /*!stags:re2c format = "it->@@ = NULL; "; */
															+    fill (it);
															+}
														
 
															+
														
 
															+
														
 
															+/** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
															+ *
															+ * Other backslash escapes are resolved with unescape_char(); all remaining
															+ * bytes are copied verbatim.
															+ *
															+ * @param[in] esc_str Escaped input. It does not need to be terminated.
															+ *
															+ * @param[in] size Number of bytes of esc_str to process.
															+ *
															+ * @return Newly allocated, NUL-terminated string that the caller must
															+ *  free(); NULL on allocation failure, on a truncated escape sequence, or
															+ *  on a \U sequence (not implemented yet). Nothing is leaked on failure.
															+ */
															+static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
															+{
															+    // The output is never longer than the input.
															+    YYCTYPE *uc_str = malloc (size + 1);
															+    if (UNLIKELY (!uc_str)) return NULL;
															+
															+    size_t j = 0;
															+    YYCTYPE tmp_chr[5];
															+    for (size_t i = 0; i < size;) {
															+        if (esc_str[i] != '\\') {
															+            // Copy ASCII char verbatim.
															+            uc_str[j++] = esc_str[i++];
															+            continue;
															+        }
															+
															+        i++; // Skip over '\\'
															+
															+        // A backslash as the very last byte has nothing to escape.
															+        if (UNLIKELY (i >= size)) goto fail;
															+
															+        // 4-hex sequence.
															+        if (esc_str[i] == 'u') {
															+            i ++; // Skip over 'u'
															+
															+            // Do not read hex digits past the end of the input.
															+            if (UNLIKELY (size - i < sizeof (tmp_chr) - 1)) goto fail;
															+
															+            // Use tmp_chr to hold the hex string for the code point.
															+            memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
															+            tmp_chr[4] = '\0';
															+
															+            uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
															+            log_debug ("tmp_val: %d", tmp_val);
															+
															+            // Reuse tmp_chr to hold the byte values for the code point.
															+            int nbytes = utf8_encode (tmp_val, tmp_chr);
															+
															+            // Copy bytes into destination.
															+            memcpy (uc_str + j, tmp_chr, nbytes);
															+            log_debug ("UC byte value: %x %x", uc_str[j], uc_str[j + 1]);
															+
															+            j += nbytes;
															+            i += 4;
															+
															+        // 8-hex sequence.
															+        } else if (esc_str[i] == 'U') {
															+            log_error ("UTF-16 sequence unescaping not yet implemented.");
															+            goto fail; // TODO encode UTF-16
															+
															+        // Unescape other escaped characters.
															+        } else uc_str[j++] = unescape_char(esc_str[i++]);
															+    }
															+
															+    // Trim the allocation to the actual length. On failure the original
															+    // buffer is still owned here and must be released.
															+    YYCTYPE *tmp = realloc (uc_str, j + 1);
															+    if (UNLIKELY (!tmp)) goto fail;
															+    tmp[j] = '\0';
															+
															+    return tmp;
															+
															+fail:
															+    free (uc_str);
															+    return NULL;
															+}
														
 
															+
														
 
															+
														
 
															+// Parser interface.
															+//
															+// Entry points of the Lemon-generated parser, declared with full prototypes
															+// so that the compiler checks and converts the arguments at each call.
															+// The last two parameters of Parse() follow the grammar: the token value
															+// type and the extra argument.
															+
															+void *ParseAlloc (void *(*mallocProc)(size_t));
															+void Parse (
															+        void *yyp, int yymajor, LSUP_Term *yyminor, LSUP_GraphIterator *it);
															+void ParseFree (void *p, void (*freeProc)(void *));
														
 
															+
														
 
															+
														
 
															+// Lexer.
														
 
															+
														
 
															+/** @brief Scan the next token from the input.
															+ *
															+ * Comments are consumed silently and scanning resumes after them.
															+ *
															+ * @param[in,out] it Scanner state.
															+ *
															+ * @param[out] term Set to NULL at the start of every scan. For IRIREF,
															+ *  LITERAL and BNODE tokens it receives the result of the matching term
															+ *  constructor.
															+ *
															+ * @return One of T_EOL, T_EOF, T_IRIREF, T_LITERAL, T_BNODE, T_DOT, T_WS;
															+ *  -1 if the input matches no token, or if the text of a token could not
															+ *  be unescaped or allocated.
															+ */
															+static int lex (ParseIterator *it, LSUP_Term **term)
															+{
															+    const YYCTYPE *lit_data_e, *dtype_s, *lang_s;
															+
															+loop:
															+
															+    it->tok = it->cur;
															+
															+    *term = NULL;
															+
															+    /*!re2c
															+    re2c:eof = 0;
															+    re2c:flags:8 = 1;
															+    re2c:flags:tags = 1;
															+    re2c:tags:expression = "it->@@";
															+    re2c:api:style = functions;
															+    re2c:define:YYFILL:naked = 1;
															+
															+
															+    // For unresolved and partially resolved inconsistencies of the spec, see
															+    // https://lists.w3.org/Archives/Public/public-rdf-comments/2017Jun/0000.html
															+    _WS                 = [\x09\x20];
															+    WS                  = _WS+;
															+    EOL                 = [\x0D\x0A] (_WS | [\x0D\x0A])*;
															+    DOT                 = [.];
															+    HEX                 = [0-9A-Fa-f];
															+    ECHAR               = [\\] [tbnrf"'\\];
															+    UCHAR               = "\\u" HEX{4} | "\\U" HEX{8};
															+    PN_CHARS_BASE       = [A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF];
															+    PN_CHARS_U          = PN_CHARS_BASE | '_' | ':';
															+    PN_CHARS            = PN_CHARS_U | '-' | [0-9\u00B7\u0300-\u036F\u203F-\u2040];
															+    IRI_CHARS           = ([^\x00-\x20<>"{}|^`\\] | UCHAR)*;
															+    LITERAL_QUOTE       = ["] ([^\x22\x5C\x0A\x0D] | ECHAR|UCHAR)* ["];
															+    LANGTAG             = [@] [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*;
															+
															+    IRIREF              = [<] IRI_CHARS [>];
															+    LITERAL             = LITERAL_QUOTE @lit_data_e _WS* ("^^" _WS* @dtype_s IRIREF | @lang_s LANGTAG)?;
															+    BNODE               = "_:" ((PN_CHARS_U | [0-9]) ((PN_CHARS | ".")* PN_CHARS)?);
															+    COMMENT             = "#" .*;
															+
															+
															+    EOL {
															+        it->line ++;
															+        it->bol = YYCURSOR;
															+        log_debug ("New line: #%u.", it->line);
															+        return T_EOL;
															+    }
															+
															+    $ {
															+        log_debug ("End of buffer.");
															+        return T_EOF;
															+    }
															+
															+    IRIREF {
															+        // Strip the angle brackets.
															+        YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
															+        if (UNLIKELY (!data)) return -1;
															+
															+        log_debug ("URI data: %s", data);
															+
															+        *term = LSUP_iriref_new ((char*)data, NULL);
															+        free (data);
															+
															+        return T_IRIREF;
															+    }
															+
															+    LITERAL {
															+        // Only unescape Unicode from data.
															+        size_t size = lit_data_e - it->tok - 2;
															+        YYCTYPE *data = unescape_unicode (it->tok + 1, size);
															+        if (UNLIKELY (!data)) return -1;
															+        log_trace ("Literal data: %s", data);
															+
															+        char *metadata = NULL;
															+        const YYCTYPE *md_marker;
															+        LSUP_TermType type = LSUP_TERM_LITERAL;
															+
															+        // From here on, size is the metadata length plus its terminator.
															+        if (dtype_s) {
															+            md_marker = dtype_s;
															+            size = YYCURSOR - md_marker - 1;
															+        } else if (lang_s) {
															+            type = LSUP_TERM_LT_LITERAL;
															+            md_marker = lang_s;
															+            size = YYCURSOR - md_marker;
															+        } else md_marker = NULL;
															+
															+        if (md_marker) {
															+            metadata = malloc (size);
															+            if (UNLIKELY (!metadata)) {
															+                free (data);
															+                return -1;
															+            }
															+            // Skip the leading marker character; the last copied byte is
															+            // overwritten by the terminator.
															+            memcpy (metadata, md_marker + 1, size);
															+            metadata [size - 1] = '\0';
															+            log_trace ("metadata: %s", metadata);
															+        }
															+
															+        if (type == LSUP_TERM_LITERAL) {
															+            LSUP_Term *dtype;
															+            dtype = (
															+                metadata ? LSUP_iriref_new ((char *) metadata, NULL) : NULL);
															+
															+            *term = LSUP_literal_new ((char *) data, dtype);
															+
															+        } else *term = LSUP_lt_literal_new ((char *) data, (char *) metadata);
															+
															+        free (data);
															+        free (metadata);
															+
															+        return T_LITERAL;
															+    }
															+
															+    BNODE {
															+        // Strip the leading underscore and colon.
															+        YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
															+        if (UNLIKELY (!data)) return -1;
															+
															+        log_debug ("BNode data: %s", data);
															+
															+        *term = LSUP_term_new (LSUP_TERM_BNODE, (char*)data, NULL);
															+        free (data);
															+
															+        return T_BNODE;
															+    }
															+
															+    DOT {
															+        log_debug ("End of triple.");
															+        it->ct ++;
															+
															+        return T_DOT;
															+    }
															+
															+    WS {
															+        log_debug ("Separator.");
															+
															+        return T_WS;
															+    }
															+
															+    COMMENT {
															+        // The copy only serves the log line below; skip it when there is
															+        // no memory for it.
															+        size_t size = YYCURSOR - it->tok + 1;
															+        YYCTYPE *data = malloc (size);
															+        if (data) {
															+            memcpy (data, it->tok, size);
															+            data [size - 1] = '\0';
															+            log_debug ("Comment: `%s`", data);
															+            free (data);
															+        }
															+
															+        goto loop;
															+    }
															+
															+    * {
															+        // The offset is a pointer difference: convert it to the type that
															+        // the format string expects.
															+        log_debug (
															+            "Invalid token @ %lu: %s (\\x%x)",
															+            (unsigned long) (YYCURSOR - it->buf - 1), it->tok, *it->tok);
															+
															+        return -1;
															+    }
															+
															+    */
															+}
														
 
															+
														
 
															+
														
 
															+/** @brief Parse a single term from its string representation.
															+ *
															+ * @param[in] rep NUL-terminated representation of the term.
															+ *
															+ * @param[in] map Namespace map. Currently unused.
															+ *
															+ * @param[out] term Receives the parsed term; set to NULL on failure.
															+ *
															+ * @return LSUP_OK if the first token of rep is an IRI reference, a
															+ *  literal or a blank node; LSUP_VALUE_ERR if rep is NULL, empty or
															+ *  starts with anything else; LSUP_MEM_ERR if no stream could be opened
															+ *  on rep.
															+ */
															+LSUP_rc
															+LSUP_nt_parse_term (const char *rep, const LSUP_NSMap *map, LSUP_Term **term)
															+{
															+    (void) map;
															+
															+    *term = NULL;
															+
															+    // An empty buffer holds no term; fmemopen() is also allowed to reject
															+    // a zero size.
															+    if (!rep || rep[0] == '\0') return LSUP_VALUE_ERR;
															+
															+    FILE *fh = fmemopen ((void *)rep, strlen (rep), "r");
															+    if (UNLIKELY (!fh)) return LSUP_MEM_ERR;
															+
															+    ParseIterator it;
															+    parse_init (&it, fh);
															+
															+    int ttype = lex (&it, term);
															+
															+    fclose (fh);
															+
															+    switch (ttype) {
															+        case T_IRIREF:
															+        case T_LITERAL:
															+        case T_BNODE:
															+            return LSUP_OK;
															+        default:
															+            return LSUP_VALUE_ERR;
															+    }
															+}
														
 
															+
														
 
															+/** @brief Parse a whole document from a stream into a new graph.
															+ *
															+ * @param[in] fh Open stream to read from.
															+ *
															+ * @param[out] gr_p Receives the new graph when parsing completes; NULL
															+ *  otherwise.
															+ *
															+ * @param[out] ct If not NULL, receives the number of parsed triples when
															+ *  parsing completes.
															+ *
															+ * @param[out] err_p NULL unless the scanner rejects the input, in which
															+ *  case it receives a malloc'ed message (presumably released by the
															+ *  caller with free() - verify). It stays NULL if the message itself
															+ *  cannot be allocated.
															+ *
															+ * @return LSUP_OK if at least one triple was parsed; LSUP_NORESULT if
															+ *  none was; LSUP_VALUE_ERR if the scanner rejects the input;
															+ *  LSUP_MEM_ERR if the parser, the graph or the graph iterator cannot be
															+ *  created.
															+ */
															+LSUP_rc
															+LSUP_nt_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
															+{
															+    *err_p = NULL;
															+    *gr_p = NULL;
															+
															+    ParseIterator parse_it;
															+    parse_init (&parse_it, fh);
															+
															+    void *parser = ParseAlloc (malloc);
															+    if (UNLIKELY (!parser)) return LSUP_MEM_ERR;
															+
															+    LSUP_rc rc;
															+
															+    LSUP_Graph *gr = LSUP_graph_new (
															+            LSUP_iriref_new (NULL, NULL), LSUP_STORE_HTABLE, NULL, NULL, 0);
															+    if (UNLIKELY (!gr)) {
															+        // The parser is already allocated at this point: release it.
															+        ParseFree (parser, free);
															+        return LSUP_MEM_ERR;
															+    }
															+
															+    LSUP_GraphIterator *it = LSUP_graph_add_init (gr);
															+    if (UNLIKELY (!it)) {
															+        ParseFree (parser, free);
															+        LSUP_graph_free (gr);
															+        return LSUP_MEM_ERR;
															+    }
															+
															+    LSUP_Term *term = NULL;
															+
															+    for (;;) {
															+        int ttype = lex (&parse_it, &term);
															+
															+        if (ttype == -1) {
															+            // At most 15 bytes of the offending input are quoted. The
															+            // message is measured first, then written into a buffer of
															+            // exactly that size.
															+            static const char err_fmt[] =
															+                    "Parse error near token `%.15s[...]' "
															+                    "at line %u, character %td.\n";
															+            const char *token = (const char *)parse_it.tok;
															+
															+            int err_len = snprintf (
															+                    NULL, 0, err_fmt, token,
															+                    parse_it.line, parse_it.cur - parse_it.bol);
															+            char *err_str = err_len < 0 ? NULL : malloc (err_len + 1);
															+            if (err_str) snprintf (
															+                    err_str, err_len + 1, err_fmt, token,
															+                    parse_it.line, parse_it.cur - parse_it.bol);
															+
															+            rc = LSUP_VALUE_ERR;
															+            *err_p = err_str;
															+
															+            goto finally;
															+        }
															+
															+        Parse (parser, ttype, term, it);
															+
															+        if (ttype == T_EOF) break;
															+    }
															+
															+    if (ct) *ct = parse_it.ct;
															+
															+    log_info ("Parsed %u triples.", parse_it.ct);
															+    log_debug ("Graph size: %lu", LSUP_graph_size (gr));
															+
															+    rc = parse_it.ct > 0 ? LSUP_OK : LSUP_NORESULT;
															+    *gr_p = gr;
															+
															+finally:
															+    Parse (parser, 0, NULL, it);
															+    ParseFree (parser, free);
															+
															+    LSUP_graph_add_done (it);
															+    LSUP_term_free (term);
															+
															+    // Only a negative code discards the graph; it was not handed out in
															+    // that case.
															+    if (rc < 0) LSUP_graph_free (gr);
															+
															+    return rc;
															+}
														
 
															+
														
--- a/src/codec_ttl.c
+++ b/src/codec_ttl.c
@@ -0,0 +1,257 @@
 
															+#include "codec_nt.h"
														
 
															+#include "nt_parser.h"
														
 
															+
														
 
															+/** @brief List of characters to be escaped in serialized literals.
														
 
															+ *
														
 
															+ * @sa https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
														
 
															+ */
														
 
															+#define LIT_ECHAR "\t\b\n\r\f\"\'\\"
														
 
															+
														
 
															+/** @brief Regex of characters to be escaped in serialized IRIs.
														
 
															+ *
														
 
															+ * @sa https://www.w3.org/TR/n-triples/#grammar-production-IRIREF
														
 
															+ */
														
 
															+#define IRI_ECHAR_PTN "[\x00-\x20<>\"\\{\\}\\|\\^`\\\\]"
														
 
															+
														
 
															+
														
 
															+/* * * Static prototypes. * * */
														
 
															+
														
 
															+static LSUP_rc escape_lit (const char *in, char **out_p);
														
 
															+
														
 
															+
														
 
															+/* * * Codec functions. * * */
														
 
															+
														
 
															+static LSUP_rc
														
 
															+term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
														
 
															+{
														
 
															+    LSUP_rc rc;
														
 
															+    char *out = NULL, *tmp, *escaped;
														
 
															+    const char *metadata = NULL;
														
 
															+    size_t buf_len;
														
 
															+
														
 
															+    // Free previous content if not NULL.
														
 
															+    if (*out_p != NULL) out = realloc (*out_p, 0);
														
 
															+
														
 
															+    switch (term->type) {
														
 
															+        case LSUP_TERM_IRIREF:
														
 
															+            tmp = realloc (out, strlen (term->data) + 3);
														
 
															+            if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
														
 
															+            out = tmp;
														
 
															+
														
 
															+            sprintf (out, "<%s>", term->data);
														
 
															+            rc = LSUP_OK;
														
 
															+            break;
														
 
															+
														
 
															+        case LSUP_TERM_LITERAL:
														
 
															+            // Calculate string length.
														
 
															+            if (escape_lit (term->data, &escaped) != LSUP_OK)
														
 
															+                return LSUP_ERROR;
														
 
															+            buf_len = strlen (escaped) + 3; // Room for "" and terminator
														
 
															+
														
 
															+            if (
														
 
															+                term->datatype != 0
														
 
															+                && term->datatype != LSUP_default_datatype
														
 
															+            ) {
														
 
															+                metadata = term->datatype->data;
														
 
															+                buf_len += strlen (metadata) + 4; // Room for ^^<>
														
 
															+            }
														
 
															+
														
 
															+            tmp = realloc (out, buf_len);
														
 
															+            if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
														
 
															+            out = tmp;
														
 
															+
														
 
															+            sprintf (out, "\"%s\"", escaped);
														
 
															+            free (escaped);
														
 
															+
														
 
															+            // Add datatype.
														
 
															+            if (metadata)
														
 
															+                out = strcat (strcat (strcat (out, "^^<"), metadata), ">");
														
 
															+
														
 
															+            rc = LSUP_OK;
														
 
															+
														
 
															+            break;
														
 
															+
														
 
															+        case LSUP_TERM_LT_LITERAL:
														
 
															+            // Calculate string length.
														
 
															+            if (escape_lit (term->data, &escaped) != LSUP_OK)
														
 
															+                return LSUP_ERROR;
														
 
															+            buf_len = strlen (escaped) + 3; // Room for "" and terminator
														
 
															+
														
 
															+            if (term->lang != 0) {
														
 
															+                metadata = term->lang;
														
 
															+                buf_len += strlen (metadata) + 1; // Room for @
														
 
															+            }
														
 
															+
														
 
															+            tmp = realloc (out, buf_len);
														
 
															+            if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
														
 
															+            out = tmp;
														
 
															+
														
 
															+            sprintf (out, "\"%s\"", escaped);
														
 
															+            free (escaped);
														
 
															+
														
 
															+            // Add lang.
														
 
															+            if (metadata) out = strcat (strcat (out, "@"), metadata);
														
 
															+
														
 
															+            rc = LSUP_OK;
														
 
															+
														
 
															+            break;
														
 
															+
														
 
															+        case LSUP_TERM_BNODE:
														
 
															+            tmp = realloc (out, strlen (term->data) + 3);
														
 
															+            if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
														
 
															+            out = tmp;
														
 
															+
														
 
															+            sprintf (out, "_:%s", term->data);
														
 
															+            rc = LSUP_OK;
														
 
															+
														
 
															+            break;
														
 
															+
														
 
															+        default:
														
 
															+            out = NULL;
														
 
															+            rc = LSUP_VALUE_ERR;
														
 
															+    }
														
 
															+
														
 
															+    *out_p = out;
														
 
															+    return rc;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+static LSUP_CodecIterator *
														
 
															+gr_to_nt_init (const LSUP_Graph *gr);
														
 
															+
														
 
															+
														
 
															+static LSUP_rc
														
 
															+gr_to_nt_iter (LSUP_CodecIterator *it, unsigned char **res) {
														
 
															+    LSUP_rc rc = LSUP_graph_iter_next (it->gr_it, it->trp);
														
 
															+    if (rc != LSUP_OK) goto finally;
														
 
															+
														
 
															+    term_to_nt (it->trp->s, it->nsm, &it->str_s);
														
 
															+    term_to_nt (it->trp->p, it->nsm, &it->str_p);
														
 
															+    term_to_nt (it->trp->o, it->nsm, &it->str_o);
														
 
															+
														
 
															+    // 3 term separators + dot + newline + terminal = 6
														
 
															+    unsigned char *tmp = realloc (
														
 
															+            *res, strlen (it->str_s) + strlen (it->str_p)
														
 
															+            + strlen (it->str_o) + 6);
														
 
															+    if (UNLIKELY (!tmp)) {
														
 
															+        *res = NULL;
														
 
															+        rc = LSUP_MEM_ERR;
														
 
															+        goto finally;
														
 
															+    }
														
 
															+
														
 
															+    sprintf ((char*)tmp, "%s %s %s .\n", it->str_s, it->str_p, it->str_o);
														
 
															+    *res = tmp;
														
 
															+
														
 
															+    it->cur++;
														
 
															+
														
 
															+finally:
														
 
															+    LSUP_term_free (it->trp->s); it->trp->s = NULL;
														
 
															+    LSUP_term_free (it->trp->p); it->trp->p = NULL;
														
 
															+    LSUP_term_free (it->trp->o); it->trp->o = NULL;
														
 
															+
														
 
															+    return rc;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+static void
														
 
															+gr_to_nt_done (LSUP_CodecIterator *it)
														
 
															+{
														
 
															+    LSUP_graph_iter_free (it->gr_it);
														
 
															+    LSUP_triple_free (it->trp);
														
 
															+    free (it->rep);
														
 
															+    free (it->str_s);
														
 
															+    free (it->str_p);
														
 
															+    free (it->str_o);
														
 
															+    free (it);
														
 
															+}
														
 
															+
														
 
															+
														
 
															+const LSUP_Codec nt_codec = {
														
 
															+    .name               = "N-Triples",
														
 
															+    .mimetype           = "application/n-triples",
														
 
															+    .extension          = "nt",
														
 
															+
														
 
															+    .encode_term        = term_to_nt,
														
 
															+
														
 
															+    .encode_graph_init  = gr_to_nt_init,
														
 
															+    .encode_graph_iter  = gr_to_nt_iter,
														
 
															+    .encode_graph_done  = gr_to_nt_done,
														
 
															+
														
 
															+    .decode_term        = LSUP_nt_parse_term,
														
 
															+    .decode_graph       = LSUP_nt_parse_doc,
														
 
															+};
														
 
															+
														
 
															+
														
 
															+/* * * Other internal functions. * * */
														
 
															+
														
 
															+/** Replace non-printable characters with their literal byte.
														
 
															+ *
														
 
															+ *  Escape backslash is to be added separately.
														
 
															+ */
														
 
															+static inline char replace_char(const char c) {
														
 
															+    switch (c) {
														
 
															+        case '\t': return 't';
														
 
															+        case '\b': return 'b';
														
 
															+        case '\n': return 'n';
														
 
															+        case '\r': return 'r';
														
 
															+        case '\f': return 'f';
														
 
															+        default: return c;
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+
														
 
															+static LSUP_CodecIterator *
														
 
															+gr_to_nt_init (const LSUP_Graph *gr)
														
 
															+{
														
 
															+    LSUP_CodecIterator *it;
														
 
															+    MALLOC_GUARD (it, NULL);
														
 
															+
														
 
															+    it->codec = &nt_codec;
														
 
															+    it->gr_it = LSUP_graph_lookup(gr, NULL, NULL, NULL, &it->cur);
														
 
															+    it->nsm = LSUP_graph_namespace (gr);
														
 
															+    it->cur = 0;
														
 
															+    it->trp = TRP_DUMMY;
														
 
															+    it->rep = NULL;
														
 
															+    it->str_s = NULL;
														
 
															+    it->str_p = NULL;
														
 
															+    it->str_o = NULL;
														
 
															+
														
 
															+    return it;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+/** @brief Add escape character (backslash) to illegal literal characters.
														
 
															+ */
														
 
															+static LSUP_rc
														
 
															+escape_lit (const char *in, char **out_p)
														
 
															+{
														
 
															+    size_t out_size = strlen (in) + 1;
														
 
															+
														
 
															+    // Expand output string size to accommodate escape characters.
														
 
															+    for (
														
 
															+            size_t i = strcspn (in, LIT_ECHAR);
														
 
															+            i < strlen (in);
														
 
															+            i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
														
 
															+        out_size ++;
														
 
															+    }
														
 
															+
														
 
															+    char *out = calloc (1, out_size);
														
 
															+    if (UNLIKELY (!out)) return LSUP_MEM_ERR;
														
 
															+
														
 
															+    size_t boundary;
														
 
															+    boundary = strcspn (in, LIT_ECHAR);
														
 
															+    for (size_t i = 0, j = 0;;) {
														
 
															+        out = strncat (out, in + i, boundary);
														
 
															+
														
 
															+        i += boundary;
														
 
															+        j += boundary;
														
 
															+        if (i >= strlen (in)) break;
														
 
															+
														
 
															+        out[j++] = '\\';
														
 
															+        out[j++] = replace_char (in[i++]);
														
 
															+        boundary = strcspn (in + i, LIT_ECHAR);
														
 
															+    }
														
 
															+
														
 
															+    *out_p = out;
														
 
															+    return 0;
														
 
															+}