|
@@ -0,0 +1,177 @@
|
|
|
|
+#include "codec_nt.h"
|
|
|
|
+
|
|
|
|
/** @brief Characters that must be backslash-escaped in serialized literals.
 *
 * Used as the reject set for strcspn() when scanning literal data.
 *
 * https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
 */
#define LIT_ECHAR "\t\b\n\r\f\"\'\\"

/** @brief Regex (bracket expression) of characters to be escaped in
 * serialized IRIs.
 *
 * The grammar excludes the range #x00-#x20. The range here starts at \x01
 * because a \x00 escape would embed a NUL byte in the C string literal and
 * truncate the pattern to "[". A NUL byte cannot occur inside a C string
 * being matched anyway, so no coverage is lost.
 *
 * https://www.w3.org/TR/n-triples/#grammar-production-IRIREF
 */
#define IRI_ECHAR_PTN "[\x01-\x20<>\"\\{\\}\\|\\^`\\\\]"

/** @brief Default NT literal type.
 *
 * Literals carrying this data type are serialized without a data type
 * suffix.
 */
#define XSD_STRING "http://www.w3.org/2001/XMLSchema#string"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+/* * * Static prototypes. * * */
|
|
|
|
+
|
|
|
|
+static LSUP_rc escape_lit (const char *in, char **out_p);
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+/* * * Codec functions. * * */
|
|
|
|
+
|
|
|
|
+static LSUP_rc
|
|
|
|
+term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
|
|
|
|
+{
|
|
|
|
+ LSUP_rc rc;
|
|
|
|
+ char *out = NULL;
|
|
|
|
+ size_t buf_len;
|
|
|
|
+
|
|
|
|
+ switch (term->type) {
|
|
|
|
+ case LSUP_TERM_URI:
|
|
|
|
+ out = malloc (strlen (term->data) + 3);
|
|
|
|
+ if (UNLIKELY (!out)) return LSUP_MEM_ERR;
|
|
|
|
+
|
|
|
|
+ sprintf (out, "<%s>", term->data);
|
|
|
|
+ rc = LSUP_OK;
|
|
|
|
+ break;
|
|
|
|
+
|
|
|
|
+ case LSUP_TERM_LITERAL:
|
|
|
|
+ buf_len = strlen (term->data) + 3; // Room for ""
|
|
|
|
+
|
|
|
|
+ if (term->datatype && strcmp (term->datatype, XSD_STRING) != 0)
|
|
|
|
+ buf_len += strlen (term->datatype) + 2; // Room for ^^
|
|
|
|
+
|
|
|
|
+ if (strlen (term->lang) > 0) buf_len += strlen(term->lang) + 1; //@
|
|
|
|
+
|
|
|
|
+ out = malloc (buf_len);
|
|
|
|
+ if (UNLIKELY (!out)) return LSUP_MEM_ERR;
|
|
|
|
+
|
|
|
|
+ char *escaped;
|
|
|
|
+ if (escape_lit (term->data, &escaped) != LSUP_OK)
|
|
|
|
+ return LSUP_ERROR;
|
|
|
|
+ sprintf (out, "\"%s\"", escaped);
|
|
|
|
+ free (escaped);
|
|
|
|
+
|
|
|
|
+ // Always suppress xsd:string data type.
|
|
|
|
+ if (term->datatype && strcmp (term->datatype, XSD_STRING) != 0)
|
|
|
|
+ out = strcat (strcat (out, "^^"), term->datatype);
|
|
|
|
+
|
|
|
|
+ if (strlen (term->lang) > 0)
|
|
|
|
+ out = strcat (strcat (out, "@"), term->lang);
|
|
|
|
+
|
|
|
|
+ rc = LSUP_OK;
|
|
|
|
+
|
|
|
|
+ break;
|
|
|
|
+
|
|
|
|
+ case LSUP_TERM_BNODE:
|
|
|
|
+ out = malloc (strlen (term->data) + 2);
|
|
|
|
+ if (UNLIKELY (!out)) return LSUP_MEM_ERR;
|
|
|
|
+
|
|
|
|
+ sprintf (out, "_:%s", term->data);
|
|
|
|
+ rc = LSUP_OK;
|
|
|
|
+
|
|
|
|
+ break;
|
|
|
|
+
|
|
|
|
+ default:
|
|
|
|
+ out = NULL;
|
|
|
|
+ rc = LSUP_VALUE_ERR;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ *out_p = out;
|
|
|
|
+ return rc;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+static LSUP_rc
|
|
|
|
+nt_to_term (const char *rep, const LSUP_NSMap *nsm, LSUP_Term **term)
|
|
|
|
+{
|
|
|
|
+ // TODO
|
|
|
|
+ return LSUP_NOT_IMPL_ERR;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+static LSUP_rc
|
|
|
|
+gr_to_nt (const LSUP_Graph *gr, char **rep)
|
|
|
|
+{
|
|
|
|
+ // TODO
|
|
|
|
+ return LSUP_NOT_IMPL_ERR;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+static LSUP_rc
|
|
|
|
+nt_to_gr (const char *rep, LSUP_Graph **gr)
|
|
|
|
+{
|
|
|
|
+ // TODO
|
|
|
|
+ return LSUP_NOT_IMPL_ERR;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+const LSUP_Codec nt_codec = {
|
|
|
|
+ .name = "N-Triples",
|
|
|
|
+ .mimetype = "application/n-triples",
|
|
|
|
+ .extension = "nt",
|
|
|
|
+ .term_encoder = term_to_nt,
|
|
|
|
+ .term_decoder = nt_to_term,
|
|
|
|
+ .gr_encoder = gr_to_nt,
|
|
|
|
+ .gr_decoder = nt_to_gr,
|
|
|
|
+};
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+/* * * Other internal functions. * * */
|
|
|
|
+
|
|
|
|
/** Map a control character to the letter used in its escape sequence.
 *
 * Tab, backspace, newline, carriage return and form feed become 't', 'b',
 * 'n', 'r' and 'f' respectively. Any other character is returned unchanged.
 *
 * The escape backslash is to be added separately by the caller.
 */
static inline char replace_char(const char c) {
    // Parallel tables: ctl[k] is written out as sym[k].
    static const char ctl[] = {'\t', '\b', '\n', '\r', '\f'};
    static const char sym[] = {'t', 'b', 'n', 'r', 'f'};

    for (unsigned k = 0; k < sizeof ctl / sizeof ctl[0]; k++) {
        if (ctl[k] == c) return sym[k];
    }

    return c;
}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+/** @brief Add escape character (backslash) to illegal literal characters.
|
|
|
|
+ */
|
|
|
|
+static LSUP_rc
|
|
|
|
+escape_lit (const char *in, char **out_p)
|
|
|
|
+{
|
|
|
|
+ size_t out_size = strlen (in) + 1;
|
|
|
|
+
|
|
|
|
+ // Expand output string size to accommodate escape characters.
|
|
|
|
+ //size_t i = strcspn (in, LIT_ECHAR);
|
|
|
|
+ for (
|
|
|
|
+ size_t i = strcspn (in, LIT_ECHAR);
|
|
|
|
+ i < strlen (in);
|
|
|
|
+ i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
|
|
|
|
+ out_size ++;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ char *out = calloc (1, out_size);
|
|
|
|
+
|
|
|
|
+ size_t boundary;
|
|
|
|
+ boundary = strcspn (in, LIT_ECHAR);
|
|
|
|
+ for (size_t i = 0, j = 0;;) {
|
|
|
|
+ out = strncat (out, in + i, boundary);
|
|
|
|
+
|
|
|
|
+ i += boundary;
|
|
|
|
+ j += boundary;
|
|
|
|
+ if (i >= strlen (in)) break;
|
|
|
|
+
|
|
|
|
+ out[j++] = '\\';
|
|
|
|
+ out[j++] = replace_char (in[i++]);
|
|
|
|
+ boundary = strcspn (in + i, LIT_ECHAR);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ *out_p = out;
|
|
|
|
+ return 0;
|
|
|
|
+}
|