#include "codec_nt.h"

/** @brief List of characters to be escaped in serialized literals.
 *
 * https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
 */
#define LIT_ECHAR "\t\b\n\r\f\"\'\\"

/** @brief Regex of characters to be escaped in serialized IRIs.
 *
 * https://www.w3.org/TR/n-triples/#grammar-production-IRIREF
 *
 * NOTE(review): the leading `\x00` escape embeds a NUL byte in the literal,
 * so any consumer that treats this macro as a C string will only see "[".
 * The macro is not referenced in the visible part of this file; fix the
 * pattern before putting it to use.
 */
#define IRI_ECHAR_PTN "[\x00-\x20<>\"\\{\\}\\|\\^`\\\\]"

/** @brief Default NT literal type. */
#define XSD_STRING "http://www.w3.org/2001/XMLSchema#string"


/* * * Static prototypes. * * */

static LSUP_rc escape_lit (const char *in, char **out_p);


/* * * Codec functions. * * */

/** @brief Serialize a single term into its N-Triples representation.
 *
 * @param[in] term Term to serialize. Its `type` selects the output form:
 *  `<data>` for URIs, `"data"` (plus optional `^^<datatype>` and `@lang`)
 *  for literals, `_:data` for blank nodes.
 *
 * @param[in] nsm Namespace map. Not used by this encoder.
 *
 * @param[in,out] out_p Address of the output string. It must point either to
 *  NULL or to a heap buffer owned by the caller; the buffer is resized in
 *  place and the caller remains responsible for freeing it. On allocation or
 *  escaping failure the previous buffer is left untouched and still valid.
 *  On an unsupported term type the previous buffer is freed and `*out_p` is
 *  set to NULL.
 *
 * @return LSUP_OK on success; LSUP_MEM_ERR on allocation failure;
 *  LSUP_VALUE_ERR if the term type is not handled; or the error returned by
 *  the literal escaping step.
 */
static LSUP_rc
term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
{
    LSUP_rc rc;
    // Reuse the caller's buffer. realloc() either resizes it or, on failure,
    // leaves it valid, so *out_p never dangles on an error return.
    char *out = *out_p, *tmp;
    size_t buf_len;

    switch (term->type) {
        case LSUP_TERM_URI:
            buf_len = strlen (term->data) + 3; // Room for <> and terminator
            tmp = realloc (out, buf_len);
            if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
            out = tmp;

            snprintf (out, buf_len, "<%s>", term->data);
            rc = LSUP_OK;
            break;

        case LSUP_TERM_LITERAL: {
            char *escaped = NULL;

            rc = escape_lit (term->data, &escaped);
            if (rc != LSUP_OK) return rc;

            // Always suppress xsd:string data type.
            const char *dtype = (
                    term->datatype
                    && strcmp (term->datatype, XSD_STRING) != 0)
                ? term->datatype : NULL;
            const char *lang = (term->lang[0] != '\0') ? term->lang : NULL;

            // Calculate string length.
            const size_t esc_len = strlen (escaped);
            buf_len = esc_len + 3; // Room for "" and terminator
            if (dtype) buf_len += strlen (dtype) + 4; // Room for ^^<>
            if (lang) buf_len += strlen (lang) + 1; // Room for @

            TRACE ("nt rep length: %zu\n", buf_len);

            tmp = realloc (out, buf_len);
            if (UNLIKELY (!tmp)) {
                free (escaped);
                return LSUP_MEM_ERR;
            }
            out = tmp;

            snprintf (out, buf_len, "\"%s\"", escaped);
            free (escaped);
            size_t pos = esc_len + 2;

            // The N-Triples grammar requires the datatype to be an IRIREF,
            // i.e. enclosed in angle brackets.
            if (dtype) {
                snprintf (out + pos, buf_len - pos, "^^<%s>", dtype);
                pos += strlen (dtype) + 4;
            }

            // NOTE(review): if a term carries both a non-default datatype
            // and a language tag, both are emitted, which the N-Triples
            // grammar does not allow. Confirm whether the data model can
            // produce such terms.
            if (lang) snprintf (out + pos, buf_len - pos, "@%s", lang);

            rc = LSUP_OK;
            break;
        }

        case LSUP_TERM_BNODE:
            buf_len = strlen (term->data) + 3; // Room for _: and terminator
            tmp = realloc (out, buf_len);
            if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
            out = tmp;

            snprintf (out, buf_len, "_:%s", term->data);
            rc = LSUP_OK;
            break;

        default:
            // Unsupported type: release any previous content and hand back
            // NULL.
            free (out);
            out = NULL;
            rc = LSUP_VALUE_ERR;
    }

    *out_p = out;
    return rc;
}


/** @brief Parse an N-Triples term. Not implemented yet.
 *
 * @return Always LSUP_NOT_IMPL_ERR.
 */
static LSUP_rc
nt_to_term (const char *rep, const LSUP_NSMap *nsm, LSUP_Term **term)
{
    // TODO
    return LSUP_NOT_IMPL_ERR;
}


/** @brief Set up an iterator that encodes a graph as N-Triples.
 *
 * @param[in] gr Graph to encode.
 *
 * @return A new iterator handle, to be advanced with #gr_to_nt_iter and
 *  released with #gr_to_nt_done.
 */
static LSUP_CodecIterator *
gr_to_nt_init (const LSUP_Graph *gr)
{
    LSUP_CodecIterator *it;
    // Presumably allocates `it` and returns NULL on failure — see the
    // definition of MALLOC_GUARD.
    MALLOC_GUARD (it, NULL);

    // All-NULL lookup pattern; presumably matches every triple in the graph.
    LSUP_Triple lut = {NULL, NULL, NULL};
    it->gr_it = LSUP_graph_lookup(gr, &lut, &it->cur);
    it->nsm = LSUP_graph_namespace (gr);
    // Whatever the lookup stored in `cur` is discarded: from here on it
    // counts the triples encoded so far.
    it->cur = 0;
    it->trp = LSUP_triple_new (TERM_DUMMY, TERM_DUMMY, TERM_DUMMY);
    it->rep = NULL;
    it->str_s = NULL;
    it->str_p = NULL;
    it->str_o = NULL;

    return it;
}


/** @brief Encode the next triple of the graph.
 *
 * On success, `it->rep` holds one N-Triples statement (`s p o .\n`) and
 * `it->cur` is incremented. The string is owned by the iterator and is
 * overwritten by the next call.
 *
 * @param[in,out] it Iterator obtained from #gr_to_nt_init.
 *
 * @return LSUP_OK on success; the return code of the underlying graph
 *  iterator when it does not yield a triple; the error of the term encoder
 *  if a term cannot be serialized; LSUP_MEM_ERR on allocation failure.
 */
static LSUP_rc
gr_to_nt_iter (LSUP_CodecIterator *it)
{
    LSUP_rc rc = LSUP_graph_iter_next (it->gr_it, it->trp);
    if (rc != LSUP_OK) return rc;

    // A failed term encoding must stop here: the term buffers may be NULL
    // or hold stale content from a previous triple.
    rc = term_to_nt (it->trp->s, it->nsm, &it->str_s);
    if (rc != LSUP_OK) return rc;
    rc = term_to_nt (it->trp->p, it->nsm, &it->str_p);
    if (rc != LSUP_OK) return rc;
    rc = term_to_nt (it->trp->o, it->nsm, &it->str_o);
    if (rc != LSUP_OK) return rc;

    // 3 term strings + 2 separating spaces + " .\n" + terminator.
    const size_t rep_len =
        strlen (it->str_s) + strlen (it->str_p) + strlen (it->str_o) + 6;

    char *tmp = realloc (it->rep, rep_len);
    if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
    it->rep = tmp;

    snprintf (
            it->rep, rep_len, "%s %s %s .\n",
            it->str_s, it->str_p, it->str_o);

    it->cur++;

    return LSUP_OK;
}


/** @brief Release an encoding iterator and everything it owns.
 *
 * @param[in] it Iterator obtained from #gr_to_nt_init. NULL is accepted and
 *  ignored.
 */
static void
gr_to_nt_done (LSUP_CodecIterator *it)
{
    if (!it) return;

    LSUP_graph_iter_free (it->gr_it);
    LSUP_triple_free (it->trp);
    free (it->rep);
    free (it->str_s);
    free (it->str_p);
    free (it->str_o);
    free (it);
}


/** @brief Set up an iterator that decodes N-Triples. Not implemented yet.
 *
 * @return Always NULL.
 */
static LSUP_CodecIterator *
nt_to_gr_init (const LSUP_Graph *gr)
{
    // TODO
    return NULL;
}


/** @brief Decode the next N-Triples statement. Not implemented yet.
 *
 * @return Always LSUP_NOT_IMPL_ERR.
 */
static LSUP_rc
nt_to_gr_iter (LSUP_CodecIterator *it)
{
    // TODO
    return LSUP_NOT_IMPL_ERR;
}


/** @brief Release a decoding iterator. */
static void
nt_to_gr_done (LSUP_CodecIterator *it)
{
    free (it);
}


/** @brief N-Triples codec: function table and format metadata. */
const LSUP_Codec nt_codec = {
    .name           = "N-Triples",
    .mimetype       = "application/n-triples",
    .extension      = "nt",

    .term_encoder   = term_to_nt,
    .term_decoder   = nt_to_term,

    .gr_encode_init = gr_to_nt_init,
    .gr_encode_iter = gr_to_nt_iter,
    .gr_encode_done = gr_to_nt_done,

    .gr_decode_init = nt_to_gr_init,
    .gr_decode_iter = nt_to_gr_iter,
    .gr_decode_done = nt_to_gr_done,
};


/* * * Other internal functions. * * */

/** @brief Map a control character to the letter of its escape sequence.
 *
 * E.g. a tab becomes `t` and a newline becomes `n`. Any other character,
 * including quotes and the backslash, is returned unchanged.
 *
 * The escaping backslash is to be added separately by the caller.
 */
static inline char
replace_char (const char c)
{
    switch (c) {
        case '\t': return 't';
        case '\b': return 'b';
        case '\n': return 'n';
        case '\r': return 'r';
        case '\f': return 'f';
        default: return c;
    }
}


/** @brief Add escape character (backslash) to illegal literal characters.
 *
 * Every character listed in #LIT_ECHAR is replaced with a backslash followed
 * by its escape letter (or by the character itself for quotes and
 * backslash).
 *
 * @param[in] in NUL-terminated string to escape.
 *
 * @param[out] out_p Receives a newly allocated, NUL-terminated escaped
 *  string. The caller must free it. Not modified on failure.
 *
 * @return LSUP_OK on success; LSUP_MEM_ERR on allocation failure.
 */
static LSUP_rc
escape_lit (const char *in, char **out_p)
{
    const size_t in_len = strlen (in);
    size_t out_size = in_len + 1;

    // Expand output string size to accommodate escape characters: one extra
    // byte per character that needs a backslash.
    for (
            size_t i = strcspn (in, LIT_ECHAR);
            i < in_len;
            i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
        out_size ++;
    }

    char *out = malloc (out_size);
    if (UNLIKELY (!out)) return LSUP_MEM_ERR;

    // Copy runs of plain characters verbatim; between runs, emit the escape
    // sequence for the special character that ended the run.
    size_t i = 0, j = 0;
    for (;;) {
        const size_t span = strcspn (in + i, LIT_ECHAR);
        memcpy (out + j, in + i, span);
        i += span;
        j += span;
        if (i >= in_len) break;

        out[j++] = '\\';
        out[j++] = replace_char (in[i++]);
    }
    out[j] = '\0';

    *out_p = out;
    return LSUP_OK;
}