123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257 |
- #include "codec_ttl.h"
- #include "parser_ttl.h"
/** @brief List of characters to be escaped in serialized literals.
 *
 * Each of these bytes is written out as a backslash followed by a
 * replacement character.
 *
 * @sa https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
 */
#define LIT_ECHAR "\t\b\n\r\f\"\'\\"

/** @brief Regex of characters to be escaped in serialized IRIs.
 *
 * The control-character range starts at 0x01 rather than 0x00: a 0x00 byte
 * inside the literal would terminate the C string right after the opening
 * bracket, leaving an unusable one-character pattern. Since subjects handled
 * as NUL-terminated strings can never contain a 0x00 byte, the set of
 * matched characters is unchanged for them.
 *
 * @sa https://www.w3.org/TR/n-triples/#grammar-production-IRIREF
 */
#define IRI_ECHAR_PTN "[\x01-\x20<>\"\\{\\}\\|\\^`\\\\]"
- /* * * Static prototypes. * * */
- static LSUP_rc escape_lit (const char *in, char **out_p);
- /* * * Codec functions. * * */
- static LSUP_rc
- term_to_ttl (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
- {
- LSUP_rc rc;
- char *out = NULL, *tmp, *escaped;
- const char *metadata = NULL;
- size_t buf_len;
- // Free previous content if not NULL.
- if (*out_p != NULL) out = realloc (*out_p, 0);
- switch (term->type) {
- case LSUP_TERM_IRIREF:
- tmp = realloc (out, strlen (term->data) + 3);
- if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
- out = tmp;
- sprintf (out, "<%s>", term->data);
- rc = LSUP_OK;
- break;
- case LSUP_TERM_LITERAL:
- // Calculate string length.
- if (escape_lit (term->data, &escaped) != LSUP_OK)
- return LSUP_ERROR;
- buf_len = strlen (escaped) + 3; // Room for "" and terminator
- if (
- term->datatype != 0
- && term->datatype != LSUP_default_datatype
- ) {
- metadata = term->datatype->data;
- buf_len += strlen (metadata) + 4; // Room for ^^<>
- }
- tmp = realloc (out, buf_len);
- if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
- out = tmp;
- sprintf (out, "\"%s\"", escaped);
- free (escaped);
- // Add datatype.
- if (metadata)
- out = strcat (strcat (strcat (out, "^^<"), metadata), ">");
- rc = LSUP_OK;
- break;
- case LSUP_TERM_LT_LITERAL:
- // Calculate string length.
- if (escape_lit (term->data, &escaped) != LSUP_OK)
- return LSUP_ERROR;
- buf_len = strlen (escaped) + 3; // Room for "" and terminator
- if (term->lang != 0) {
- metadata = term->lang;
- buf_len += strlen (metadata) + 1; // Room for @
- }
- tmp = realloc (out, buf_len);
- if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
- out = tmp;
- sprintf (out, "\"%s\"", escaped);
- free (escaped);
- // Add lang.
- if (metadata) out = strcat (strcat (out, "@"), metadata);
- rc = LSUP_OK;
- break;
- case LSUP_TERM_BNODE:
- tmp = realloc (out, strlen (term->data) + 3);
- if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
- out = tmp;
- sprintf (out, "_:%s", term->data);
- rc = LSUP_OK;
- break;
- default:
- out = NULL;
- rc = LSUP_VALUE_ERR;
- }
- *out_p = out;
- return rc;
- }
- static LSUP_CodecIterator *
- gr_to_ttl_init (const LSUP_Graph *gr);
- static LSUP_rc
- gr_to_ttl_iter (LSUP_CodecIterator *it, unsigned char **res) {
- LSUP_rc rc = LSUP_graph_iter_next (it->gr_it, it->trp);
- if (rc != LSUP_OK) goto finally;
- term_to_ttl (it->trp->s, it->nsm, &it->str_s);
- term_to_ttl (it->trp->p, it->nsm, &it->str_p);
- term_to_ttl (it->trp->o, it->nsm, &it->str_o);
- // 3 term separators + dot + newline + terminal = 6
- unsigned char *tmp = realloc (
- *res, strlen (it->str_s) + strlen (it->str_p)
- + strlen (it->str_o) + 6);
- if (UNLIKELY (!tmp)) {
- *res = NULL;
- rc = LSUP_MEM_ERR;
- goto finally;
- }
- sprintf ((char*)tmp, "%s %s %s .\n", it->str_s, it->str_p, it->str_o);
- *res = tmp;
- it->cur++;
- finally:
- LSUP_term_free (it->trp->s); it->trp->s = NULL;
- LSUP_term_free (it->trp->p); it->trp->p = NULL;
- LSUP_term_free (it->trp->o); it->trp->o = NULL;
- return rc;
- }
- static void
- gr_to_ttl_done (LSUP_CodecIterator *it)
- {
- LSUP_graph_iter_free (it->gr_it);
- LSUP_triple_free (it->trp);
- free (it->rep);
- free (it->str_s);
- free (it->str_p);
- free (it->str_o);
- free (it);
- }
- const LSUP_Codec ttl_codec = {
- .name = "Turtle",
- .mimetype = "text/turtle",
- .extension = "ttl",
- .encode_term = term_to_ttl,
- .encode_graph_init = gr_to_ttl_init,
- .encode_graph_iter = gr_to_ttl_iter,
- .encode_graph_done = gr_to_ttl_done,
- .decode_term = LSUP_ttl_parse_term,
- .decode_graph = LSUP_ttl_parse_doc,
- };
- /* * * Other internal functions. * * */
/** Map a control character to the letter used in its escape sequence.
 *
 * Tab, backspace, newline, carriage return and form feed are mapped to
 * `t`, `b`, `n`, `r` and `f` respectively; any other character is returned
 * unchanged. The leading backslash is to be added separately by the caller.
 */
static inline char replace_char (const char c)
{
    static const char ctrl_chars[] = {'\t', '\b', '\n', '\r', '\f'};
    static const char esc_letters[] = {'t', 'b', 'n', 'r', 'f'};

    for (size_t k = 0; k < sizeof ctrl_chars; k++) {
        if (ctrl_chars[k] == c) return esc_letters[k];
    }

    return c;
}
- static LSUP_CodecIterator *
- gr_to_ttl_init (const LSUP_Graph *gr)
- {
- LSUP_CodecIterator *it;
- MALLOC_GUARD (it, NULL);
- it->codec = &ttl_codec;
- it->gr_it = LSUP_graph_lookup(gr, NULL, NULL, NULL, &it->cur);
- it->nsm = LSUP_graph_namespace (gr);
- it->cur = 0;
- it->trp = TRP_DUMMY;
- it->rep = NULL;
- it->str_s = NULL;
- it->str_p = NULL;
- it->str_o = NULL;
- return it;
- }
- /** @brief Add escape character (backslash) to illegal literal characters.
- */
- static LSUP_rc
- escape_lit (const char *in, char **out_p)
- {
- size_t out_size = strlen (in) + 1;
- // Expand output string size to accommodate escape characters.
- for (
- size_t i = strcspn (in, LIT_ECHAR);
- i < strlen (in);
- i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
- out_size ++;
- }
- char *out = calloc (1, out_size);
- if (UNLIKELY (!out)) return LSUP_MEM_ERR;
- size_t boundary;
- boundary = strcspn (in, LIT_ECHAR);
- for (size_t i = 0, j = 0;;) {
- out = strncat (out, in + i, boundary);
- i += boundary;
- j += boundary;
- if (i >= strlen (in)) break;
- out[j++] = '\\';
- out[j++] = replace_char (in[i++]);
- boundary = strcspn (in + i, LIT_ECHAR);
- }
- *out_p = out;
- return 0;
- }
|