codec_ttl.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. #include "codec_ttl.h"
  2. #include "parser_ttl.h"
  3. /** @brief List of characters to be escaped in serialized literals.
  4. *
  5. * @sa https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
  6. */
  7. #define LIT_ECHAR "\t\b\n\r\f\"\'\\"
  8. /** @brief Regex of characters to be escaped in serialized IRIs.
  9. *
  10. * @sa https://www.w3.org/TR/n-triples/#grammar-production-IRIREF
  11. */
  12. #define IRI_ECHAR_PTN "[\x00-\x20<>\"\\{\\}\\|\\^`\\\\]"
  13. /* * * Static prototypes. * * */
  14. static LSUP_rc escape_lit (const char *in, char **out_p);
  15. /* * * Codec functions. * * */
  16. static LSUP_rc
  17. term_to_ttl (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
  18. {
  19. LSUP_rc rc;
  20. char *out = NULL, *tmp, *escaped;
  21. const char *metadata = NULL;
  22. size_t buf_len;
  23. // Free previous content if not NULL.
  24. if (*out_p != NULL) out = realloc (*out_p, 0);
  25. switch (term->type) {
  26. case LSUP_TERM_IRIREF:
  27. tmp = realloc (out, strlen (term->data) + 3);
  28. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  29. out = tmp;
  30. sprintf (out, "<%s>", term->data);
  31. rc = LSUP_OK;
  32. break;
  33. case LSUP_TERM_LITERAL:
  34. // Calculate string length.
  35. if (escape_lit (term->data, &escaped) != LSUP_OK)
  36. return LSUP_ERROR;
  37. buf_len = strlen (escaped) + 3; // Room for "" and terminator
  38. if (
  39. term->datatype != 0
  40. && term->datatype != LSUP_default_datatype
  41. ) {
  42. metadata = term->datatype->data;
  43. buf_len += strlen (metadata) + 4; // Room for ^^<>
  44. }
  45. tmp = realloc (out, buf_len);
  46. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  47. out = tmp;
  48. sprintf (out, "\"%s\"", escaped);
  49. free (escaped);
  50. // Add datatype.
  51. if (metadata)
  52. out = strcat (strcat (strcat (out, "^^<"), metadata), ">");
  53. rc = LSUP_OK;
  54. break;
  55. case LSUP_TERM_LT_LITERAL:
  56. // Calculate string length.
  57. if (escape_lit (term->data, &escaped) != LSUP_OK)
  58. return LSUP_ERROR;
  59. buf_len = strlen (escaped) + 3; // Room for "" and terminator
  60. if (term->lang != 0) {
  61. metadata = term->lang;
  62. buf_len += strlen (metadata) + 1; // Room for @
  63. }
  64. tmp = realloc (out, buf_len);
  65. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  66. out = tmp;
  67. sprintf (out, "\"%s\"", escaped);
  68. free (escaped);
  69. // Add lang.
  70. if (metadata) out = strcat (strcat (out, "@"), metadata);
  71. rc = LSUP_OK;
  72. break;
  73. case LSUP_TERM_BNODE:
  74. tmp = realloc (out, strlen (term->data) + 3);
  75. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  76. out = tmp;
  77. sprintf (out, "_:%s", term->data);
  78. rc = LSUP_OK;
  79. break;
  80. default:
  81. out = NULL;
  82. rc = LSUP_VALUE_ERR;
  83. }
  84. *out_p = out;
  85. return rc;
  86. }
  87. static LSUP_CodecIterator *
  88. gr_to_ttl_init (const LSUP_Graph *gr);
  89. static LSUP_rc
  90. gr_to_ttl_iter (LSUP_CodecIterator *it, unsigned char **res) {
  91. LSUP_rc rc = LSUP_graph_iter_next (it->gr_it, it->trp);
  92. if (rc != LSUP_OK) goto finally;
  93. term_to_ttl (it->trp->s, it->nsm, &it->str_s);
  94. term_to_ttl (it->trp->p, it->nsm, &it->str_p);
  95. term_to_ttl (it->trp->o, it->nsm, &it->str_o);
  96. // 3 term separators + dot + newline + terminal = 6
  97. unsigned char *tmp = realloc (
  98. *res, strlen (it->str_s) + strlen (it->str_p)
  99. + strlen (it->str_o) + 6);
  100. if (UNLIKELY (!tmp)) {
  101. *res = NULL;
  102. rc = LSUP_MEM_ERR;
  103. goto finally;
  104. }
  105. sprintf ((char*)tmp, "%s %s %s .\n", it->str_s, it->str_p, it->str_o);
  106. *res = tmp;
  107. it->cur++;
  108. finally:
  109. LSUP_term_free (it->trp->s); it->trp->s = NULL;
  110. LSUP_term_free (it->trp->p); it->trp->p = NULL;
  111. LSUP_term_free (it->trp->o); it->trp->o = NULL;
  112. return rc;
  113. }
  114. static void
  115. gr_to_ttl_done (LSUP_CodecIterator *it)
  116. {
  117. LSUP_graph_iter_free (it->gr_it);
  118. LSUP_triple_free (it->trp);
  119. free (it->rep);
  120. free (it->str_s);
  121. free (it->str_p);
  122. free (it->str_o);
  123. free (it);
  124. }
  125. const LSUP_Codec ttl_codec = {
  126. .name = "Turtle",
  127. .mimetype = "text/turtle",
  128. .extension = "ttl",
  129. .encode_term = term_to_ttl,
  130. .encode_graph_init = gr_to_ttl_init,
  131. .encode_graph_iter = gr_to_ttl_iter,
  132. .encode_graph_done = gr_to_ttl_done,
  133. .decode_term = LSUP_ttl_parse_term,
  134. .decode_graph = LSUP_ttl_parse_doc,
  135. };
  136. /* * * Other internal functions. * * */
  137. /** Replace non-printable characters with their literal byte.
  138. *
  139. * Escape backslash is to be added separately.
  140. */
  141. static inline char replace_char(const char c) {
  142. switch (c) {
  143. case '\t': return 't';
  144. case '\b': return 'b';
  145. case '\n': return 'n';
  146. case '\r': return 'r';
  147. case '\f': return 'f';
  148. default: return c;
  149. }
  150. }
  151. static LSUP_CodecIterator *
  152. gr_to_ttl_init (const LSUP_Graph *gr)
  153. {
  154. LSUP_CodecIterator *it;
  155. MALLOC_GUARD (it, NULL);
  156. it->codec = &ttl_codec;
  157. it->gr_it = LSUP_graph_lookup(gr, NULL, NULL, NULL, &it->cur);
  158. it->nsm = LSUP_graph_namespace (gr);
  159. it->cur = 0;
  160. it->trp = TRP_DUMMY;
  161. it->rep = NULL;
  162. it->str_s = NULL;
  163. it->str_p = NULL;
  164. it->str_o = NULL;
  165. return it;
  166. }
  167. /** @brief Add escape character (backslash) to illegal literal characters.
  168. */
  169. static LSUP_rc
  170. escape_lit (const char *in, char **out_p)
  171. {
  172. size_t out_size = strlen (in) + 1;
  173. // Expand output string size to accommodate escape characters.
  174. for (
  175. size_t i = strcspn (in, LIT_ECHAR);
  176. i < strlen (in);
  177. i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
  178. out_size ++;
  179. }
  180. char *out = calloc (1, out_size);
  181. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  182. size_t boundary;
  183. boundary = strcspn (in, LIT_ECHAR);
  184. for (size_t i = 0, j = 0;;) {
  185. out = strncat (out, in + i, boundary);
  186. i += boundary;
  187. j += boundary;
  188. if (i >= strlen (in)) break;
  189. out[j++] = '\\';
  190. out[j++] = replace_char (in[i++]);
  191. boundary = strcspn (in + i, LIT_ECHAR);
  192. }
  193. *out_p = out;
  194. return 0;
  195. }