codec_nt.c 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. #include "codec/codec_nt.h"
  2. /** @brief List of characters to be escaped in serialized literals.
  3. *
  4. * @sa https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
  5. */
  6. #define LIT_ECHAR "\t\b\n\r\f\"\'\\"
  7. /** @brief Regex of characters to be escaped in serialized IRIs.
  8. *
  9. * @sa https://www.w3.org/TR/n-triples/#grammar-production-IRIREF
  10. */
  11. #define IRI_ECHAR_PTN "[\x00-\x20<>\"\\{\\}\\|\\^`\\\\]"
  12. /* * * Static prototypes. * * */
  13. static LSUP_rc escape_lit (const char *in, char **out_p);
  14. /* * * Codec functions. * * */
  15. static LSUP_rc
  16. term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
  17. {
  18. LSUP_rc rc;
  19. char *out = NULL, *escaped;
  20. const char *metadata = NULL;
  21. size_t buf_len;
  22. // Free previous content if not NULL.
  23. if (*out_p != NULL) out = realloc (*out_p, 0);
  24. char *data = term->data;
  25. switch (term->type) {
  26. case LSUP_TERM_NS_IRIREF:
  27. LSUP_nsmap_normalize_uri (nsm, term->data, &data);
  28. case LSUP_TERM_IRIREF:
  29. out = realloc (out, strlen (data) + 3);
  30. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  31. sprintf (out, "<%s>", data);
  32. if (data != term->data) free (data);
  33. rc = LSUP_OK;
  34. break;
  35. case LSUP_TERM_LITERAL:
  36. // Calculate string length.
  37. if (escape_lit (term->data, &escaped) != LSUP_OK)
  38. return LSUP_ERROR;
  39. buf_len = strlen (escaped) + 3; // Room for "" and terminator
  40. if (
  41. term->datatype != 0
  42. && term->datatype != LSUP_default_datatype
  43. ) {
  44. metadata = term->datatype->data;
  45. buf_len += strlen (metadata) + 4; // Room for ^^<>
  46. }
  47. out = realloc (out, buf_len);
  48. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  49. sprintf (out, "\"%s\"", escaped);
  50. free (escaped);
  51. // Add datatype.
  52. if (metadata)
  53. out = strcat (strcat (strcat (out, "^^<"), metadata), ">");
  54. rc = LSUP_OK;
  55. break;
  56. case LSUP_TERM_LT_LITERAL:
  57. // Calculate string length.
  58. if (escape_lit (term->data, &escaped) != LSUP_OK)
  59. return LSUP_ERROR;
  60. buf_len = strlen (escaped) + 3; // Room for "" and terminator
  61. if (term->lang != 0) {
  62. metadata = term->lang;
  63. buf_len += strlen (metadata) + 1; // Room for @
  64. }
  65. out = realloc (out, buf_len);
  66. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  67. sprintf (out, "\"%s\"", escaped);
  68. free (escaped);
  69. // Add lang.
  70. if (metadata) out = strcat (strcat (out, "@"), metadata);
  71. rc = LSUP_OK;
  72. break;
  73. case LSUP_TERM_BNODE:
  74. out = realloc (out, strlen (term->data) + 3);
  75. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  76. sprintf (out, "_:%s", term->data);
  77. rc = LSUP_OK;
  78. break;
  79. default:
  80. out = NULL;
  81. rc = LSUP_VALUE_ERR;
  82. }
  83. *out_p = out;
  84. return rc;
  85. }
  86. static LSUP_CodecIterator *
  87. gr_to_nt_init (const LSUP_Graph *gr);
  88. static LSUP_rc
  89. gr_to_nt_iter (LSUP_CodecIterator *it, unsigned char **res) {
  90. LSUP_rc rc = LSUP_graph_iter_next (it->gr_it, it->trp);
  91. if (rc != LSUP_OK) goto finally;
  92. term_to_nt (it->trp->s, it->nsm, &it->str_s);
  93. term_to_nt (it->trp->p, it->nsm, &it->str_p);
  94. term_to_nt (it->trp->o, it->nsm, &it->str_o);
  95. // 3 term separators + dot + newline + terminal = 6
  96. unsigned char *tmp = realloc (
  97. *res, strlen (it->str_s) + strlen (it->str_p)
  98. + strlen (it->str_o) + 6);
  99. if (UNLIKELY (!tmp)) {
  100. *res = NULL;
  101. rc = LSUP_MEM_ERR;
  102. goto finally;
  103. }
  104. sprintf ((char*)tmp, "%s %s %s .\n", it->str_s, it->str_p, it->str_o);
  105. *res = tmp;
  106. it->cur++;
  107. finally:
  108. LSUP_term_free (it->trp->s); it->trp->s = NULL;
  109. LSUP_term_free (it->trp->p); it->trp->p = NULL;
  110. LSUP_term_free (it->trp->o); it->trp->o = NULL;
  111. return rc;
  112. }
  113. static void
  114. gr_to_nt_done (LSUP_CodecIterator *it)
  115. {
  116. LSUP_graph_iter_free (it->gr_it);
  117. LSUP_triple_free (it->trp);
  118. free (it->rep);
  119. free (it->str_s);
  120. free (it->str_p);
  121. free (it->str_o);
  122. free (it);
  123. }
  124. const LSUP_Codec nt_codec = {
  125. .name = "N-Triples",
  126. .mimetype = "application/n-triples",
  127. .extension = "nt",
  128. .encode_term = term_to_nt,
  129. .encode_graph_init = gr_to_nt_init,
  130. .encode_graph_iter = gr_to_nt_iter,
  131. .encode_graph_done = gr_to_nt_done,
  132. .decode_term = LSUP_nt_parse_term,
  133. .decode_graph = LSUP_nt_parse_doc,
  134. };
  135. /* * * Other static functions. * * */
  136. static LSUP_CodecIterator *
  137. gr_to_nt_init (const LSUP_Graph *gr)
  138. {
  139. LSUP_CodecIterator *it;
  140. MALLOC_GUARD (it, NULL);
  141. it->codec = &nt_codec;
  142. it->gr_it = LSUP_graph_lookup(gr, NULL, NULL, NULL, &it->cur);
  143. it->nsm = LSUP_graph_namespace (gr);
  144. it->cur = 0;
  145. it->trp = TRP_DUMMY;
  146. it->rep = NULL;
  147. it->str_s = NULL;
  148. it->str_p = NULL;
  149. it->str_o = NULL;
  150. return it;
  151. }
  152. /** @brief Add escape character (backslash) to illegal literal characters.
  153. */
  154. static LSUP_rc
  155. escape_lit (const char *in, char **out_p)
  156. {
  157. size_t out_size = strlen (in) + 1;
  158. // Expand output string size to accommodate escape characters.
  159. for (
  160. size_t i = strcspn (in, LIT_ECHAR);
  161. i < strlen (in);
  162. i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
  163. out_size ++;
  164. }
  165. char *out = calloc (1, out_size);
  166. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  167. size_t boundary;
  168. boundary = strcspn (in, LIT_ECHAR);
  169. for (size_t i = 0, j = 0;;) {
  170. out = strncat (out, in + i, boundary);
  171. i += boundary;
  172. j += boundary;
  173. if (i >= strlen (in)) break;
  174. out[j++] = '\\';
  175. out[j++] = escape_char (in[i++]);
  176. boundary = strcspn (in + i, LIT_ECHAR);
  177. }
  178. *out_p = out;
  179. return 0;
  180. }