codec_nt.c 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. #include "codec_nt.h"
  /** @brief List of characters to be escaped in serialized literals.
   *
   * Each of these is written out as a backslash followed by a replacement
   * character when a literal is serialized.
   *
   * https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
   */
  #define LIT_ECHAR "\t\b\n\r\f\"\'\\"

  /** @brief Regex of characters to be escaped in serialized IRIs.
   *
   * The range deliberately starts at \x01 rather than \x00: a "\x00" escape
   * inside a C string literal is a NUL byte, which terminates the string and
   * would reduce the whole pattern to "[". Since subject strings are
   * NUL-terminated as well, no matchable character is lost.
   *
   * https://www.w3.org/TR/n-triples/#grammar-production-IRIREF
   */
  #define IRI_ECHAR_PTN "[\x01-\x20<>\"\\{\\}\\|\\^`\\\\]"

  /** @brief Default NT literal type.
   *
   * Literals carrying this data type are serialized without a data type
   * suffix.
   */
  #define XSD_STRING "http://www.w3.org/2001/XMLSchema#string"
  15. /* * * Static prototypes. * * */
  16. static LSUP_rc escape_lit (const char *in, char **out_p);
  17. /* * * Codec functions. * * */
  18. static LSUP_rc
  19. term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
  20. {
  21. LSUP_rc rc;
  22. char *out = NULL, *tmp, *escaped;
  23. size_t buf_len;
  24. // Free previous content if not NULL.
  25. if (*out_p != NULL) out = realloc (*out_p, 0);
  26. switch (term->type) {
  27. case LSUP_TERM_URI:
  28. tmp = realloc (out, strlen (term->data) + 3);
  29. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  30. out = tmp;
  31. sprintf (out, "<%s>", term->data);
  32. rc = LSUP_OK;
  33. break;
  34. case LSUP_TERM_LITERAL:
  35. // Calculate string length.
  36. if (escape_lit (term->data, &escaped) != LSUP_OK)
  37. return LSUP_ERROR;
  38. buf_len = strlen (escaped) + 3; // Room for "" and terminator
  39. if (term->datatype && strcmp (term->datatype, XSD_STRING) != 0)
  40. buf_len += strlen (term->datatype) + 2; // Room for ^^
  41. if (strlen (term->lang) > 0)
  42. buf_len += strlen(term->lang) + 1; // Room for @
  43. TRACE ("nt rep length: %lu\n", buf_len);
  44. tmp = realloc (out, buf_len);
  45. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  46. out = tmp;
  47. sprintf (out, "\"%s\"", escaped);
  48. free (escaped);
  49. // Always suppress xsd:string data type.
  50. if (term->datatype && strcmp (term->datatype, XSD_STRING) != 0)
  51. out = strcat (strcat (out, "^^"), term->datatype);
  52. if (strlen (term->lang) > 0)
  53. out = strcat (strcat (out, "@"), term->lang);
  54. rc = LSUP_OK;
  55. break;
  56. case LSUP_TERM_BNODE:
  57. tmp = realloc (out, strlen (term->data) + 3);
  58. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  59. out = tmp;
  60. sprintf (out, "_:%s", term->data);
  61. rc = LSUP_OK;
  62. break;
  63. default:
  64. out = NULL;
  65. rc = LSUP_VALUE_ERR;
  66. }
  67. *out_p = out;
  68. return rc;
  69. }
  70. static LSUP_rc
  71. nt_to_term (const char *rep, const LSUP_NSMap *nsm, LSUP_Term **term)
  72. {
  73. // TODO
  74. return LSUP_NOT_IMPL_ERR;
  75. }
  76. static LSUP_CodecIterator *
  77. gr_to_nt_init (const LSUP_Graph *gr)
  78. {
  79. LSUP_CodecIterator *it;
  80. MALLOC_GUARD (it, NULL);
  81. LSUP_Triple lut = {NULL, NULL, NULL};
  82. it->gr_it = LSUP_graph_lookup(gr, &lut, &it->cur);
  83. it->nsm = LSUP_graph_namespace (gr);
  84. it->cur = 0;
  85. it->trp = LSUP_triple_new (TERM_DUMMY, TERM_DUMMY, TERM_DUMMY);
  86. it->rep = NULL;
  87. it->str_s = NULL;
  88. it->str_p = NULL;
  89. it->str_o = NULL;
  90. return it;
  91. }
  92. static LSUP_rc
  93. gr_to_nt_iter (LSUP_CodecIterator *it) {
  94. LSUP_rc rc = LSUP_graph_iter_next (it->gr_it, it->trp);
  95. if (rc != LSUP_OK) return rc;
  96. term_to_nt (it->trp->s, it->nsm, &it->str_s);
  97. term_to_nt (it->trp->p, it->nsm, &it->str_p);
  98. term_to_nt (it->trp->o, it->nsm, &it->str_o);
  99. char *tmp = realloc (
  100. it->rep, strlen (it->str_s) + strlen (it->str_p)
  101. + strlen (it->str_o) + 6);
  102. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  103. it->rep = tmp;
  104. sprintf (it->rep, "%s %s %s .\n", it->str_s, it->str_p, it->str_o);
  105. it->cur++;
  106. return LSUP_OK;
  107. }
  108. static void
  109. gr_to_nt_done (LSUP_CodecIterator *it)
  110. {
  111. LSUP_graph_iter_free (it->gr_it);
  112. LSUP_triple_free (it->trp);
  113. free (it->rep);
  114. free (it->str_s);
  115. free (it->str_p);
  116. free (it->str_o);
  117. free (it);
  118. }
  119. static LSUP_CodecIterator *
  120. nt_to_gr_init (const LSUP_Graph *gr)
  121. {
  122. // TODO
  123. return NULL;
  124. }
  125. static LSUP_rc
  126. nt_to_gr_iter (LSUP_CodecIterator *it) {
  127. // TODO
  128. return LSUP_NOT_IMPL_ERR;
  129. }
  130. static void
  131. nt_to_gr_done (LSUP_CodecIterator *it) {
  132. free (it);
  133. }
  134. const LSUP_Codec nt_codec = {
  135. .name = "N-Triples",
  136. .mimetype = "application/n-triples",
  137. .extension = "nt",
  138. .term_encoder = term_to_nt,
  139. .term_decoder = nt_to_term,
  140. .gr_encode_init = gr_to_nt_init,
  141. .gr_encode_iter = gr_to_nt_iter,
  142. .gr_encode_done = gr_to_nt_done,
  143. .gr_decode_init = nt_to_gr_init,
  144. .gr_decode_iter = nt_to_gr_iter,
  145. .gr_decode_done = nt_to_gr_done,
  146. };
  147. /* * * Other internal functions. * * */
  /** Map a control character to the letter used in its escape sequence.
   *
   * Tab, backspace, newline, carriage return and form feed yield 't', 'b',
   * 'n', 'r' and 'f' respectively; any other character is returned unchanged.
   *
   * The escaping backslash is to be added separately by the caller.
   */
  static inline char
  replace_char (const char c)
  {
      static const char controls[] = "\t\b\n\r\f";
      static const char letters[] = "tbnrf";

      // strchr() would match the terminator of `controls` for a NUL input.
      if (c == '\0') return c;

      const char *hit = strchr (controls, c);

      return hit ? letters[hit - controls] : c;
  }
  162. /** @brief Add escape character (backslash) to illegal literal characters.
  163. */
  164. static LSUP_rc
  165. escape_lit (const char *in, char **out_p)
  166. {
  167. size_t out_size = strlen (in) + 1;
  168. // Expand output string size to accommodate escape characters.
  169. //size_t i = strcspn (in, LIT_ECHAR);
  170. for (
  171. size_t i = strcspn (in, LIT_ECHAR);
  172. i < strlen (in);
  173. i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
  174. out_size ++;
  175. }
  176. char *out = calloc (1, out_size);
  177. size_t boundary;
  178. boundary = strcspn (in, LIT_ECHAR);
  179. for (size_t i = 0, j = 0;;) {
  180. out = strncat (out, in + i, boundary);
  181. i += boundary;
  182. j += boundary;
  183. if (i >= strlen (in)) break;
  184. out[j++] = '\\';
  185. out[j++] = replace_char (in[i++]);
  186. boundary = strcspn (in + i, LIT_ECHAR);
  187. }
  188. *out_p = out;
  189. return 0;
  190. }