/* codec_ttl.c: Turtle (TTL) codec. */
  1. #include "codec/codec_ttl.h"
  2. /** @brief NT codec iterator.
  3. *
  4. * This iterator yields one or more triples at a time, one group per subject,
  5. * with the most compact form allowed by Turtle, e.g.
  6. *
  7. * :s :p1 :o1, :o2, o3; p2 o4, o5, <http://example.com/ext1> .
  8. */
  9. typedef struct {
  10. const LSUP_Codec * codec; ///< Codec that generated this iterator.
  11. const LSUP_Graph *gr; ///< Graph being encoded.
  12. LSUP_TermSet * subjects; ///< All subjects in the graph.
  13. size_t s_cur; ///< Term set cursor.
  14. LSUP_rc rc; ///< Internal return code.
  15. } LSUP_TTLCodecIterator;
  16. /* * * Codec functions. * * */
  17. static LSUP_rc
  18. term_to_ttl (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
  19. {
  20. LSUP_rc rc;
  21. char *out = NULL, *escaped;
  22. char *metadata = NULL;
  23. size_t buf_len;
  24. LSUP_rc md_rc;
  25. switch (term->type) {
  26. case LSUP_TERM_IRIREF:
  27. md_rc = LSUP_nsmap_denormalize_uri (nsm, term->data, &out);
  28. PRCCK (md_rc);
  29. if (md_rc == LSUP_NORESULT) {
  30. // If URI counld not be shortened, add `<>`
  31. char *tmp = realloc (out, strlen (out) + 2);
  32. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  33. out = tmp;
  34. out = strcat (strcat (strcat (tmp, "<"), tmp), ">");
  35. }
  36. rc = LSUP_OK;
  37. break;
  38. case LSUP_TERM_NS_IRIREF:
  39. out = strdup (term->data);
  40. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  41. rc = LSUP_OK;
  42. break;
  43. case LSUP_TERM_LITERAL:
  44. // Calculate string length.
  45. if (escape_lit (term->data, &escaped) != LSUP_OK)
  46. return LSUP_ERROR;
  47. buf_len = strlen (escaped) + 3; // Room for "" and terminator
  48. if (
  49. term->datatype != 0
  50. && term->datatype != LSUP_default_datatype
  51. ) {
  52. md_rc = LSUP_nsmap_denormalize_uri (
  53. nsm, term->datatype->data, &metadata);
  54. PRCCK (md_rc);
  55. // Room for `^^<>` for FQURI, `^^` for NS URI
  56. unsigned padding = md_rc == LSUP_NORESULT ? 4 : 2;
  57. buf_len += strlen (metadata) + padding;
  58. }
  59. out = realloc (out, buf_len);
  60. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  61. sprintf (out, "\"%s\"", escaped);
  62. free (escaped);
  63. // Add datatype.
  64. if (metadata)
  65. out = strcat (strcat (strcat (out, "^^<"), metadata), ">");
  66. rc = LSUP_OK;
  67. break;
  68. case LSUP_TERM_LT_LITERAL:
  69. // Calculate string length.
  70. if (escape_lit (term->data, &escaped) != LSUP_OK)
  71. return LSUP_ERROR;
  72. buf_len = strlen (escaped) + 3; // Room for "" and terminator
  73. if (term->lang[0] != '\0') {
  74. metadata = strndup (term->lang, sizeof (LSUP_LangTag));
  75. buf_len += strlen (metadata) + 1; // Room for @
  76. }
  77. out = realloc (out, buf_len);
  78. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  79. sprintf (out, "\"%s\"", escaped);
  80. free (escaped);
  81. // Add lang.
  82. if (metadata) out = strcat (strcat (out, "@"), metadata);
  83. rc = LSUP_OK;
  84. break;
  85. case LSUP_TERM_BNODE:
  86. out = realloc (out, strlen (term->data) + 3);
  87. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  88. sprintf (out, "_:%s", term->data);
  89. rc = LSUP_OK;
  90. break;
  91. default:
  92. out = NULL;
  93. rc = LSUP_PARSE_ERR;
  94. }
  95. free (metadata);
  96. *out_p = out;
  97. return rc;
  98. }
  99. static void *
  100. gr_to_ttl_init (const LSUP_Graph *gr)
  101. {
  102. LSUP_TTLCodecIterator *it;
  103. MALLOC_GUARD (it, NULL);
  104. it->codec = &ttl_codec;
  105. it->gr = gr;
  106. it->subjects = LSUP_graph_unique_terms (gr, TRP_POS_S);
  107. // Sets the condition to build the prolog on 1st iteration.
  108. it->s_cur = 0;
  109. it->rc = LSUP_NORESULT;
  110. return it;
  111. }
  112. /// Build header and prolog.
  113. static LSUP_rc
  114. build_prolog (LSUP_TTLCodecIterator *it, char **res_p)
  115. {
  116. char *res = fmt_header ("# ");
  117. const char ***nsm = LSUP_nsmap_dump (LSUP_graph_namespace (it->gr));
  118. const char *ns_tpl = "@prefix %s: <%s> .\n";
  119. // Prefix map.
  120. for (size_t i = 0; nsm[i]; i++) {
  121. const char **ns = nsm[i];
  122. size_t old_len = strlen (res);
  123. size_t ns_len = strlen (ns[0]) + strlen (ns[1]) + strlen (ns_tpl);
  124. char *tmp = realloc (res, old_len + ns_len);
  125. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  126. res = tmp;
  127. sprintf (res + old_len, ns_tpl, ns[0], ns[1]);
  128. }
  129. // Base.
  130. char *base_uri_str;
  131. LSUP_rc rc = LSUP_nsmap_denormalize_uri (
  132. LSUP_graph_namespace (it->gr), LSUP_graph_uri (it->gr)->data,
  133. &base_uri_str);
  134. PRCCK (rc);
  135. char *base_stmt_tpl = "\n@base <%s> .\n\n";
  136. char *base_stmt = malloc (strlen (base_stmt_tpl) + strlen (base_uri_str));
  137. if (!UNLIKELY (base_stmt)) return LSUP_MEM_ERR;
  138. sprintf (base_stmt, base_stmt_tpl, base_uri_str);
  139. res = realloc (res, strlen (res) + strlen (base_stmt));
  140. if (!UNLIKELY (res)) return LSUP_MEM_ERR;
  141. res = strcat (res, base_stmt);
  142. *res_p = res;
  143. it->rc = LSUP_OK;
  144. return LSUP_OK;
  145. }
  146. /// Encode all the triples for a single subject.
  147. static LSUP_rc
  148. gr_to_ttl_iter (void *h, char **res_p) {
  149. LSUP_TTLCodecIterator *it = h;
  150. if (it->rc == LSUP_NORESULT) return build_prolog (it, res_p);
  151. LSUP_Term *s = NULL;
  152. char *res = NULL; // Result string.
  153. RCCK (LSUP_term_set_next (it->subjects, &it->s_cur, &s));
  154. term_to_ttl (s, LSUP_graph_namespace (it->gr), &res);
  155. LSUP_LinkMap *lmap = LSUP_graph_connections (
  156. it->gr, s, LSUP_LINK_OUTBOUND);
  157. LSUP_LinkMapIterator *lmit = LSUP_link_map_iter_new (lmap, s);
  158. LSUP_Term *p = NULL;
  159. char *p_str = NULL;
  160. LSUP_TermSet *o_ts = NULL;
  161. char *p_join = " ";
  162. // Begin predicate loop.
  163. while (LSUP_link_map_next (lmit, &p, &o_ts) != LSUP_END) {
  164. // Add predicate representation.
  165. RCCK (term_to_ttl (p, LSUP_graph_namespace (it->gr), &p_str));
  166. char *tmp = realloc (
  167. res, strlen (res) + strlen (p_str) + strlen (p_join) + 1);
  168. if (UNLIKELY (!tmp)) {
  169. it->rc = LSUP_MEM_ERR;
  170. goto finally;
  171. }
  172. res = strcat (strcat (tmp, p_join), p_str);
  173. free (p_str);
  174. p_join = "; ";
  175. // Add objects for predicate.
  176. size_t i = 0;
  177. LSUP_Term *o = NULL;
  178. char *o_str = NULL;
  179. char *o_join = " ";
  180. while (LSUP_term_set_next (o_ts, &i, &o) != LSUP_END) {
  181. it->rc = term_to_ttl (o, LSUP_graph_namespace (it->gr), &o_str);
  182. RCCK (it->rc);
  183. char *tmp = realloc (
  184. res, strlen (res) + strlen (o_str) + strlen (o_join) + 1);
  185. if (UNLIKELY (!tmp)) {
  186. it->rc = LSUP_MEM_ERR;
  187. goto finally;
  188. }
  189. res = strcat (strcat (tmp, o_join), o_str);
  190. o_join = ", ";
  191. }
  192. free (o_str);
  193. }
  194. char *s_sep = ".\n";
  195. char *tmp = realloc (res, strlen (res) + strlen (s_sep) + 1);
  196. if (UNLIKELY (!tmp)) {
  197. res = NULL;
  198. it->rc = LSUP_MEM_ERR;
  199. goto finally;
  200. }
  201. *res_p = strcat (res, s_sep);
  202. finally:
  203. LSUP_link_map_iter_free (lmit);
  204. return it->rc;
  205. }
  206. static void
  207. gr_to_ttl_done (void *h)
  208. {
  209. LSUP_TTLCodecIterator *it = h;
  210. LSUP_term_set_free (it->subjects);
  211. free (it);
  212. }
  213. const LSUP_Codec ttl_codec = {
  214. .name = "Turtle",
  215. .mimetype = "text/turtle",
  216. .extension = "ttl",
  217. .encode_term = term_to_ttl,
  218. .encode_graph_init = gr_to_ttl_init,
  219. .encode_graph_iter = gr_to_ttl_iter,
  220. .encode_graph_done = gr_to_ttl_done,
  221. //.decode_term = LSUP_ttl_parse_term,
  222. .decode_graph = LSUP_ttl_parse_doc,
  223. };