codec_ttl.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. #include "codec/codec_ttl.h"
  2. /** @brief NT codec iterator.
  3. *
  4. * This iterator yields one or more triples at a time, one group per subject,
  5. * with the most compact form allowed by Turtle, e.g.
  6. *
  7. * :s :p1 :o1, :o2, o3; p2 o4, o5, <http://example.com/ext1> .
  8. */
  9. typedef struct {
  10. const LSUP_Codec * codec; ///< Codec that generated this iterator.
  11. const LSUP_Graph * gr; ///< Graph being encoded.
  12. LSUP_TermSet * subjects; ///< All subjects in the graph.
  13. size_t s_cur; ///< Term set cursor.
  14. LSUP_rc rc; ///< Internal return code.
  15. char * s_str; ///< Serialized subject block (output).
  16. char * p_str; ///< Serialized predicate block.
  17. char * o_str; ///< Serialized object block.
  18. } LSUP_TTLCodecIterator;
  19. /* * * Codec functions. * * */
  20. static LSUP_rc
  21. term_to_ttl (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
  22. {
  23. LSUP_rc rc;
  24. char *tmp = NULL, *out = NULL;
  25. char *metadata = NULL;
  26. size_t buf_len;
  27. LSUP_rc md_rc = LSUP_NORESULT;
  28. switch (term->type) {
  29. case LSUP_TERM_IRIREF:
  30. if (strcmp (term->data, LSUP_RDF_TYPE) == 0) {
  31. // Shorten RDF type
  32. buf_len = 2;
  33. tmp = "a";
  34. } else {
  35. md_rc = LSUP_nsmap_denormalize_uri (nsm, term->data, &tmp);
  36. PRCCK (md_rc);
  37. if (md_rc == LSUP_NORESULT) {
  38. // If URI counld not be shortened, add `<>`
  39. out = realloc (*out_p, strlen (tmp) + 3);
  40. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  41. sprintf (out, "<%s>", tmp);
  42. free (tmp);
  43. } else {
  44. free (*out_p);
  45. out = tmp;
  46. }
  47. }
  48. rc = LSUP_OK;
  49. break;
  50. case LSUP_TERM_NS_IRIREF:
  51. if (strcmp (term->data, LSUP_RDF_TYPE_NS) == 0) {
  52. // Shorten RDF type
  53. tmp = "a";
  54. buf_len = 2;
  55. } else {
  56. tmp = term->data;
  57. buf_len = strlen (term->data) + 1;
  58. }
  59. out = realloc (*out_p, buf_len);
  60. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  61. strcpy (out, tmp);
  62. rc = LSUP_OK;
  63. break;
  64. case LSUP_TERM_LITERAL:
  65. // Calculate string length.
  66. if (escape_lit (term->data, &tmp) != LSUP_OK)
  67. return LSUP_ERROR;
  68. buf_len = strlen (tmp) + 3; // Room for "" and terminator
  69. // Data type.
  70. bool shorten = false;
  71. if (
  72. term->datatype != 0
  73. && term->datatype != LSUP_default_datatype
  74. ) {
  75. md_rc = LSUP_nsmap_denormalize_uri (
  76. nsm, term->datatype->data, &metadata);
  77. PRCCK (md_rc);
  78. unsigned padding = 0;
  79. // Shorten numeric and boolean types.
  80. if (strcmp (metadata, "xsd:integer") == 0) {
  81. // TODO check for valid format.
  82. shorten = true;
  83. } else if (strcmp (metadata, "xsd:double") == 0) {
  84. // TODO check for valid format.
  85. shorten = true;
  86. } else if (strcmp (metadata, "xsd:decimal") == 0) {
  87. // TODO check for valid format.
  88. shorten = true;
  89. } else if (strcmp (metadata, "xsd:boolean") == 0) {
  90. // TODO check for valid format.
  91. shorten = true;
  92. } else {
  93. // Room for `^^<>` for FQURI, `^^` for NS URI
  94. padding = md_rc == LSUP_NORESULT ? 4 : 2;
  95. }
  96. buf_len += strlen (metadata) + padding;
  97. }
  98. out = realloc (*out_p, buf_len);
  99. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  100. if (shorten) {
  101. strcpy (out, tmp);
  102. } else if (metadata) {
  103. char *fmt = (
  104. md_rc == LSUP_NORESULT ? "\"%s\"^^<%s>"
  105. : "\"%s\"^^%s");
  106. sprintf (out, fmt, tmp, metadata);
  107. }
  108. else {
  109. sprintf (out, "\"%s\"", tmp);
  110. }
  111. free (tmp);
  112. rc = LSUP_OK;
  113. break;
  114. case LSUP_TERM_LT_LITERAL:
  115. // Calculate string length.
  116. if (escape_lit (term->data, &tmp) != LSUP_OK)
  117. return LSUP_ERROR;
  118. buf_len = strlen (tmp) + 3; // Room for "" and terminator
  119. if (term->lang[0] != '\0') {
  120. metadata = strndup (term->lang, sizeof (LSUP_LangTag));
  121. buf_len += strlen (metadata) + 1; // Room for @
  122. }
  123. out = realloc (*out_p, buf_len);
  124. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  125. sprintf (out, "\"%s\"", tmp);
  126. free (tmp);
  127. // Add lang.
  128. if (metadata) out = strcat (strcat (out, "@"), metadata);
  129. rc = LSUP_OK;
  130. break;
  131. case LSUP_TERM_BNODE:
  132. out = realloc (*out_p, strlen (term->data) + 3);
  133. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  134. sprintf (out, "_:%s", term->data);
  135. rc = LSUP_OK;
  136. break;
  137. default:
  138. out = *out_p; // This is considered garbage.
  139. rc = LSUP_PARSE_ERR;
  140. }
  141. free (metadata);
  142. *out_p = out;
  143. return rc;
  144. }
  145. static void *
  146. gr_to_ttl_init (const LSUP_Graph *gr)
  147. {
  148. LSUP_TTLCodecIterator *it;
  149. CALLOC_GUARD (it, NULL);
  150. it->codec = &ttl_codec;
  151. it->gr = gr;
  152. it->subjects = LSUP_graph_unique_terms (gr, TRP_POS_S);
  153. // Sets the condition to build the prolog on 1st iteration.
  154. it->rc = LSUP_NORESULT;
  155. return it;
  156. }
  157. /// Build header and prolog.
  158. static LSUP_rc
  159. build_prolog (LSUP_TTLCodecIterator *it, char **res_p)
  160. {
  161. char *res = fmt_header ("# ");
  162. const char ***nsm = LSUP_nsmap_dump (LSUP_graph_namespace (it->gr));
  163. char *ns_tpl = "@prefix %s: <%s> .\n";
  164. // Prefix map.
  165. for (size_t i = 0; nsm[i]; i++) {
  166. const char **ns = nsm[i];
  167. size_t old_len = strlen (res);
  168. size_t ns_len = strlen (ns[0]) + strlen (ns[1]) + strlen (ns_tpl);
  169. char *tmp = realloc (res, old_len + ns_len + 1);
  170. if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
  171. res = tmp;
  172. sprintf (res + old_len, ns_tpl, ns[0], ns[1]);
  173. free (ns);
  174. }
  175. free (nsm);
  176. // Base.
  177. char *base_uri_str = NULL;
  178. LSUP_rc rc = LSUP_nsmap_denormalize_uri (
  179. LSUP_graph_namespace (it->gr), LSUP_graph_uri (it->gr)->data,
  180. &base_uri_str);
  181. PRCCK (rc);
  182. char *base_stmt_tpl = "\n@base <%s> .\n\n";
  183. char *base_stmt = malloc (strlen (base_stmt_tpl) + strlen (base_uri_str));
  184. if (!UNLIKELY (base_stmt)) return LSUP_MEM_ERR;
  185. sprintf (base_stmt, base_stmt_tpl, base_uri_str);
  186. free (base_uri_str);
  187. res = realloc (res, strlen (res) + strlen (base_stmt) + 1);
  188. if (!UNLIKELY (res)) return LSUP_MEM_ERR;
  189. res = strcat (res, base_stmt);
  190. free (base_stmt);
  191. *res_p = res;
  192. it->rc = LSUP_OK;
  193. return LSUP_OK;
  194. }
  195. /// Encode all the triples for a single subject.
  196. static LSUP_rc
  197. gr_to_ttl_iter (void *h, char **res_p) {
  198. LSUP_TTLCodecIterator *it = h;
  199. if (it->rc == LSUP_NORESULT) return build_prolog (it, res_p);
  200. LSUP_Term *s = NULL;
  201. char *res = *res_p; // Result string will be reallocated.
  202. RCCK (LSUP_term_set_next (it->subjects, &it->s_cur, &s));
  203. term_to_ttl (s, LSUP_graph_namespace (it->gr), &res);
  204. LSUP_LinkMap *lmap = LSUP_graph_connections (
  205. it->gr, s, LSUP_LINK_OUTBOUND);
  206. LSUP_LinkMapIterator *lmit = LSUP_link_map_iter_new (lmap, s);
  207. LSUP_Term *p = NULL;
  208. LSUP_TermSet *o_ts = NULL;
  209. char *p_join = " ";
  210. // Begin predicate loop.
  211. while (LSUP_link_map_next (lmit, &p, &o_ts) != LSUP_END) {
  212. // Add predicate representation.
  213. RCCK (term_to_ttl (p, LSUP_graph_namespace (it->gr), &it->p_str));
  214. char *tmp = realloc (
  215. res, strlen (res) + strlen (it->p_str) + strlen (p_join) + 1);
  216. if (UNLIKELY (!tmp)) goto memfail;
  217. res = strcat (strcat (tmp, p_join), it->p_str);
  218. p_join = " ; ";
  219. // Add objects for predicate.
  220. size_t i = 0;
  221. LSUP_Term *o = NULL;
  222. char *o_join = " ";
  223. while (LSUP_term_set_next (o_ts, &i, &o) != LSUP_END) {
  224. it->rc = term_to_ttl (
  225. o, LSUP_graph_namespace (it->gr), &it->o_str);
  226. RCCK (it->rc);
  227. char *tmp = realloc (
  228. res, strlen (res) + strlen (it->o_str) + strlen (o_join) + 1);
  229. if (UNLIKELY (!tmp)) goto memfail;
  230. res = strcat (strcat (tmp, o_join), it->o_str);
  231. o_join = " , ";
  232. }
  233. }
  234. char *s_sep = " .\n";
  235. char *tmp = realloc (res, strlen (res) + strlen (s_sep) + 1);
  236. if (UNLIKELY (!tmp)) goto memfail;
  237. *res_p = strcat (tmp, s_sep);
  238. LSUP_link_map_iter_free (lmit);
  239. LSUP_link_map_free (lmap);
  240. return it->rc;
  241. memfail:
  242. free (res);
  243. *res_p = NULL;
  244. return LSUP_MEM_ERR;
  245. }
  246. static void
  247. gr_to_ttl_done (void *h)
  248. {
  249. LSUP_TTLCodecIterator *it = h;
  250. LSUP_term_set_free (it->subjects);
  251. free (it->s_str);
  252. free (it->p_str);
  253. free (it->o_str);
  254. free (it);
  255. }
  256. const LSUP_Codec ttl_codec = {
  257. .name = "Turtle",
  258. .mimetype = "text/turtle",
  259. .extension = "ttl",
  260. .encode_term = term_to_ttl,
  261. .encode_graph_init = gr_to_ttl_init,
  262. .encode_graph_iter = gr_to_ttl_iter,
  263. .encode_graph_done = gr_to_ttl_done,
  264. //.decode_term = LSUP_ttl_parse_term,
  265. .decode_graph = LSUP_ttl_parse_doc,
  266. };