123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326 |
- #include "codec/codec_ttl.h"
- /** @brief TTL codec iterator.
- *
- * State carried across calls while a graph is being encoded. Each iteration
- * yields all the triples for one subject, in the most compact form allowed
- * by Turtle, e.g.
- *
- * :s :p1 :o1 , :o2 , :o3 ; :p2 :o4 , :o5 , <http://example.com/ext1> .
- *
- * The first iteration yields the header and prolog instead of a subject.
- */
- typedef struct {
- const LSUP_Codec * codec; ///< Codec that generated this iterator.
- const LSUP_Graph * gr; ///< Graph being encoded.
- LSUP_TermSet * subjects; ///< Unique subject terms of the graph.
- size_t s_cur; ///< Cursor into `subjects`.
- LSUP_rc rc; ///< LSUP_NORESULT until the prolog is built; then the last object encoding result.
- char * s_str; ///< Subject buffer; freed on teardown, otherwise unused in the visible code.
- char * p_str; ///< Scratch buffer reused for each serialized predicate.
- char * o_str; ///< Scratch buffer reused for each serialized object.
- } LSUP_TTLCodecIterator;
- /* * * Codec functions. * * */
- static LSUP_rc
- term_to_ttl (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
- {
- LSUP_rc rc;
- char *tmp = NULL, *out = NULL;
- char *metadata = NULL;
- size_t buf_len;
- LSUP_rc md_rc = LSUP_NORESULT;
- switch (term->type) {
- case LSUP_TERM_IRIREF:
- if (strcmp (term->data, LSUP_RDF_TYPE) == 0) {
- // Shorten RDF type
- buf_len = 2;
- tmp = "a";
- } else {
- md_rc = LSUP_nsmap_denormalize_uri (nsm, term->data, &tmp);
- PRCCK (md_rc);
- if (md_rc == LSUP_NORESULT) {
- // If URI counld not be shortened, add `<>`
- out = realloc (*out_p, strlen (tmp) + 3);
- if (UNLIKELY (!out)) return LSUP_MEM_ERR;
- sprintf (out, "<%s>", tmp);
- free (tmp);
- } else {
- free (*out_p);
- out = tmp;
- }
- }
- rc = LSUP_OK;
- break;
- case LSUP_TERM_NS_IRIREF:
- if (strcmp (term->data, LSUP_RDF_TYPE_NS) == 0) {
- // Shorten RDF type
- tmp = "a";
- buf_len = 2;
- } else {
- tmp = term->data;
- buf_len = strlen (term->data) + 1;
- }
- out = realloc (*out_p, buf_len);
- if (UNLIKELY (!out)) return LSUP_MEM_ERR;
- strcpy (out, tmp);
- rc = LSUP_OK;
- break;
- case LSUP_TERM_LITERAL:
- // Calculate string length.
- if (escape_lit (term->data, &tmp) != LSUP_OK)
- return LSUP_ERROR;
- buf_len = strlen (tmp) + 3; // Room for "" and terminator
- // Data type.
- bool shorten = false;
- if (
- term->datatype != 0
- && term->datatype != LSUP_default_datatype
- ) {
- md_rc = LSUP_nsmap_denormalize_uri (
- nsm, term->datatype->data, &metadata);
- PRCCK (md_rc);
- unsigned padding = 0;
- // Shorten numeric and boolean types.
- if (strcmp (metadata, "xsd:integer") == 0) {
- // TODO check for valid format.
- shorten = true;
- } else if (strcmp (metadata, "xsd:double") == 0) {
- // TODO check for valid format.
- shorten = true;
- } else if (strcmp (metadata, "xsd:decimal") == 0) {
- // TODO check for valid format.
- shorten = true;
- } else if (strcmp (metadata, "xsd:boolean") == 0) {
- // TODO check for valid format.
- shorten = true;
- } else {
- // Room for `^^<>` for FQURI, `^^` for NS URI
- padding = md_rc == LSUP_NORESULT ? 4 : 2;
- }
- buf_len += strlen (metadata) + padding;
- }
- out = realloc (*out_p, buf_len);
- if (UNLIKELY (!out)) return LSUP_MEM_ERR;
- if (shorten) {
- strcpy (out, tmp);
- } else if (metadata) {
- char *fmt = (
- md_rc == LSUP_NORESULT ? "\"%s\"^^<%s>"
- : "\"%s\"^^%s");
- sprintf (out, fmt, tmp, metadata);
- }
- else {
- sprintf (out, "\"%s\"", tmp);
- }
- free (tmp);
- rc = LSUP_OK;
- break;
- case LSUP_TERM_LT_LITERAL:
- // Calculate string length.
- if (escape_lit (term->data, &tmp) != LSUP_OK)
- return LSUP_ERROR;
- buf_len = strlen (tmp) + 3; // Room for "" and terminator
- if (term->lang[0] != '\0') {
- metadata = strndup (term->lang, sizeof (LSUP_LangTag));
- buf_len += strlen (metadata) + 1; // Room for @
- }
- out = realloc (*out_p, buf_len);
- if (UNLIKELY (!out)) return LSUP_MEM_ERR;
- sprintf (out, "\"%s\"", tmp);
- free (tmp);
- // Add lang.
- if (metadata) out = strcat (strcat (out, "@"), metadata);
- rc = LSUP_OK;
- break;
- case LSUP_TERM_BNODE:
- out = realloc (*out_p, strlen (term->data) + 3);
- if (UNLIKELY (!out)) return LSUP_MEM_ERR;
- sprintf (out, "_:%s", term->data);
- rc = LSUP_OK;
- break;
- default:
- out = *out_p; // This is considered garbage.
- rc = LSUP_PARSE_ERR;
- }
- free (metadata);
- *out_p = out;
- return rc;
- }
- static void *
- gr_to_ttl_init (const LSUP_Graph *gr)
- {
- LSUP_TTLCodecIterator *it;
- CALLOC_GUARD (it, NULL);
- it->codec = &ttl_codec;
- it->gr = gr;
- it->subjects = LSUP_graph_unique_terms (gr, TRP_POS_S);
- // Sets the condition to build the prolog on 1st iteration.
- it->rc = LSUP_NORESULT;
- return it;
- }
- /// Build header and prolog.
- static LSUP_rc
- build_prolog (LSUP_TTLCodecIterator *it, char **res_p)
- {
- char *res = fmt_header ("# ");
- const char ***nsm = LSUP_nsmap_dump (LSUP_graph_namespace (it->gr));
- char *ns_tpl = "@prefix %s: <%s> .\n";
- // Prefix map.
- for (size_t i = 0; nsm[i]; i++) {
- const char **ns = nsm[i];
- size_t old_len = strlen (res);
- size_t ns_len = strlen (ns[0]) + strlen (ns[1]) + strlen (ns_tpl);
- char *tmp = realloc (res, old_len + ns_len + 1);
- if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
- res = tmp;
- sprintf (res + old_len, ns_tpl, ns[0], ns[1]);
- free (ns);
- }
- free (nsm);
- // Base.
- char *base_uri_str = NULL;
- LSUP_rc rc = LSUP_nsmap_denormalize_uri (
- LSUP_graph_namespace (it->gr), LSUP_graph_uri (it->gr)->data,
- &base_uri_str);
- PRCCK (rc);
- char *base_stmt_tpl = "\n@base <%s> .\n\n";
- char *base_stmt = malloc (strlen (base_stmt_tpl) + strlen (base_uri_str));
- if (!UNLIKELY (base_stmt)) return LSUP_MEM_ERR;
- sprintf (base_stmt, base_stmt_tpl, base_uri_str);
- free (base_uri_str);
- res = realloc (res, strlen (res) + strlen (base_stmt) + 1);
- if (!UNLIKELY (res)) return LSUP_MEM_ERR;
- res = strcat (res, base_stmt);
- free (base_stmt);
- *res_p = res;
- it->rc = LSUP_OK;
- return LSUP_OK;
- }
- /// Encode all the triples for a single subject.
- static LSUP_rc
- gr_to_ttl_iter (void *h, char **res_p) {
- LSUP_TTLCodecIterator *it = h;
- if (it->rc == LSUP_NORESULT) return build_prolog (it, res_p);
- LSUP_Term *s = NULL;
- char *res = *res_p; // Result string will be reallocated.
- RCCK (LSUP_term_set_next (it->subjects, &it->s_cur, &s));
- term_to_ttl (s, LSUP_graph_namespace (it->gr), &res);
- LSUP_LinkMap *lmap = LSUP_graph_connections (
- it->gr, s, LSUP_LINK_OUTBOUND);
- LSUP_LinkMapIterator *lmit = LSUP_link_map_iter_new (lmap, s);
- LSUP_Term *p = NULL;
- LSUP_TermSet *o_ts = NULL;
- char *p_join = " ";
- // Begin predicate loop.
- while (LSUP_link_map_next (lmit, &p, &o_ts) != LSUP_END) {
- // Add predicate representation.
- RCCK (term_to_ttl (p, LSUP_graph_namespace (it->gr), &it->p_str));
- char *tmp = realloc (
- res, strlen (res) + strlen (it->p_str) + strlen (p_join) + 1);
- if (UNLIKELY (!tmp)) goto memfail;
- res = strcat (strcat (tmp, p_join), it->p_str);
- p_join = " ; ";
- // Add objects for predicate.
- size_t i = 0;
- LSUP_Term *o = NULL;
- char *o_join = " ";
- while (LSUP_term_set_next (o_ts, &i, &o) != LSUP_END) {
- it->rc = term_to_ttl (
- o, LSUP_graph_namespace (it->gr), &it->o_str);
- RCCK (it->rc);
- char *tmp = realloc (
- res, strlen (res) + strlen (it->o_str) + strlen (o_join) + 1);
- if (UNLIKELY (!tmp)) goto memfail;
- res = strcat (strcat (tmp, o_join), it->o_str);
- o_join = " , ";
- }
- }
- char *s_sep = " .\n";
- char *tmp = realloc (res, strlen (res) + strlen (s_sep) + 1);
- if (UNLIKELY (!tmp)) goto memfail;
- *res_p = strcat (tmp, s_sep);
- LSUP_link_map_iter_free (lmit);
- LSUP_link_map_free (lmap);
- return it->rc;
- memfail:
- free (res);
- *res_p = NULL;
- return LSUP_MEM_ERR;
- }
- static void
- gr_to_ttl_done (void *h)
- {
- LSUP_TTLCodecIterator *it = h;
- LSUP_term_set_free (it->subjects);
- free (it->s_str);
- free (it->p_str);
- free (it->o_str);
- free (it);
- }
- const LSUP_Codec ttl_codec = {
- .name = "Turtle",
- .mimetype = "text/turtle",
- .extension = "ttl",
- .encode_term = term_to_ttl,
- .encode_graph_init = gr_to_ttl_init,
- .encode_graph_iter = gr_to_ttl_iter,
- .encode_graph_done = gr_to_ttl_done,
- //.decode_term = LSUP_ttl_parse_term,
- .decode_graph = LSUP_ttl_parse_doc,
- };
|