#include "codec/codec_ttl.h"


/** @brief Turtle codec iterator.
 *
 * This iterator yields one or more triples at a time, one group per subject,
 * with the most compact form allowed by Turtle, e.g.
 *
 *     :s :p1 :o1 , :o2 , :o3 ; :p2 :o4 , :o5 .
 */
typedef struct {
    const LSUP_Codec *  codec;      ///< Codec that generated this iterator.
    const LSUP_Graph *  gr;         ///< Graph being encoded.
    LSUP_TermSet *      subjects;   ///< All subjects in the graph.
    size_t              s_cur;      ///< Term set cursor.
    LSUP_rc             rc;         /**< Internal return code. It is
                                     *   #LSUP_NORESULT until the prolog has
                                     *   been generated.
                                     */
    char *              s_str;      ///< Serialized subject block (output).
    char *              p_str;      ///< Serialized predicate block.
    char *              o_str;      ///< Serialized object block.
} LSUP_TTLCodecIterator;


/* * * Codec functions. * * */

/** @brief Serialize a single term as Turtle.
 *
 * @param[in] term Term to serialize.
 *
 * @param[in] nsm Namespace map used to shorten IRIs and data types.
 *
 * @param[in,out] out_p Pointer to a heap-allocated string (or to NULL). The
 *  buffer is reallocated or replaced as needed and the handle is updated on
 *  success. On failure the handle and the buffer it points to are left
 *  untouched. The caller is in charge of freeing the result.
 *
 * @return LSUP_OK on success; LSUP_MEM_ERR on allocation failure; LSUP_ERROR
 *  if the literal could not be escaped; LSUP_PARSE_ERR if the term type is
 *  not handled; or the code that PRCCK propagates from the namespace lookup.
 */
static LSUP_rc
term_to_ttl (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
{
    LSUP_rc rc = LSUP_OK;
    LSUP_rc md_rc = LSUP_NORESULT;
    char *out = NULL;
    char *escaped = NULL;   // Escaped literal value.
    char *metadata = NULL;  // Data type or language tag.
    size_t buf_len;

    switch (term->type) {
        case LSUP_TERM_IRIREF:
            if (strcmp (term->data, LSUP_RDF_TYPE) == 0) {
                // Shorten RDF type. The output buffer must be written here
                // too, otherwise `out` would reach the caller uninitialized.
                out = realloc (*out_p, 2);
                if (UNLIKELY (!out)) return LSUP_MEM_ERR;
                strcpy (out, "a");
            } else {
                char *uri = NULL;
                md_rc = LSUP_nsmap_denormalize_uri (nsm, term->data, &uri);
                PRCCK (md_rc);
                if (md_rc == LSUP_NORESULT) {
                    // If URI could not be shortened, add `<>`.
                    out = realloc (*out_p, strlen (uri) + 3);
                    if (UNLIKELY (!out)) {
                        free (uri);
                        return LSUP_MEM_ERR;
                    }
                    sprintf (out, "<%s>", uri);
                    free (uri);
                } else {
                    // The shortened URI replaces the previous buffer.
                    free (*out_p);
                    out = uri;
                }
            }
            break;

        case LSUP_TERM_NS_IRIREF: {
            // Shorten RDF type, otherwise copy the prefixed name verbatim.
            const char *src = (
                    strcmp (term->data, LSUP_RDF_TYPE_NS) == 0
                    ? "a" : term->data);

            out = realloc (*out_p, strlen (src) + 1);
            if (UNLIKELY (!out)) return LSUP_MEM_ERR;
            strcpy (out, src);
            break;
        }

        case LSUP_TERM_LITERAL: {
            bool shorten = false;
            unsigned padding = 0;

            // Data type. It is resolved before the literal is escaped so
            // that an early return from PRCCK has nothing to release.
            if (
                term->datatype != NULL
                && term->datatype != LSUP_default_datatype
            ) {
                md_rc = LSUP_nsmap_denormalize_uri (
                        nsm, term->datatype->data, &metadata);
                PRCCK (md_rc);

                // Shorten numeric and boolean types.
                // TODO check for valid format of the lexical value.
                if (
                    strcmp (metadata, "xsd:integer") == 0
                    || strcmp (metadata, "xsd:double") == 0
                    || strcmp (metadata, "xsd:decimal") == 0
                    || strcmp (metadata, "xsd:boolean") == 0
                ) {
                    shorten = true;
                } else {
                    // Room for `^^<>` for FQURI, `^^` for NS URI.
                    padding = md_rc == LSUP_NORESULT ? 4 : 2;
                }
            }

            if (escape_lit (term->data, &escaped) != LSUP_OK) {
                free (metadata);
                return LSUP_ERROR;
            }

            // Room for "" and terminator, plus data type if present.
            buf_len = strlen (escaped) + 3;
            if (metadata) buf_len += strlen (metadata) + padding;

            out = realloc (*out_p, buf_len);
            if (UNLIKELY (!out)) {
                free (escaped);
                free (metadata);
                return LSUP_MEM_ERR;
            }

            if (shorten) {
                strcpy (out, escaped);
            } else if (!metadata) {
                sprintf (out, "\"%s\"", escaped);
            } else if (md_rc == LSUP_NORESULT) {
                sprintf (out, "\"%s\"^^<%s>", escaped, metadata);
            } else {
                sprintf (out, "\"%s\"^^%s", escaped, metadata);
            }
            free (escaped);
            break;
        }

        case LSUP_TERM_LT_LITERAL:
            if (escape_lit (term->data, &escaped) != LSUP_OK)
                return LSUP_ERROR;
            buf_len = strlen (escaped) + 3; // Room for "" and terminator.

            if (term->lang[0] != '\0') {
                metadata = strndup (term->lang, sizeof (LSUP_LangTag));
                if (UNLIKELY (!metadata)) {
                    free (escaped);
                    return LSUP_MEM_ERR;
                }
                buf_len += strlen (metadata) + 1; // Room for @.
            }

            out = realloc (*out_p, buf_len);
            if (UNLIKELY (!out)) {
                free (escaped);
                free (metadata);
                return LSUP_MEM_ERR;
            }

            if (metadata) sprintf (out, "\"%s\"@%s", escaped, metadata);
            else sprintf (out, "\"%s\"", escaped);
            free (escaped);
            break;

        case LSUP_TERM_BNODE:
            out = realloc (*out_p, strlen (term->data) + 3);
            if (UNLIKELY (!out)) return LSUP_MEM_ERR;
            sprintf (out, "_:%s", term->data);
            break;

        default:
            out = *out_p; // This is considered garbage.
            rc = LSUP_PARSE_ERR;
    }

    free (metadata);
    *out_p = out;

    return rc;
}


/** @brief Create the iterator state used to encode a graph.
 *
 * @param[in] gr Graph to encode.
 *
 * @return New iterator handle, to be released with #gr_to_ttl_done; NULL is
 *  passed to CALLOC_GUARD as the value for a failed allocation.
 */
static void *
gr_to_ttl_init (const LSUP_Graph *gr)
{
    LSUP_TTLCodecIterator *it;
    CALLOC_GUARD (it, NULL);

    it->codec = &ttl_codec;
    it->gr = gr;
    it->subjects = LSUP_graph_unique_terms (gr, TRP_POS_S);
    // Sets the condition to build the prolog on 1st iteration.
    it->rc = LSUP_NORESULT;

    return it;
}


/** @brief Build header and prolog.
 *
 * The result is made of the header comment, one `@prefix` statement per
 * namespace map entry, and one `@base` statement for the graph URI.
 *
 * @param[in,out] it Iterator handle. Its `rc` member is set to LSUP_OK on
 *  success.
 *
 * @param[out] res_p Receives a newly allocated string on success; it is not
 *  modified on failure. Any previous value is overwritten without being
 *  freed.
 *
 * @return LSUP_OK on success; LSUP_MEM_ERR on allocation failure; or the
 *  code that PRCCK propagates from the base URI lookup.
 */
static LSUP_rc
build_prolog (LSUP_TTLCodecIterator *it, char **res_p)
{
    static const char ns_tpl[] = "@prefix %s: <%s> .\n";
    static const char base_stmt_tpl[] = "\n@base <%s> .\n\n";

    // Base. It is resolved first so that an early return from PRCCK has
    // nothing to release.
    char *base_uri_str = NULL;
    LSUP_rc rc = LSUP_nsmap_denormalize_uri (
            LSUP_graph_namespace (it->gr), LSUP_graph_uri (it->gr)->data,
            &base_uri_str);
    PRCCK (rc);

    const char ***nsm = NULL;
    char *grown;
    size_t i = 0, res_len, add_len;

    char *res = fmt_header ("# ");
    // NOTE(review): a NULL header is reported as an allocation failure.
    if (UNLIKELY (!res)) goto memfail;
    res_len = strlen (res);

    // Prefix map.
    nsm = LSUP_nsmap_dump (LSUP_graph_namespace (it->gr));
    for (; nsm[i]; i++) {
        const char **ns = nsm[i];

        // The template length over-estimates by the size of the two `%s`.
        add_len = strlen (ns[0]) + strlen (ns[1]) + strlen (ns_tpl);
        grown = realloc (res, res_len + add_len + 1);
        if (UNLIKELY (!grown)) goto memfail;
        res = grown;

        snprintf (res + res_len, add_len + 1, ns_tpl, ns[0], ns[1]);
        res_len += strlen (res + res_len);
        free (ns);
    }
    free (nsm);
    nsm = NULL;

    // Base statement, written directly at the end of the result.
    add_len = strlen (base_stmt_tpl) + strlen (base_uri_str);
    grown = realloc (res, res_len + add_len + 1);
    if (UNLIKELY (!grown)) goto memfail;
    res = grown;
    snprintf (res + res_len, add_len + 1, base_stmt_tpl, base_uri_str);
    free (base_uri_str);

    *res_p = res;
    it->rc = LSUP_OK;

    return LSUP_OK;

memfail:
    // Release the entries that the prefix loop has not consumed yet.
    if (nsm) {
        for (; nsm[i]; i++) free (nsm[i]);
        free (nsm);
    }
    free (base_uri_str);
    free (res);

    return LSUP_MEM_ERR;
}


/** @brief Append a separator and a string to a heap-allocated string.
 *
 * @param[in,out] buf_p Handle of the NUL-terminated string to grow. It is
 *  left untouched on failure.
 *
 * @param[in] sep Separator, added first.
 *
 * @param[in] str String added after the separator.
 *
 * @return LSUP_OK on success; LSUP_MEM_ERR on allocation failure.
 */
static LSUP_rc
ttl_append (char **buf_p, const char *sep, const char *str)
{
    size_t cur_len = strlen (*buf_p);
    size_t sep_len = strlen (sep);
    size_t str_len = strlen (str);

    char *grown = realloc (*buf_p, cur_len + sep_len + str_len + 1);
    if (UNLIKELY (!grown)) return LSUP_MEM_ERR;

    memcpy (grown + cur_len, sep, sep_len);
    memcpy (grown + cur_len + sep_len, str, str_len + 1); // Includes NUL.
    *buf_p = grown;

    return LSUP_OK;
}


/** @brief Encode all the triples for a single subject.
 *
 * The first call yields the prolog; each following call yields the block
 * for the next subject.
 *
 * @param[in] h Iterator handle obtained from #gr_to_ttl_init.
 *
 * @param[in,out] res_p Handle of the result string, reallocated at each
 *  call. On allocation failure within this function the buffer is freed and
 *  the handle is set to NULL; on any other failure the handle still points
 *  to a valid buffer whose contents are unspecified.
 *
 * @return LSUP_OK when a block was generated; the code returned by
 *  LSUP_term_set_next when no subject could be retrieved; LSUP_MEM_ERR on
 *  allocation failure; or the error code of a failed term serialization.
 */
static LSUP_rc
gr_to_ttl_iter (void *h, char **res_p)
{
    LSUP_TTLCodecIterator *it = h;

    if (it->rc == LSUP_NORESULT) return build_prolog (it, res_p);

    LSUP_Term *s = NULL;
    RCCK (LSUP_term_set_next (it->subjects, &it->s_cur, &s));

    const LSUP_NSMap *nsm = LSUP_graph_namespace (it->gr);

    char *res = *res_p; // Result string will be reallocated.
    LSUP_rc rc = term_to_ttl (s, nsm, &res);
    if (UNLIKELY (rc != LSUP_OK)) {
        *res_p = res;
        return rc;
    }

    LSUP_LinkMap *lmap = LSUP_graph_connections (
            it->gr, s, LSUP_LINK_OUTBOUND);
    LSUP_LinkMapIterator *lmit = LSUP_link_map_iter_new (lmap, s);

    LSUP_Term *p = NULL;
    LSUP_TermSet *o_ts = NULL;
    const char *p_join = " ";

    // Begin predicate loop.
    while (LSUP_link_map_next (lmit, &p, &o_ts) != LSUP_END) {
        // Add predicate representation.
        rc = term_to_ttl (p, nsm, &it->p_str);
        if (UNLIKELY (rc != LSUP_OK)) goto finish;

        rc = ttl_append (&res, p_join, it->p_str);
        if (UNLIKELY (rc != LSUP_OK)) goto memfail;
        p_join = " ; ";

        // Add objects for predicate.
        size_t i = 0;
        LSUP_Term *o = NULL;
        const char *o_join = " ";
        while (LSUP_term_set_next (o_ts, &i, &o) != LSUP_END) {
            it->rc = term_to_ttl (o, nsm, &it->o_str);
            if (UNLIKELY (it->rc != LSUP_OK)) {
                rc = it->rc;
                goto finish;
            }

            rc = ttl_append (&res, o_join, it->o_str);
            if (UNLIKELY (rc != LSUP_OK)) goto memfail;
            o_join = " , ";
        }
    }

    // Close the subject block.
    rc = ttl_append (&res, " .\n", "");
    if (UNLIKELY (rc != LSUP_OK)) goto memfail;

    rc = it->rc;
    goto finish;

memfail:
    free (res);
    res = NULL;
    rc = LSUP_MEM_ERR;

finish:
    // Always release the link map and hand back a valid (or NULL) buffer.
    LSUP_link_map_iter_free (lmit);
    LSUP_link_map_free (lmap);
    *res_p = res;

    return rc;
}


/** @brief Release the iterator state.
 *
 * @param[in] h Iterator handle obtained from #gr_to_ttl_init. NULL is a
 *  no-op.
 */
static void
gr_to_ttl_done (void *h)
{
    LSUP_TTLCodecIterator *it = h;
    if (!it) return;

    LSUP_term_set_free (it->subjects);
    free (it->s_str);
    free (it->p_str);
    free (it->o_str);
    free (it);
}


/// Turtle codec descriptor.
const LSUP_Codec ttl_codec = {
    .name               = "Turtle",
    .mimetype           = "text/turtle",
    .extension          = "ttl",

    .encode_term        = term_to_ttl,

    .encode_graph_init  = gr_to_ttl_init,
    .encode_graph_iter  = gr_to_ttl_iter,
    .encode_graph_done  = gr_to_ttl_done,

    //.decode_term        = LSUP_ttl_parse_term,
    .decode_graph       = LSUP_ttl_parse_doc,
};