#include "volksdata/codec/codec_ttl.h"


/** @brief TTL codec iterator.
 *
 * This iterator yields one or more triples at a time, one group per subject,
 * with the most compact form allowed by Turtle, e.g.
 *
 * ```
 * :s :p1 :o1, :o2, :o3; :p2 :o4, :o5 .
 * ```
 */
typedef struct {
    const VOLK_Codec *  codec;      ///< Codec that generated this iterator.
    const VOLK_Graph *  gr;         ///< Graph being encoded.
    VOLK_TermSet *      subjects;   ///< All subjects in the graph.
    size_t              s_cur;      ///< Term set cursor.
    VOLK_rc             rc;         ///< Internal return code.
    char *              s_str;      ///< Serialized subject block (output).
    char *              p_str;      ///< Serialized predicate block.
    char *              o_str;      ///< Serialized object block.
} VOLK_TTLCodecIterator;


/*
 * Codec functions.
 */

/** @brief Serialize a single term as Turtle.
 *
 * @param[in] term Term to serialize.
 *
 * @param[in,out] out_p Address of a heap-allocated string, or of a NULL
 *  pointer. On success the buffer is reallocated (or replaced) and holds the
 *  NUL-terminated serialization. If the function fails before producing a
 *  result, `*out_p` is left untouched and remains valid. The caller owns the
 *  buffer and must free it.
 *
 * @return VOLK_OK on success; VOLK_MEM_ERR on allocation failure; VOLK_ERROR
 *  if the literal could not be escaped or the data type could not be
 *  resolved; VOLK_PARSE_ERR on an unknown term type; or whatever error
 *  PRCCK() propagates from VOLK_nsmap_denormalize_uri().
 */
static VOLK_rc
term_to_ttl (const VOLK_Term *term, char **out_p)
{
    VOLK_rc rc = VOLK_OK;
    VOLK_rc md_rc = VOLK_NORESULT;
    char *tmp = NULL;       // Escaped literal, or denormalized IRI.
    char *metadata = NULL;  // Data type or language tag.
    char *out = NULL;
    size_t buf_len;

    switch (term->type) {
    case VOLK_TERM_IRIREF:
        if (strcmp (term->data, VOLK_RDF_TYPE) == 0) {
            // Shorten RDF type to `a`.
            out = realloc (*out_p, 2);
            if (UNLIKELY (!out)) {
                rc = VOLK_MEM_ERR;
                goto cleanup;
            }
            out[0] = 'a';
            out[1] = '\0';
        } else {
            md_rc = VOLK_nsmap_denormalize_uri (term->data, &tmp);
            // Nothing is owned by this function yet if PRCCK returns here.
            PRCCK (md_rc);

            if (md_rc == VOLK_NORESULT) {
                // URI could not be shortened: wrap the original in `<>`.
                // `tmp` is not needed in this case; it is released in the
                // common cleanup, the same way the literal branch releases
                // the denormalized data type regardless of the outcome.
                buf_len = strlen (term->data) + 3;
                out = realloc (*out_p, buf_len);
                if (UNLIKELY (!out)) {
                    rc = VOLK_MEM_ERR;
                    goto cleanup;
                }
                snprintf (out, buf_len, "<%s>", term->data);
            } else {
                // URI was shortened: hand over the string allocated by the
                // denormalization and drop the previous output buffer.
                free (*out_p);
                out = tmp;
                tmp = NULL;  // Ownership moved to `out`.
            }
        }
        break;

    case VOLK_TERM_LITERAL: {
        bool shorten = false;
        unsigned padding = 0;

        // Resolve the data type first, so that nothing owned by this
        // function is pending if PRCCK returns early.
        if (
            term->datatype != NULL
            && term->datatype != VOLK_default_datatype
        ) {
            md_rc = VOLK_nsmap_denormalize_uri (
                    term->datatype->data, &metadata);
            PRCCK (md_rc);
            if (UNLIKELY (!metadata)) {
                rc = VOLK_ERROR;
                goto cleanup;
            }

            // Shorten numeric and boolean types.
            // TODO check for valid format of the lexical value.
            if (
                strcmp (metadata, "xsd:integer") == 0
                || strcmp (metadata, "xsd:double") == 0
                || strcmp (metadata, "xsd:decimal") == 0
                || strcmp (metadata, "xsd:boolean") == 0
            ) {
                shorten = true;
            } else {
                // Room for `^^<>` for FQURI, `^^` for NS URI.
                padding = md_rc == VOLK_NORESULT ? 4 : 2;
            }
        }

        if (escape_lit (term->data, &tmp) != VOLK_OK) {
            rc = VOLK_ERROR;
            goto cleanup;
        }
        buf_len = strlen (tmp) + 3;  // Room for "" and terminator.
        if (metadata) buf_len += strlen (metadata) + padding;

        out = realloc (*out_p, buf_len);
        if (UNLIKELY (!out)) {
            rc = VOLK_MEM_ERR;
            goto cleanup;
        }

        if (shorten) {
            strcpy (out, tmp);
        } else if (metadata) {
            if (md_rc == VOLK_NORESULT) {
                snprintf (out, buf_len, "\"%s\"^^<%s>", tmp, metadata);
            } else {
                snprintf (out, buf_len, "\"%s\"^^%s", tmp, metadata);
            }
        } else {
            snprintf (out, buf_len, "\"%s\"", tmp);
        }
        break;
    }

    case VOLK_TERM_LT_LITERAL:
        if (escape_lit (term->data, &tmp) != VOLK_OK) {
            rc = VOLK_ERROR;
            goto cleanup;
        }
        buf_len = strlen (tmp) + 3;  // Room for "" and terminator.

        if (term->lang[0] != '\0') {
            metadata = strndup (term->lang, sizeof (VOLK_LangTag));
            if (UNLIKELY (!metadata)) {
                rc = VOLK_MEM_ERR;
                goto cleanup;
            }
            buf_len += strlen (metadata) + 1;  // Room for @.
        }

        out = realloc (*out_p, buf_len);
        if (UNLIKELY (!out)) {
            rc = VOLK_MEM_ERR;
            goto cleanup;
        }

        if (metadata) {
            snprintf (out, buf_len, "\"%s\"@%s", tmp, metadata);
        } else {
            snprintf (out, buf_len, "\"%s\"", tmp);
        }
        break;

    case VOLK_TERM_BNODE:
        buf_len = strlen (term->data) + 3;  // Room for `_:` and terminator.
        out = realloc (*out_p, buf_len);
        if (UNLIKELY (!out)) {
            rc = VOLK_MEM_ERR;
            goto cleanup;
        }
        snprintf (out, buf_len, "_:%s", term->data);
        break;

    default:
        out = *out_p;  // This is considered garbage.
        log_error ("Invalid term type: %d", term->type);
        rc = VOLK_PARSE_ERR;
    }

    *out_p = out;

cleanup:
    // Reached directly (skipping the assignment above) on failures, so that
    // the caller's buffer is never replaced by a NULL pointer.
    free (tmp);
    free (metadata);

    return rc;
}


/** @brief Append a separator and a string to a heap-allocated buffer.
 *
 * @param[in,out] buf_p Address of a NUL-terminated heap string. It is
 *  reallocated on success and left untouched (and valid) on failure.
 *
 * @param[in] sep Separator written before `str`.
 *
 * @param[in] str String to append.
 *
 * @return VOLK_OK on success; VOLK_ERROR if the buffer or the string is
 *  NULL; VOLK_MEM_ERR on allocation failure.
 */
static VOLK_rc
ttl_append (char **buf_p, const char *sep, const char *str)
{
    if (UNLIKELY (!*buf_p || !str)) return VOLK_ERROR;

    size_t old_len = strlen (*buf_p);
    size_t sep_len = strlen (sep);

    char *tmp = realloc (*buf_p, old_len + sep_len + strlen (str) + 1);
    if (UNLIKELY (!tmp)) return VOLK_MEM_ERR;

    strcpy (tmp + old_len, sep);
    strcpy (tmp + old_len + sep_len, str);
    *buf_p = tmp;

    return VOLK_OK;
}


/** @brief Create the iterator used to encode a graph as Turtle.
 *
 * @param[in] gr Graph to encode. It is referenced, not copied.
 *
 * @return New iterator handle, to be released with gr_to_ttl_done().
 *  CALLOC_GUARD is given NULL as its second argument, presumably the value
 *  returned on allocation failure.
 */
static void *
gr_to_ttl_init (const VOLK_Graph *gr)
{
    VOLK_TTLCodecIterator *it;
    CALLOC_GUARD (it, NULL);

    it->codec = &ttl_codec;
    it->gr = gr;
    // NOTE(review): the result is not checked here; confirm whether
    // VOLK_graph_unique_terms() can return NULL.
    it->subjects = VOLK_graph_unique_terms (gr, TRP_POS_S);
    // Sets the condition to build the prolog on 1st iteration.
    it->rc = VOLK_NORESULT;

    return it;
}


/** @brief Build header and prolog.
 *
 * The output is made of the header generated by fmt_header(), one `@prefix`
 * statement per entry of the namespace map, and a `@base` statement with the
 * graph URI.
 *
 * @param[in,out] it Iterator handle. On success its return code is set to
 *  VOLK_OK, so that following iterations encode subjects.
 *
 * @param[out] res_p On success, set to a newly allocated string owned by the
 *  caller. The previous value is overwritten, not freed. On failure it is
 *  not modified.
 *
 * @return VOLK_OK on success, VOLK_MEM_ERR on allocation failure.
 */
static VOLK_rc
build_prolog (VOLK_TTLCodecIterator *it, char **res_p)
{
    static const char ns_tpl[] = "@prefix %s: <%s> .\n";
    static const char base_tpl[] = "\n@base <%s> .\n\n";

    VOLK_rc rc = VOLK_OK;

    char *res = fmt_header ("# ");
    // NOTE(review): a NULL header is treated as an allocation failure.
    if (UNLIKELY (!res)) return VOLK_MEM_ERR;
    size_t res_len = strlen (res);

    // Prefix map.
    // NOTE(review): a NULL dump is treated as an empty map; confirm whether
    // it should be reported as an error instead.
    const char ***nsm = VOLK_nsmap_dump ();
    if (nsm) {
        size_t i = 0;
        for (; nsm[i]; i++) {
            const char **ns = nsm[i];
            // sizeof the template includes room for the terminator.
            size_t ns_len = strlen (ns[0]) + strlen (ns[1]) + sizeof (ns_tpl);

            char *tmp = realloc (res, res_len + ns_len);
            if (UNLIKELY (!tmp)) {
                rc = VOLK_MEM_ERR;
                break;
            }
            res = tmp;

            sprintf (res + res_len, ns_tpl, ns[0], ns[1]);
            res_len += strlen (res + res_len);

            free (ns);
        }
        // Release entries left over by an early exit, then the table.
        for (; nsm[i]; i++) free (nsm[i]);
        free (nsm);
    }

    // Base.
    if (rc == VOLK_OK) {
        const char *base_uri_str = VOLK_graph_uri (it->gr)->data;

        char *tmp = realloc (
                res, res_len + strlen (base_uri_str) + sizeof (base_tpl));
        if (UNLIKELY (!tmp)) {
            rc = VOLK_MEM_ERR;
        } else {
            res = tmp;
            sprintf (res + res_len, base_tpl, base_uri_str);
        }
    }

    if (UNLIKELY (rc != VOLK_OK)) {
        free (res);
        return rc;
    }

    *res_p = res;
    it->rc = VOLK_OK;

    return VOLK_OK;
}


/** @brief Encode all the triples for a single subject.
 *
 * The first call (iterator return code set to VOLK_NORESULT) yields the
 * prolog instead of a subject block.
 *
 * @param[in,out] h Iterator handle obtained from gr_to_ttl_init().
 *
 * @param[in,out] res_p Address of a heap-allocated string, or of a NULL
 *  pointer. On success the buffer is reallocated and holds the serialized
 *  block. If encoding fails after the buffer has been taken over, the buffer
 *  is freed and `*res_p` is set to NULL.
 *
 * @return VOLK_OK if a block was written; VOLK_END if there are no more
 *  subjects; VOLK_MEM_ERR on allocation failure; or the error returned by
 *  the term serialization.
 */
static VOLK_rc
gr_to_ttl_iter (void *h, char **res_p)
{
    VOLK_TTLCodecIterator *it = h;

    if (it->rc == VOLK_NORESULT) return build_prolog (it, res_p);

    VOLK_Term *s = NULL;
    VOLK_rc rc = VOLK_term_set_next (it->subjects, &it->s_cur, &s);
    if (rc == VOLK_END) return rc;  // Return without logging error.
    RCCK (rc);  // Log error or warning for anything else.

    // From here on, every exit goes through the cleanup at the bottom.
    char *res = *res_p;  // Result string will be reallocated.
    VOLK_LinkMap *lmap = NULL;
    VOLK_LinkMapIterator *lmit = NULL;
    VOLK_Term *p = NULL;
    VOLK_TermSet *o_ts = NULL;
    const char *p_join = "\n ";  // Newline & indent after subject.

    rc = term_to_ttl (s, &res);
    if (UNLIKELY (rc != VOLK_OK)) goto fail;

    // NOTE(review): NULL results are reported as allocation failures;
    // confirm against the link map API.
    lmap = VOLK_graph_connections (it->gr, s, VOLK_LINK_OUTBOUND);
    if (UNLIKELY (!lmap)) {
        rc = VOLK_MEM_ERR;
        goto fail;
    }
    lmit = VOLK_link_map_iter_new (lmap);
    if (UNLIKELY (!lmit)) {
        rc = VOLK_MEM_ERR;
        goto fail;
    }

    // Begin predicate loop.
    while (VOLK_link_map_next (lmit, &p, &o_ts) != VOLK_END) {
        // Add predicate representation.
        rc = term_to_ttl (p, &it->p_str);
        if (UNLIKELY (rc != VOLK_OK)) goto fail;
        rc = ttl_append (&res, p_join, it->p_str);
        if (UNLIKELY (rc != VOLK_OK)) goto fail;
        p_join = " ;\n ";

        // Add objects for predicate.
        size_t i = 0;
        VOLK_Term *o = NULL;
        const char *o_join = " ";
        while (VOLK_term_set_next (o_ts, &i, &o) != VOLK_END) {
            rc = term_to_ttl (o, &it->o_str);
            if (UNLIKELY (rc != VOLK_OK)) goto fail;
            rc = ttl_append (&res, o_join, it->o_str);
            if (UNLIKELY (rc != VOLK_OK)) goto fail;
            o_join = " ,\n ";  // Following objects go on their own line.
        }
    }

    // Period goes on its own line for visibility.
    rc = ttl_append (&res, "", "\n.\n\n");
    if (UNLIKELY (rc != VOLK_OK)) goto fail;

    VOLK_link_map_iter_free (lmit);
    VOLK_link_map_free (lmap);

    *res_p = res;
    it->rc = VOLK_OK;

    return VOLK_OK;

fail:
    log_error ("Error encoding subject block: %d", rc);
    if (lmit) VOLK_link_map_iter_free (lmit);
    if (lmap) VOLK_link_map_free (lmap);
    // The caller's pointer may be stale after the reallocations above:
    // release the buffer and reset it.
    free (res);
    *res_p = NULL;
    it->rc = rc;

    return rc;
}


/** @brief Release an iterator created by gr_to_ttl_init().
 *
 * @param[in] h Iterator handle. NULL is accepted and ignored.
 */
static void
gr_to_ttl_done (void *h)
{
    VOLK_TTLCodecIterator *it = h;
    if (!it) return;

    VOLK_term_set_free (it->subjects);
    free (it->s_str);
    free (it->p_str);
    free (it->o_str);
    free (it);
}


/// Turtle codec: encoder callbacks defined in this file, plus the decoder.
const VOLK_Codec ttl_codec = {
    .name               = "Turtle",
    .mimetype           = "text/turtle",
    .extension          = "ttl",

    .encode_term        = term_to_ttl,

    .encode_graph_init  = gr_to_ttl_init,
    .encode_graph_iter  = gr_to_ttl_iter,
    .encode_graph_done  = gr_to_ttl_done,

    //.decode_term        = VOLK_ttl_parse_term,
    .decode_graph       = VOLK_ttl_parse_doc,
};