123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223 |
- #include "codec.h"
/*
 * Replace escape sequences in a byte string with the bytes they stand for.
 *
 * `\uXXXX` (4 hex digits) and `\UXXXXXXXX` (8 hex digits) are parsed into a
 * code point and encoded with utf8_encode(). Any other character following a
 * backslash is translated by unescape_char(). All remaining bytes are copied
 * unchanged.
 *
 * esc_str: escaped input; only the first `size` bytes are read.
 * size:    number of bytes of `esc_str` to process.
 *
 * Returns a newly allocated, NUL-terminated buffer that the caller must
 * free(). Returns NULL if memory cannot be allocated, if an escape sequence
 * is cut short by the end of the input, or if utf8_encode() reports failure.
 */
uint8_t *unescape_unicode (const uint8_t *esc_str, size_t size)
{
    // Output will not be longer than the escaped sequence. The buffer is
    // zero-filled so that the trace statements below, which may look past the
    // bytes just written, never read indeterminate memory.
    uint8_t *data = calloc (size + 1, 1);
    if (UNLIKELY (!data)) return NULL;

    size_t len = 0; // Size of output string.

    for (size_t i = 0; i < size;) {
        if (esc_str[i] != '\\') {
            // Plain byte: copy as is.
            data[len++] = esc_str[i++];
            continue;
        }

        i++; // Skip over '\\'
        if (i >= size) {
            // Nothing follows the backslash within the given size.
            log_error ("Truncated escape sequence at end of input.");
            goto fail;
        }

        size_t esc_len; // Number of hex digits in the escape sequence.
        if (esc_str[i] == 'u') {
            // 4-hex (2 bytes) sequence.
            esc_len = 4;
        } else if (esc_str[i] == 'U') {
            // 8-hex (4 bytes) sequence.
            esc_len = 8;
        } else {
            // Unescape other escaped characters.
            data[len++] = unescape_char (esc_str[i++]);
            continue;
        }

        i++; // Skip over 'u' / 'U'
        if (size - i < esc_len) {
            // Fewer hex digits left than the sequence requires.
            log_error ("Truncated escape sequence at end of input.");
            goto fail;
        }

        // NUL-terminated copy of the hex digits, kept separate from the
        // encoder output so it is still intact for the error message.
        char hex[9];
        memcpy (hex, esc_str + i, esc_len);
        hex[esc_len] = '\0';

        // strtoul() covers the full 32-bit range even where long is 32 bits.
        // It does not reject malformed digits: it converts the longest valid
        // prefix.
        uint32_t code_point = (uint32_t) strtoul (hex, NULL, 16);
        log_debug ("tmp_val: %lu", (unsigned long) code_point);

        // Same capacity the original scratch buffer offered to utf8_encode().
        uint8_t utf8_buf[9];
        int cp_len = utf8_encode (code_point, utf8_buf);
        if (cp_len <= 0) {
            log_error ("Error encoding sequence: %s", hex);
            goto fail;
        }

        // Copy bytes into destination.
        memcpy (data + len, utf8_buf, (size_t) cp_len);
        if (esc_len == 4) {
            log_trace ("UC byte value: %2x %2x", data[len], data[len + 1]);
        } else {
            log_trace (
                    "UC byte value: %2x %2x %2x %2x",
                    data[len], data[len + 1], data[len + 2], data[len + 3]);
        }

        len += (size_t) cp_len;
        i += esc_len;
    }

    data[len++] = '\0';

    // Compact result. If shrinking fails the original buffer is still valid
    // and holds the complete string, so hand that back instead of leaking it.
    uint8_t *ret = realloc (data, len);
    return ret ? ret : data;

fail:
    free (data);
    return NULL;
}
- LSUP_Term **
- LSUP_obj_list_add (LSUP_Term **ol, LSUP_Term *o)
- {
- size_t i = 0;
- while (ol[i++]); // Count includes sentinel.
- LSUP_Term **ret = realloc (ol, sizeof (*ol) * (i + 1));
- if (!ret) return NULL;
- ret[i - 1] = o;
- ret[i] = NULL;
- return ret;
- }
- LSUP_PredObjList *
- LSUP_pred_obj_list_new (void)
- {
- /*
- * Init state:
- * {p: [NULL], o: [NULL]}
- */
- LSUP_PredObjList *po;
- MALLOC_GUARD (po, NULL);
- // Set sentinels.
- CALLOC_GUARD (po->p, NULL);
- CALLOC_GUARD (po->o, NULL);
- return po;
- }
- void
- LSUP_pred_obj_list_free (LSUP_PredObjList *po)
- {
- for (size_t i = 0; po->p[i]; i++) {
- LSUP_term_free (po->p[i]);
- }
- for (size_t i = 0; po->o[i]; i++) {
- for (size_t j = 0; po->o[i][j]; j++) {
- LSUP_term_free (po->o[i][j]);
- }
- }
- free (po);
- }
- LSUP_rc
- LSUP_pred_obj_list_add (LSUP_PredObjList *po, LSUP_Term *p, LSUP_Term **o)
- {
- size_t i;
- i = 0;
- while (po->p[i++]); // Count includes sentinel.
- LSUP_Term **tmp_p = realloc (po->p, sizeof (*po->p) * (i + 1));
- if (!tmp_p) return LSUP_MEM_ERR;
- tmp_p[i - 1] = p;
- tmp_p[i] = NULL;
- po->p = tmp_p;
- i = 0;
- while (po->o[i++]);
- LSUP_Term ***tmp_o = realloc (po->o, sizeof (*po->o) * (i + 1));
- if (!tmp_o) return LSUP_MEM_ERR;
- tmp_o[i - 1] = o;
- tmp_o[i] = NULL;
- po->o = tmp_o;
- return LSUP_OK;
- }
- size_t
- LSUP_spo_list_add_triples (
- LSUP_GraphIterator *it, LSUP_Term *s, const LSUP_PredObjList *po)
- {
- size_t ct = 0;
- if (!s) {
- log_error ("Subject is NULL!");
- return LSUP_VALUE_ERR;
- }
- if (!po->p) {
- log_error ("Predicate is NULL!");
- return LSUP_VALUE_ERR;
- }
- if (!po->o) {
- log_error ("Object list is NULL!");
- return LSUP_VALUE_ERR;
- }
- LSUP_Triple *spo = LSUP_triple_new (s, NULL, NULL);
- for (size_t i = 0; po->p[i]; i++) {
- spo->p = po->p[i];
- for (size_t j = 0; po->o[i][j]; j++) {
- spo->o = po->o[i][j];
- LSUP_rc rc = LSUP_graph_add_iter (it, spo);
- if (rc == LSUP_OK) ct++;
- PRCCK (rc);
- }
- }
- free (spo);
- return ct;
- }
- LSUP_Term *
- LSUP_bnode_add_collection (LSUP_GraphIterator *it, LSUP_Term **ol)
- {
- LSUP_NSMap *nsm = LSUP_graph_namespace (LSUP_graph_iter_graph (it));
- LSUP_Term
- *s = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL),
- *rdf_first = LSUP_iriref_new ("rdf:first", nsm),
- *rdf_rest = LSUP_iriref_new ("rdf:rest", nsm),
- *rdf_nil = LSUP_iriref_new ("rdf:nil", nsm),
- *link;
- LSUP_Triple *spo = TRP_DUMMY;
- link = s;
- for (size_t i = 0; ol[i]; i++) {
- spo->s = link;
- spo->p = rdf_first;
- spo->o = ol[i];
- PRCNL (LSUP_graph_add_iter (it, spo));
- spo->p = rdf_rest;
- spo->o = (
- ol[i + 1] ? LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL)
- : rdf_nil);
- PRCNL (LSUP_graph_add_iter (it, spo));
- if (link != s) LSUP_term_free (link);
- // Current object becomes next subject. Irrelevant for last item.
- link = spo->o;
- }
- LSUP_term_free (rdf_first);
- LSUP_term_free (rdf_rest);
- LSUP_term_free (rdf_nil);
- free (spo);
- return s;
- }
/*
 * Extern inline functions.
 *
 * NOTE(review): these functions are presumably defined `inline` in codec.h.
 * Redeclaring them at file scope without `inline` asks the compiler to emit
 * their external definitions in this translation unit. Confirm against the
 * header.
 */
extern char unescape_char (const char c);
extern uint8_t *uint8_dup (const uint8_t *str);
extern uint8_t *uint8_ndup (const uint8_t *str, size_t size);
|