codec.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. #include "codec.h"
  2. uint8_t *unescape_unicode (const uint8_t *esc_str, size_t size)
  3. {
  4. // Output will not be longer than the escaped sequence.
  5. uint8_t *data = malloc (size + 1);
  6. size_t len = 0; // Size of output string.
  7. uint8_t tmp_chr[9];
  8. for (size_t i = 0; i < size;) {
  9. int esc_len; // Size of escape sequence.
  10. if (esc_str[i] == '\\') {
  11. i++; // Skip over '\\'
  12. if (esc_str[i] == 'u') {
  13. // 4-hex (2 bytes) sequence.
  14. esc_len = 4;
  15. } else if (esc_str[i] == 'U') {
  16. // 8-hex (4 bytes) sequence.
  17. esc_len = 8;
  18. } else {
  19. // Unescape other escaped characters.
  20. data[len++] = unescape_char (esc_str[i++]);
  21. continue;
  22. }
  23. // Continue encoding UTF code point.
  24. i ++; // Skip over 'u' / 'U'
  25. // Use tmp_chr to hold the hex string for the code point.
  26. memcpy (tmp_chr, esc_str + i, esc_len);
  27. tmp_chr[esc_len] = '\0';
  28. uint32_t tmp_val = strtol ((char *) tmp_chr, NULL, 16);
  29. log_debug ("tmp_val: %d", tmp_val);
  30. // Reuse tmp_chr to hold the byte values for the code point.
  31. int cp_len = utf8_encode (tmp_val, tmp_chr);
  32. if (cp_len == 0) {
  33. log_error ("Error encoding sequence: %s", tmp_chr);
  34. return NULL;
  35. }
  36. // Copy bytes into destination.
  37. memcpy (data + len, tmp_chr, cp_len);
  38. if (esc_len == 4)
  39. log_trace ("UC byte value: %2x %2x", data[len], data[len + 1]);
  40. else
  41. log_trace (
  42. "UC byte value: %2x %2x %2x %2x",
  43. data[len], data[len + 1], data[len + 2], data[len + 3]
  44. );
  45. len += cp_len;
  46. i += esc_len;
  47. } else {
  48. data[len++] = esc_str[i++];
  49. }
  50. }
  51. data[len++] = '\0';
  52. uint8_t *ret = realloc (data, len); // Compact result.
  53. if (UNLIKELY (!ret)) return NULL;
  54. return ret;
  55. }
  56. LSUP_Term **
  57. LSUP_obj_list_add (LSUP_Term **ol, LSUP_Term *o)
  58. {
  59. size_t i = 0;
  60. while (ol[i++]); // Count includes sentinel.
  61. LSUP_Term **ret = realloc (ol, sizeof (*ol) * (i + 1));
  62. if (!ret) return NULL;
  63. ret[i - 1] = o;
  64. ret[i] = NULL;
  65. return ret;
  66. }
  67. LSUP_PredObjList *
  68. LSUP_pred_obj_list_new (void)
  69. {
  70. /*
  71. * Init state:
  72. * {p: [NULL], o: [NULL]}
  73. */
  74. LSUP_PredObjList *po;
  75. MALLOC_GUARD (po, NULL);
  76. // Set sentinels.
  77. CALLOC_GUARD (po->p, NULL);
  78. CALLOC_GUARD (po->o, NULL);
  79. return po;
  80. }
  81. void
  82. LSUP_pred_obj_list_free (LSUP_PredObjList *po)
  83. {
  84. for (size_t i = 0; po->p[i]; i++) {
  85. LSUP_term_free (po->p[i]);
  86. }
  87. for (size_t i = 0; po->o[i]; i++) {
  88. for (size_t j = 0; po->o[i][j]; j++) {
  89. LSUP_term_free (po->o[i][j]);
  90. }
  91. }
  92. free (po);
  93. }
  94. LSUP_rc
  95. LSUP_pred_obj_list_add (LSUP_PredObjList *po, LSUP_Term *p, LSUP_Term **o)
  96. {
  97. size_t i;
  98. i = 0;
  99. while (po->p[i++]); // Count includes sentinel.
  100. LSUP_Term **tmp_p = realloc (po->p, sizeof (*po->p) * (i + 1));
  101. if (!tmp_p) return LSUP_MEM_ERR;
  102. tmp_p[i - 1] = p;
  103. tmp_p[i] = NULL;
  104. po->p = tmp_p;
  105. i = 0;
  106. while (po->o[i++]);
  107. LSUP_Term ***tmp_o = realloc (po->o, sizeof (*po->o) * (i + 1));
  108. if (!tmp_o) return LSUP_MEM_ERR;
  109. tmp_o[i - 1] = o;
  110. tmp_o[i] = NULL;
  111. po->o = tmp_o;
  112. return LSUP_OK;
  113. }
  114. size_t
  115. LSUP_spo_list_add_triples (
  116. LSUP_GraphIterator *it, LSUP_Term *s, const LSUP_PredObjList *po)
  117. {
  118. size_t ct = 0;
  119. if (!s) {
  120. log_error ("Subject is NULL!");
  121. return LSUP_VALUE_ERR;
  122. }
  123. if (!po->p) {
  124. log_error ("Predicate is NULL!");
  125. return LSUP_VALUE_ERR;
  126. }
  127. if (!po->o) {
  128. log_error ("Object list is NULL!");
  129. return LSUP_VALUE_ERR;
  130. }
  131. LSUP_Triple *spo = LSUP_triple_new (s, NULL, NULL);
  132. for (size_t i = 0; po->p[i]; i++) {
  133. spo->p = po->p[i];
  134. for (size_t j = 0; po->o[i][j]; j++) {
  135. spo->o = po->o[i][j];
  136. LSUP_rc rc = LSUP_graph_add_iter (it, spo);
  137. if (rc == LSUP_OK) ct++;
  138. PRCCK (rc);
  139. }
  140. }
  141. free (spo);
  142. return ct;
  143. }
  144. LSUP_Term *
  145. LSUP_bnode_add_collection (LSUP_GraphIterator *it, LSUP_Term **ol)
  146. {
  147. LSUP_NSMap *nsm = LSUP_graph_namespace (LSUP_graph_iter_graph (it));
  148. LSUP_Term
  149. *s = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL),
  150. *rdf_first = LSUP_iriref_new ("rdf:first", nsm),
  151. *rdf_rest = LSUP_iriref_new ("rdf:rest", nsm),
  152. *rdf_nil = LSUP_iriref_new ("rdf:nil", nsm),
  153. *link;
  154. LSUP_Triple *spo = TRP_DUMMY;
  155. link = s;
  156. for (size_t i = 0; ol[i]; i++) {
  157. spo->s = link;
  158. spo->p = rdf_first;
  159. spo->o = ol[i];
  160. PRCNL (LSUP_graph_add_iter (it, spo));
  161. spo->p = rdf_rest;
  162. spo->o = (
  163. ol[i + 1] ? LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL)
  164. : rdf_nil);
  165. PRCNL (LSUP_graph_add_iter (it, spo));
  166. if (link != s) LSUP_term_free (link);
  167. // Current object becomes next subject. Irrelevant for last item.
  168. link = spo->o;
  169. }
  170. LSUP_term_free (rdf_first);
  171. LSUP_term_free (rdf_rest);
  172. LSUP_term_free (rdf_nil);
  173. free (spo);
  174. return s;
  175. }
  176. /*
  177. * Extern inline functions.
  178. */
  179. char unescape_char (const char c);
  180. uint8_t *uint8_dup (const uint8_t *str);
  181. uint8_t *uint8_ndup (const uint8_t *str, size_t size);