codec.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. #include "codec.h"
  2. uint8_t *unescape_unicode (const uint8_t *esc_str, size_t size)
  3. {
  4. // Output will not be longer than the escaped sequence.
  5. uint8_t *data = malloc (size + 1);
  6. size_t len = 0; // Size of output string.
  7. uint8_t tmp_chr[9];
  8. for (size_t i = 0; i < size;) {
  9. int esc_len; // Size of escape sequence.
  10. if (esc_str[i] == '\\') {
  11. i++; // Skip over '\\'
  12. if (esc_str[i] == 'u') {
  13. // 4-hex (2 bytes) sequence.
  14. esc_len = 4;
  15. } else if (esc_str[i] == 'U') {
  16. // 8-hex (4 bytes) sequence.
  17. esc_len = 8;
  18. } else {
  19. // Unescape other escaped characters.
  20. data[len++] = unescape_char (esc_str[i++]);
  21. continue;
  22. }
  23. // Continue encoding UTF code point.
  24. i ++; // Skip over 'u' / 'U'
  25. // Use tmp_chr to hold the hex string for the code point.
  26. memcpy (tmp_chr, esc_str + i, esc_len);
  27. tmp_chr[esc_len] = '\0';
  28. uint32_t tmp_val = strtol ((char *) tmp_chr, NULL, 16);
  29. //log_debug ("tmp_val: %d", tmp_val);
  30. // Reuse tmp_chr to hold the byte values for the code point.
  31. int cp_len = utf8_encode (tmp_val, tmp_chr);
  32. if (cp_len == 0) {
  33. log_error ("Error encoding sequence: %s", tmp_chr);
  34. return NULL;
  35. }
  36. // Copy bytes into destination.
  37. memcpy (data + len, tmp_chr, cp_len);
  38. #if 0
  39. // This can generate a LOT of output.
  40. if (esc_len == 4)
  41. log_trace ("UC byte value: %2x %2x", data[len], data[len + 1]);
  42. else
  43. log_trace (
  44. "UC byte value: %2x %2x %2x %2x",
  45. data[len], data[len + 1], data[len + 2], data[len + 3]
  46. );
  47. #endif
  48. len += cp_len;
  49. i += esc_len;
  50. } else {
  51. data[len++] = esc_str[i++];
  52. }
  53. }
  54. data[len++] = '\0';
  55. uint8_t *ret = realloc (data, len); // Compact result.
  56. if (UNLIKELY (!ret)) return NULL;
  57. return ret;
  58. }
  59. LSUP_Term **
  60. LSUP_obj_list_add (LSUP_Term **ol, LSUP_Term *o)
  61. {
  62. size_t i = 0;
  63. while (ol[i++]); // Count includes sentinel.
  64. LSUP_Term **ret = realloc (ol, sizeof (*ol) * (i + 1));
  65. if (!ret) return NULL;
  66. ret[i - 1] = o;
  67. ret[i] = NULL;
  68. return ret;
  69. }
  70. LSUP_PredObjList *
  71. LSUP_pred_obj_list_new (void)
  72. {
  73. /*
  74. * Init state:
  75. * {p: [NULL], o: [NULL]}
  76. */
  77. LSUP_PredObjList *po;
  78. MALLOC_GUARD (po, NULL);
  79. // Set sentinels.
  80. CALLOC_GUARD (po->p, NULL);
  81. CALLOC_GUARD (po->o, NULL);
  82. return po;
  83. }
  84. void
  85. LSUP_pred_obj_list_free (LSUP_PredObjList *po)
  86. {
  87. for (size_t i = 0; po->p[i]; i++) {
  88. LSUP_term_free (po->p[i]);
  89. }
  90. for (size_t i = 0; po->o[i]; i++) {
  91. for (size_t j = 0; po->o[i][j]; j++) {
  92. LSUP_term_free (po->o[i][j]);
  93. }
  94. }
  95. free (po);
  96. }
  97. LSUP_rc
  98. LSUP_pred_obj_list_add (LSUP_PredObjList *po, LSUP_Term *p, LSUP_Term **o)
  99. {
  100. size_t i;
  101. i = 0;
  102. while (po->p[i++]); // Count includes sentinel.
  103. LSUP_Term **tmp_p = realloc (po->p, sizeof (*po->p) * (i + 1));
  104. if (!tmp_p) return LSUP_MEM_ERR;
  105. tmp_p[i - 1] = p;
  106. tmp_p[i] = NULL;
  107. po->p = tmp_p;
  108. i = 0;
  109. while (po->o[i++]);
  110. LSUP_Term ***tmp_o = realloc (po->o, sizeof (*po->o) * (i + 1));
  111. if (!tmp_o) return LSUP_MEM_ERR;
  112. tmp_o[i - 1] = o;
  113. tmp_o[i] = NULL;
  114. po->o = tmp_o;
  115. return LSUP_OK;
  116. }
  117. size_t
  118. LSUP_spo_list_add_triples (
  119. LSUP_GraphIterator *it, LSUP_Term *s, const LSUP_PredObjList *po)
  120. {
  121. size_t ct = 0;
  122. if (!s) {
  123. log_error ("Subject is NULL!");
  124. return LSUP_VALUE_ERR;
  125. }
  126. if (!po->p) {
  127. log_error ("Predicate is NULL!");
  128. return LSUP_VALUE_ERR;
  129. }
  130. if (!po->o) {
  131. log_error ("Object list is NULL!");
  132. return LSUP_VALUE_ERR;
  133. }
  134. LSUP_Triple *spo = LSUP_triple_new (s, NULL, NULL);
  135. for (size_t i = 0; po->p[i]; i++) {
  136. spo->p = po->p[i];
  137. for (size_t j = 0; po->o[i][j]; j++) {
  138. spo->o = po->o[i][j];
  139. LSUP_rc rc = LSUP_graph_add_iter (it, spo);
  140. if (rc == LSUP_OK) ct++;
  141. PRCCK (rc);
  142. }
  143. }
  144. free (spo);
  145. return ct;
  146. }
  147. LSUP_Term *
  148. LSUP_bnode_add_collection (LSUP_GraphIterator *it, LSUP_Term **ol)
  149. {
  150. LSUP_NSMap *nsm = LSUP_graph_namespace (LSUP_graph_iter_graph (it));
  151. LSUP_Term
  152. *s = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL),
  153. *rdf_first = LSUP_iriref_new ("rdf:first", nsm),
  154. *rdf_rest = LSUP_iriref_new ("rdf:rest", nsm),
  155. *rdf_nil = LSUP_iriref_new ("rdf:nil", nsm),
  156. *link;
  157. LSUP_Triple *spo = TRP_DUMMY;
  158. link = s;
  159. for (size_t i = 0; ol[i]; i++) {
  160. spo->s = link;
  161. spo->p = rdf_first;
  162. spo->o = ol[i];
  163. PRCNL (LSUP_graph_add_iter (it, spo));
  164. spo->p = rdf_rest;
  165. spo->o = (
  166. ol[i + 1] ? LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL)
  167. : rdf_nil);
  168. PRCNL (LSUP_graph_add_iter (it, spo));
  169. if (link != s) LSUP_term_free (link);
  170. // Current object becomes next subject. Irrelevant for last item.
  171. link = spo->o;
  172. }
  173. LSUP_term_free (rdf_first);
  174. LSUP_term_free (rdf_rest);
  175. LSUP_term_free (rdf_nil);
  176. free (spo);
  177. return s;
  178. }
/*
 * Extern inline functions.
 *
 * File-scope declarations, without the `inline` keyword, of three helpers.
 * NOTE(review): presumably these helpers are defined `inline` in codec.h, in
 * which case the declarations below make this translation unit emit their
 * external definitions (C99 inline rules) -- confirm against the header.
 */
char unescape_char (const char c);
uint8_t *uint8_dup (const uint8_t *str);
uint8_t *uint8_ndup (const uint8_t *str, size_t size);