codec.c 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. #include "codec.h"
  /**
   * Decode backslash escapes in a NUL-terminated byte string.
   *
   * Handled input:
   *  - `\uXXXX` (exactly four hex digits): the code point is passed to
   *    utf8_encode() and the bytes it produces are appended to the output;
   *  - `\U...`: not implemented; an error is logged and NULL is returned;
   *  - a backslash followed by any other byte `c`: unescape_char(c) is
   *    appended;
   *  - any other byte is copied unchanged.
   *
   * @param esc_str NUL-terminated escaped string. NULL yields NULL.
   *
   * @return Newly allocated, NUL-terminated string owned by the caller
   *  (release with free()), or NULL on allocation failure, on a `\u` escape
   *  that is truncated or contains non-hex characters, or on a `\U` escape.
   */
  uint8_t *unescape_unicode (const uint8_t *esc_str)
  {
      if (!esc_str) return NULL;

      size_t size = strlen ((const char *) esc_str);

      // A 6-byte `\uXXXX` escape is replaced by the bytes utf8_encode()
      // emits and a 2-byte `\c` escape by one byte, so `size + 1` bytes are
      // enough as long as utf8_encode() never emits more than 6 bytes.
      uint8_t *data = malloc (size + 1);
      if (!data) return NULL;

      size_t j = 0;
      for (size_t i = 0; i < size;) {
          if (esc_str[i] != '\\') {
              // Copy ordinary byte verbatim.
              data[j++] = esc_str[i++];
              continue;
          }

          i++; // Skip over '\\'. At worst esc_str[i] is now the terminator.

          if (esc_str[i] == 'u') {
              // 4-hex sequence.
              i++; // Skip over 'u'

              // Count hex digits, stopping at the first non-hex byte. The
              // terminating NUL is not a hex digit, so this never reads
              // past the end of the input.
              size_t ndigits = 0;
              while (ndigits < 4) {
                  uint8_t c = esc_str[i + ndigits];
                  int is_hex = (c >= '0' && c <= '9')
                          || (c >= 'a' && c <= 'f')
                          || (c >= 'A' && c <= 'F');
                  if (!is_hex) break;
                  ndigits++;
              }
              if (ndigits < 4) {
                  log_error ("Truncated or invalid \\u escape sequence.");
                  free (data);
                  return NULL;
              }

              // Hex digits are kept in their own buffer so that they are
              // still intact if the encoding error below has to print them.
              char hex[5];
              memcpy (hex, esc_str + i, 4);
              hex[4] = '\0';
              uint32_t code_point = (uint32_t) strtol (hex, NULL, 16);
              log_debug ("tmp_val: %u", (unsigned) code_point);

              // Same size as the scratch buffer the encoder was given before.
              uint8_t utf8[5];
              int nbytes = utf8_encode (code_point, utf8);
              if (nbytes > 0) {
                  // Copy bytes into destination.
                  memcpy (data + j, utf8, (size_t) nbytes);
                  // The second value is only meaningful for multi-byte
                  // encodings.
                  log_trace ("UC byte value: %x %x", data[j], data[j + 1]);
                  j += (size_t) nbytes;
              } else {
                  log_error ("Error encoding sequence: \\u%s", hex);
              }
              i += 4;

          } else if (esc_str[i] == 'U') {
              // 8-hex sequence.
              log_error ("4-byte sequence encoding is not yet implemented.");
              free (data); // Do not leak the partial result.
              return NULL; // TODO encode 4-byte sequences

          } else {
              // Unescape other escaped characters.
              data[j++] = unescape_char (esc_str[i++]);
          }
      }

      data[j++] = '\0';

      // Compact result. A failed shrink leaves `data` valid, so hand that
      // back instead of leaking it.
      uint8_t *tmp = realloc (data, j);
      return tmp ? tmp : data;
  }
  46. LSUP_Term **
  47. LSUP_obj_list_add (LSUP_Term **ol, LSUP_Term *o)
  48. {
  49. size_t i = 0;
  50. while (ol[i++]);
  51. LSUP_Term **tmp = realloc (ol, sizeof (*ol) * (i + 1));
  52. if (!tmp) return NULL;
  53. tmp[i] = o;
  54. tmp[i + 1] = NULL;
  55. return ol;
  56. }
  57. LSUP_PredObjList *
  58. LSUP_pred_obj_list_new (void)
  59. {
  60. /*
  61. * Init state:
  62. * {p: [NULL], o: [NULL]}
  63. */
  64. LSUP_PredObjList *po;
  65. MALLOC_GUARD (po, NULL);
  66. // Set sentinels.
  67. CALLOC_GUARD (po->p, NULL);
  68. CALLOC_GUARD (po->o, NULL);
  69. return po;
  70. }
  71. void
  72. LSUP_pred_obj_list_free (LSUP_PredObjList *po)
  73. {
  74. for (size_t i = 0; po->p[i]; i++) {
  75. LSUP_term_free (po->p[i]);
  76. }
  77. for (size_t i = 0; po->o[i]; i++) {
  78. for (size_t j = 0; po->o[i][j]; j++) {
  79. LSUP_term_free (po->o[i][j]);
  80. }
  81. }
  82. free (po);
  83. }
  84. LSUP_rc
  85. LSUP_pred_obj_list_add (LSUP_PredObjList *po, LSUP_Term *p, LSUP_Term **o)
  86. {
  87. size_t i;
  88. i = 0;
  89. while (po->p[i++]); // Count includes sentinel.
  90. LSUP_Term **tmp_p = realloc (po->p, sizeof (*po->p) * (i + 1));
  91. if (!tmp_p) return LSUP_MEM_ERR;
  92. tmp_p[i] = p;
  93. tmp_p[i + 1] = NULL;
  94. po->p = tmp_p;
  95. i = 0;
  96. while (po->o[i++]);
  97. LSUP_Term ***tmp_o = realloc (po->o, sizeof (*po->o) * (i + 1));
  98. if (!tmp_o) return LSUP_MEM_ERR;
  99. tmp_o[i] = o;
  100. tmp_o[i + 1] = NULL;
  101. po->o = tmp_o;
  102. return LSUP_OK;
  103. }
  104. LSUP_rc
  105. LSUP_spo_list_add_triples (
  106. LSUP_GraphIterator *it, LSUP_Term *s, const LSUP_PredObjList *po)
  107. {
  108. LSUP_Triple *spo = LSUP_triple_new (s, NULL, NULL);
  109. for (size_t i = 0; po->p[i]; i++) {
  110. spo->p = po->p[i];
  111. for (size_t j = 0; po->o[i][j]; j++) {
  112. spo->o = po->o[i][j];
  113. PRCCK (LSUP_graph_add_iter (it, spo));
  114. }
  115. }
  116. free (spo);
  117. return LSUP_OK;
  118. }
  119. LSUP_Term *
  120. LSUP_bnode_add_collection (LSUP_GraphIterator *it, LSUP_Term **ol)
  121. {
  122. LSUP_NSMap *nsm = LSUP_graph_namespace (LSUP_graph_iter_graph (it));
  123. LSUP_Term
  124. *s = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL),
  125. *rdf_first = LSUP_iriref_new ("rdf:first", nsm),
  126. *rdf_rest = LSUP_iriref_new ("rdf:rest", nsm),
  127. *rdf_nil = LSUP_iriref_new ("rdf:nil", nsm),
  128. *link;
  129. LSUP_Triple *spo = TRP_DUMMY;
  130. link = s;
  131. for (size_t i = 0; ol[i]; i++) {
  132. spo->s = link;
  133. spo->p = rdf_first;
  134. spo->o = ol[i];
  135. PRCNL (LSUP_graph_add_iter (it, spo));
  136. spo->p = rdf_rest;
  137. spo->o = (
  138. ol[i + 1] ? LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL)
  139. : rdf_nil);
  140. PRCNL (LSUP_graph_add_iter (it, spo));
  141. if (link != s) LSUP_term_free (link);
  142. // Current object becomes next subject. Irrelevant for last item.
  143. link = spo->o;
  144. }
  145. LSUP_term_free (rdf_first);
  146. LSUP_term_free (rdf_rest);
  147. LSUP_term_free (rdf_nil);
  148. free (spo);
  149. return s;
  150. }
  /*
   * Extern inline functions.
   *
   * File-scope declarations without the `inline` specifier.
   * NOTE(review): presumably the matching `inline` definitions live in
   * codec.h, making this the translation unit that emits their external
   * definitions — confirm.
   */
  extern char unescape_char (const char c);
  extern uint8_t *uint8_dup (const uint8_t *str);
  extern uint8_t *uint8_ndup (const uint8_t *str, size_t size);