codec.c 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. #include "codec.h"
  2. /** @brief Characters that escape_lit() backslash-escapes in serialized literals:
  3. * tab, backspace, newline, carriage return, form feed, double quote, single quote, backslash.
  4. * @sa https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
  5. */
  6. #define LIT_ECHAR "\t\b\n\r\f\"\'\\"
  7. uint8_t *unescape_unicode (const uint8_t *esc_str, size_t size)
  8. {
  9. // Output will not be longer than the escaped sequence.
  10. uint8_t *data = malloc (size + 1);
  11. size_t len = 0; // Size of output string.
  12. uint8_t tmp_chr[9];
  13. for (size_t i = 0; i < size;) {
  14. int esc_len; // Size of escape sequence.
  15. if (esc_str[i] == '\\') {
  16. i++; // Skip over '\\'
  17. if (esc_str[i] == 'u') {
  18. // 4-hex (2 bytes) sequence.
  19. esc_len = 4;
  20. } else if (esc_str[i] == 'U') {
  21. // 8-hex (4 bytes) sequence.
  22. esc_len = 8;
  23. } else {
  24. // Unescape other escaped characters.
  25. data[len++] = unescape_char (esc_str[i++]);
  26. continue;
  27. }
  28. // Continue encoding UTF code point.
  29. i ++; // Skip over 'u' / 'U'
  30. // Use tmp_chr to hold the hex string for the code point.
  31. memcpy (tmp_chr, esc_str + i, esc_len);
  32. tmp_chr[esc_len] = '\0';
  33. uint32_t tmp_val = strtol ((char *) tmp_chr, NULL, 16);
  34. //log_debug ("tmp_val: %d", tmp_val);
  35. // Reuse tmp_chr to hold the byte values for the code point.
  36. int cp_len = utf8_encode (tmp_val, tmp_chr);
  37. if (cp_len == 0) {
  38. log_error ("Error encoding sequence: %s", tmp_chr);
  39. return NULL;
  40. }
  41. // Copy bytes into destination.
  42. memcpy (data + len, tmp_chr, cp_len);
  43. #if 0
  44. // This can generate a LOT of output.
  45. if (esc_len == 4)
  46. log_trace ("UC byte value: %2x %2x", data[len], data[len + 1]);
  47. else
  48. log_trace (
  49. "UC byte value: %2x %2x %2x %2x",
  50. data[len], data[len + 1], data[len + 2], data[len + 3]
  51. );
  52. #endif
  53. len += cp_len;
  54. i += esc_len;
  55. } else {
  56. data[len++] = esc_str[i++];
  57. }
  58. }
  59. data[len++] = '\0';
  60. uint8_t *ret = realloc (data, len); // Compact result.
  61. if (UNLIKELY (!ret)) return NULL;
  62. return ret;
  63. }
  64. LSUP_rc
  65. escape_lit (const char *in, char **out_p)
  66. {
  67. size_t out_size = strlen (in) + 1;
  68. // Expand output string size to accommodate escape characters.
  69. for (
  70. size_t i = strcspn (in, LIT_ECHAR);
  71. i < strlen (in);
  72. i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
  73. out_size ++;
  74. }
  75. char *out = calloc (1, out_size);
  76. if (UNLIKELY (!out)) return LSUP_MEM_ERR;
  77. size_t boundary;
  78. boundary = strcspn (in, LIT_ECHAR);
  79. for (size_t i = 0, j = 0;;) {
  80. out = strncat (out, in + i, boundary);
  81. i += boundary;
  82. j += boundary;
  83. if (i >= strlen (in)) break;
  84. out[j++] = '\\';
  85. out[j++] = escape_char (in[i++]);
  86. boundary = strcspn (in + i, LIT_ECHAR);
  87. }
  88. *out_p = out;
  89. return LSUP_OK;
  90. }
  91. char *
  92. fmt_header (char *pfx)
  93. {
  94. char *body = "Generated by lsup_rdf v" LSUP_VERSION " on ";
  95. time_t now = time (NULL);
  96. char date[16];
  97. strftime (date, sizeof (date), "%m/%d/%Y", gmtime (&now));
  98. char *out = malloc (strlen (pfx) + strlen (body) + strlen (date) + 2);
  99. if (UNLIKELY (!out)) return NULL;
  100. sprintf (out, "%s%s%s\n", pfx, body, date);
  101. return out;
  102. }
  103. /*
  104. * Extern inline functions.
  105. */
  106. char escape_char (const char c);
  107. char unescape_char (const char c);
  108. uint8_t *uint8_dup (const uint8_t *str);
  109. uint8_t *uint8_ndup (const uint8_t *str, size_t size);