123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138 |
- #include "codec.h"
- /** @brief List of characters to be escaped in serialized literals.
- *
- * The set is: tab, backspace, newline, carriage return, form feed, double
- * quote, single quote, and backslash. It is passed to strcspn() as the
- * reject set when scanning a literal for characters that need a backslash
- * escape.
- *
- * @sa https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
- */
- #define LIT_ECHAR "\t\b\n\r\f\"\'\\"
/** @brief Parse exactly `n` hexadecimal digits into a code point value.
 *
 * Unlike strtol(), no leading whitespace, sign, or "0x" prefix is accepted.
 *
 * @param[in] s Pointer to the first hex digit. At least `n` bytes must be
 *  readable.
 * @param[in] n Number of digits to parse (at most 8, so the result fits in
 *  32 bits).
 * @param[out] cp Parsed value. Only written on success.
 *
 * @return 0 on success; -1 if any of the `n` bytes is not a hex digit.
 */
static int
parse_hex_cp (const uint8_t *s, size_t n, uint32_t *cp)
{
    uint32_t val = 0;

    for (size_t i = 0; i < n; i++) {
        uint8_t c = s[i];
        uint32_t digit;

        if (c >= '0' && c <= '9') digit = (uint32_t) (c - '0');
        else if (c >= 'a' && c <= 'f') digit = (uint32_t) (c - 'a') + 10;
        else if (c >= 'A' && c <= 'F') digit = (uint32_t) (c - 'A') + 10;
        else return -1;

        val = (val << 4) | digit;
    }
    *cp = val;

    return 0;
}


/** @brief Replace backslash escape sequences with the bytes they stand for.
 *
 * `\uXXXX` and `\UXXXXXXXX` sequences are converted to a code point and
 * encoded via utf8_encode(); any other character following a backslash is
 * passed through unescape_char(). All other bytes are copied verbatim.
 *
 * @param[in] esc_str Escaped input. Only the first `size` bytes are read; a
 *  NUL terminator is not required.
 * @param[in] size Number of bytes to process.
 *
 * @return Newly allocated, NUL-terminated string that the caller must
 *  free(); NULL on allocation failure, on malformed input (backslash as the
 *  last byte, truncated or non-hexadecimal `\u` / `\U` sequence), or if the
 *  code point cannot be encoded.
 */
uint8_t *unescape_unicode (const uint8_t *esc_str, size_t size)
{
    // Every escape sequence is at least as long as the bytes it expands to,
    // so the output never exceeds the input size (plus the terminator).
    uint8_t *data = malloc (size + 1);
    if (UNLIKELY (!data)) return NULL;

    size_t len = 0;  // Number of bytes written to data.

    for (size_t i = 0; i < size;) {
        if (esc_str[i] != '\\') {
            data[len++] = esc_str[i++];
            continue;
        }

        i++;  // Skip over the backslash.
        if (i >= size) {
            log_error ("Dangling backslash at offset %zu.", i - 1);
            goto fail;
        }

        size_t esc_len;  // Number of hex digits in the escape sequence.
        if (esc_str[i] == 'u') {
            esc_len = 4;
        } else if (esc_str[i] == 'U') {
            esc_len = 8;
        } else {
            // Single-character escape.
            data[len++] = unescape_char (esc_str[i++]);
            continue;
        }

        i++;  // Skip over 'u' / 'U'.
        if (size - i < esc_len) {
            log_error ("Truncated escape sequence at offset %zu.", i);
            goto fail;
        }

        // Keep a printable copy of the hex digits for error reporting.
        char hex[9];
        memcpy (hex, esc_str + i, esc_len);
        hex[esc_len] = '\0';

        uint32_t cp;
        if (parse_hex_cp (esc_str + i, esc_len, &cp) != 0) {
            log_error ("Invalid hex digits in escape sequence: %s", hex);
            goto fail;
        }

        uint8_t enc[9];  // Encoded bytes for the code point.
        int cp_len = utf8_encode (cp, enc);
        if (cp_len <= 0) {
            log_error ("Error encoding sequence: %s", hex);
            goto fail;
        }

        memcpy (data + len, enc, (size_t) cp_len);
        len += (size_t) cp_len;
        i += esc_len;
    }

    data[len++] = '\0';

    // Compact the result. If shrinking fails the original buffer is still
    // valid, so hand that back instead of leaking it.
    uint8_t *ret = realloc (data, len);

    return ret ? ret : data;

fail:
    free (data);

    return NULL;
}
- LSUP_rc
- escape_lit (const char *in, char **out_p)
- {
- size_t out_size = strlen (in) + 1;
- // Expand output string size to accommodate escape characters.
- for (
- size_t i = strcspn (in, LIT_ECHAR);
- i < strlen (in);
- i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
- out_size ++;
- }
- char *out = calloc (1, out_size);
- if (UNLIKELY (!out)) return LSUP_MEM_ERR;
- size_t boundary;
- boundary = strcspn (in, LIT_ECHAR);
- for (size_t i = 0, j = 0;;) {
- out = strncat (out, in + i, boundary);
- i += boundary;
- j += boundary;
- if (i >= strlen (in)) break;
- out[j++] = '\\';
- out[j++] = escape_char (in[i++]);
- boundary = strcspn (in + i, LIT_ECHAR);
- }
- *out_p = out;
- return LSUP_OK;
- }
- char *
- fmt_header (char *pfx)
- {
- char *body = "Generated by lsup_rdf v" LSUP_VERSION " on ";
- time_t now = time (NULL);
- char date[16];
- strftime (date, sizeof (date), "%m/%d/%Y", gmtime (&now));
- char *out = malloc (strlen (pfx) + strlen (body) + strlen (date) + 2);
- if (UNLIKELY (!out)) return NULL;
- sprintf (out, "%s%s%s\n", pfx, body, date);
- return out;
- }
- /*
- * Extern inline functions.
- *
- * NOTE(review): these plain (non-`inline`) file-scope declarations presumably
- * pair with `inline` definitions in codec.h, so that this translation unit
- * emits the external definitions of those functions — confirm against the
- * header.
- */
- char escape_char (const char c);
- char unescape_char (const char c);
- uint8_t *uint8_dup (const uint8_t *str);
- uint8_t *uint8_ndup (const uint8_t *str, size_t size);
|