#include "codec.h"


/** @brief List of characters to be escaped in serialized literals.
 *
 * @sa https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
 */
#define LIT_ECHAR "\t\b\n\r\f\"\'\\"


/** @brief Replace escape sequences in a byte string with the bytes they encode.
 *
 * `\uXXXX` and `\UXXXXXXXX` sequences are parsed as hexadecimal code points
 * and passed to utf8_encode(); any other backslash-prefixed character is
 * passed to unescape_char(). All other bytes are copied verbatim.
 *
 * @param[in] esc_str Escaped input. Only the first `size` bytes are read.
 *
 * @param[in] size Number of bytes to read from `esc_str`.
 *
 * @return Newly allocated, NUL-terminated string that the caller must free;
 *  NULL on allocation failure, on a truncated or malformed escape sequence,
 *  or if the code point cannot be encoded.
 */
uint8_t *
unescape_unicode (const uint8_t *esc_str, size_t size)
{
    // Output will not be longer than the escaped sequence.
    uint8_t *data = malloc (size + 1);
    if (UNLIKELY (!data)) return NULL;

    size_t len = 0;     // Size of output string.
    char hex[9];        // Hex digits of one code point, NUL-terminated.
    uint8_t enc[9];     // Encoded bytes of one code point.

    for (size_t i = 0; i < size;) {
        if (esc_str[i] != '\\') {
            data[len++] = esc_str[i++];
            continue;
        }

        i++; // Skip over '\\'
        if (i >= size) {
            // A lone trailing backslash has nothing to escape; reading on
            // would go past the end of the input.
            log_error ("Truncated escape sequence at end of input.");
            goto fail;
        }

        size_t esc_len; // Number of hex digits in the escape sequence.
        if (esc_str[i] == 'u') {
            esc_len = 4;
        } else if (esc_str[i] == 'U') {
            esc_len = 8;
        } else {
            // Unescape other escaped characters.
            data[len++] = unescape_char (esc_str[i++]);
            continue;
        }

        i++; // Skip over 'u' / 'U'
        if (size - i < esc_len) {
            log_error ("Truncated Unicode escape sequence at end of input.");
            goto fail;
        }

        // Keep the hex text in its own buffer so that it is still intact
        // for error reporting after the encoder has run.
        memcpy (hex, esc_str + i, esc_len);
        hex[esc_len] = '\0';

        // strtoul rather than strtol: 8 hex digits may not fit a signed long.
        char *end;
        uint32_t code_point = (uint32_t) strtoul (hex, &end, 16);
        if (end != hex + esc_len) {
            log_error ("Invalid hex digits in escape sequence: %s", hex);
            goto fail;
        }

        int cp_len = utf8_encode (code_point, enc);
        if (cp_len <= 0) {
            log_error ("Error encoding sequence: %s", hex);
            goto fail;
        }

        // Copy bytes into destination.
        memcpy (data + len, enc, cp_len);
        len += cp_len;
        i += esc_len;
    }

    data[len++] = '\0';

    uint8_t *ret = realloc (data, len); // Compact result.
    if (UNLIKELY (!ret)) goto fail; // `data` is still owned here.

    return ret;

fail:
    free (data);

    return NULL;
}


/** @brief Backslash-escape every character of a string found in LIT_ECHAR.
 *
 * Each such character is replaced by a backslash followed by the result of
 * escape_char() for that character. All other characters are copied as they
 * are.
 *
 * @param[in] in NUL-terminated string to escape.
 *
 * @param[out] out_p Receives a newly allocated, NUL-terminated escaped
 *  string that the caller must free. Not modified on failure.
 *
 * @return LSUP_OK on success; LSUP_MEM_ERR if allocation fails.
 */
LSUP_rc
escape_lit (const char *in, char **out_p)
{
    const size_t in_len = strlen (in);

    // Each escaped character grows the output by one byte (the backslash).
    size_t n_esc = 0;
    for (
            size_t i = strcspn (in, LIT_ECHAR);
            i < in_len;
            i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
        n_esc++;
    }

    char *out = malloc (in_len + n_esc + 1);
    if (UNLIKELY (!out)) return LSUP_MEM_ERR;

    size_t i = 0, j = 0; // Read offset in `in`, write offset in `out`.
    for (;;) {
        // Copy the run of characters that need no escaping.
        size_t run = strcspn (in + i, LIT_ECHAR);
        memcpy (out + j, in + i, run);
        i += run;
        j += run;

        if (i >= in_len) break;

        out[j++] = '\\';
        out[j++] = escape_char (in[i++]);
    }
    out[j] = '\0';

    *out_p = out;

    return LSUP_OK;
}


/** @brief Build a one-line "Generated by" header.
 *
 * The line consists of the given prefix, the library name and version, and
 * the current UTC date formatted as MM/DD/YYYY, followed by a newline. If
 * the date cannot be determined, it is left empty.
 *
 * @param[in] pfx NUL-terminated string placed at the start of the line.
 *
 * @return Newly allocated string that the caller must free; NULL if
 *  allocation fails.
 */
char *
fmt_header (char *pfx)
{
    const char *body = "Generated by lsup_rdf v" LSUP_VERSION " on ";

    time_t now = time (NULL);
    const struct tm *utc = gmtime (&now);

    char date[16];
    // gmtime() may return NULL and strftime() leaves the buffer
    // indeterminate when it returns 0; fall back to an empty date.
    if (!utc || strftime (date, sizeof (date), "%m/%d/%Y", utc) == 0)
        date[0] = '\0';

    // + 2: trailing newline and NUL terminator.
    size_t out_size = strlen (pfx) + strlen (body) + strlen (date) + 2;
    char *out = malloc (out_size);
    if (UNLIKELY (!out)) return NULL;

    snprintf (out, out_size, "%s%s%s\n", pfx, body, date);

    return out;
}


/*
 * Extern inline functions.
 */

char escape_char (const char c);
char unescape_char (const char c);
uint8_t *uint8_dup (const uint8_t *str);
uint8_t *uint8_ndup (const uint8_t *str, size_t size);