/*
 * scossu/lsup_rdf
 */

							#include "lsup/codec.h"


/** @brief List of characters to be escaped in serialized literals.
 *
 * The set is: tab, backspace, line feed, carriage return, form feed, double
 * quote, single quote and backslash. It is used as the reject set for
 * `strcspn()` when scanning a literal for characters that need a backslash
 * escape.
 *
 * @sa https://www.w3.org/TR/n-triples/#grammar-production-ECHAR
 */
#define LIT_ECHAR "\t\b\n\r\f\"\'\\"


/** @brief Unescape a byte string containing backslash escape sequences.
 *
 * `\uXXXX` (4 hex digits) and `\UXXXXXXXX` (8 hex digits) sequences are
 * replaced with the bytes that `utf8_encode()` produces for the code point.
 * Any other backslash-prefixed character is translated with
 * `unescape_char()`. All remaining bytes are copied verbatim.
 *
 * @param[in] esc_str Escaped input. Only the first `size` bytes are read; the
 *  buffer does not need to be NUL-terminated.
 *
 * @param[in] size Number of bytes of `esc_str` to process.
 *
 * @return Newly allocated, NUL-terminated string that the caller must free.
 *  NULL is returned, and nothing is leaked, if memory cannot be allocated,
 *  if the input ends in the middle of an escape sequence, if the digits of a
 *  `\u` / `\U` sequence are not fully consumed as hexadecimal, or if
 *  `utf8_encode()` reports a failure (returns 0).
 */
uint8_t *unescape_unicode (const uint8_t *esc_str, size_t size)
{
    // Output will not be longer than the escaped sequence.
    uint8_t *data = malloc (size + 1);
    if (UNLIKELY (!data)) return NULL;

    size_t len = 0; // Size of output string.
    for (size_t i = 0; i < size;) {
        if (esc_str[i] != '\\') {
            // Plain byte: copy as is.
            data[len++] = esc_str[i++];
            continue;
        }

        i++; // Skip over '\\'
        if (UNLIKELY (i >= size)) {
            // A lone backslash at the very end has nothing to escape; reading
            // further would go past the caller-provided length.
            log_error ("Truncated escape sequence at end of input.");
            goto fail;
        }

        size_t esc_len; // Number of hex digits in the escape sequence.
        if (esc_str[i] == 'u') {
            // 4-hex (2 bytes) sequence.
            esc_len = 4;
        } else if (esc_str[i] == 'U') {
            // 8-hex (4 bytes) sequence.
            esc_len = 8;
        } else {
            // Unescape other escaped characters.
            data[len++] = unescape_char (esc_str[i++]);
            continue;
        }

        i++; // Skip over 'u' / 'U'
        if (UNLIKELY (size - i < esc_len)) {
            log_error ("Truncated Unicode escape sequence at end of input.");
            goto fail;
        }

        // NUL-terminated copy of the hex digits, kept intact for parsing and
        // for error reporting.
        char hex[9];
        memcpy (hex, esc_str + i, esc_len);
        hex[esc_len] = '\0';

        // strtoul rather than strtol: 8 hex digits may exceed LONG_MAX where
        // long is 32 bits wide.
        char *end;
        unsigned long cp = strtoul (hex, &end, 16);
        if (UNLIKELY (end != hex + esc_len)) {
            log_error ("Invalid hex digits in escape sequence: %s", hex);
            goto fail;
        }

        // Separate buffer for the encoded bytes, so that `hex` is still
        // printable if encoding fails.
        uint8_t cp_bytes[9];
        int cp_len = utf8_encode ((uint32_t) cp, cp_bytes);
        if (cp_len == 0) {
            log_error ("Error encoding sequence: %s", hex);
            goto fail;
        }

        // Copy bytes into destination.
        memcpy (data + len, cp_bytes, cp_len);
        len += cp_len;
        i += esc_len;
    }

    data[len++] = '\0';

    // Compact result. If shrinking fails, the original buffer is still valid
    // and holds the complete result, so return it instead of leaking it.
    uint8_t *ret = realloc (data, len);
    return ret ? ret : data;

fail:
    free (data);
    return NULL;
}


/** @brief Backslash-escape the characters of a string listed in LIT_ECHAR.
 *
 * Each character of `in` that appears in LIT_ECHAR is replaced by a backslash
 * followed by the result of `escape_char()` for that character. All other
 * characters are copied unchanged.
 *
 * @param[in] in NUL-terminated string to escape.
 *
 * @param[out] out_p On success, set to a newly allocated, NUL-terminated
 *  string that the caller must free. Not modified on failure.
 *
 * @return LSUP_OK on success; LSUP_MEM_ERR if the output buffer cannot be
 *  allocated.
 */
LSUP_rc
escape_lit (const char *in, char **out_p)
{
    // Measure the input once instead of on every loop iteration.
    const size_t in_len = strlen (in);

    // Each escaped character takes one extra byte in the output.
    size_t esc_ct = 0;
    for (
            size_t i = strcspn (in, LIT_ECHAR);
            i < in_len;
            i += strcspn (in + i + 1, LIT_ECHAR) + 1) {
        esc_ct ++;
    }

    char *out = malloc (in_len + esc_ct + 1);
    if (UNLIKELY (!out)) return LSUP_MEM_ERR;

    // i indexes the input, j the output. Runs of unescaped characters are
    // copied in bulk at the known write position, so the output is never
    // rescanned.
    size_t i = 0, j = 0;
    while (i < in_len) {
        size_t run = strcspn (in + i, LIT_ECHAR);
        memcpy (out + j, in + i, run);
        i += run;
        j += run;
        if (i >= in_len) break;

        out[j++] = '\\';
        out[j++] = escape_char (in[i++]);
    }
    out[j] = '\0';

    *out_p = out;
    return LSUP_OK;
}


/** @brief Build a one-line "Generated by" header.
 *
 * The result is `pfx`, followed by "Generated by lsup_rdf v<LSUP_VERSION> on ",
 * the current UTC date formatted as MM/DD/YYYY, and a newline.
 *
 * @param[in] pfx NUL-terminated string placed at the start of the line. It is
 *  only read, never modified.
 *
 * @return Newly allocated, NUL-terminated string that the caller must free;
 *  NULL if the current time cannot be converted or formatted, or if memory
 *  cannot be allocated.
 */
char *
fmt_header (char *pfx)
{
    const char *body = "Generated by lsup_rdf v" LSUP_VERSION " on ";

    time_t now = time (NULL);
    const struct tm *utc = gmtime (&now);
    // gmtime may return NULL; passing that to strftime is undefined.
    if (UNLIKELY (!utc)) return NULL;

    char date[16];
    // A return of 0 means the buffer contents are indeterminate.
    if (UNLIKELY (strftime (date, sizeof (date), "%m/%d/%Y", utc) == 0))
        return NULL;

    // +2: trailing newline and NUL terminator.
    size_t out_size = strlen (pfx) + strlen (body) + strlen (date) + 2;
    char *out = malloc (out_size);
    if (UNLIKELY (!out)) return NULL;

    snprintf (out, out_size, "%s%s%s\n", pfx, body, date);

    return out;
}

/*
 * Extern inline functions.
 *
 * None of these functions is defined in this file; only their prototypes
 * appear below.
 * NOTE(review): presumably each one is defined `inline` in lsup/codec.h, in
 * which case these plain declarations make this translation unit provide the
 * external definitions — confirm against the header.
 */

char escape_char (const char c);
char unescape_char (const char c);
uint8_t *uint8_dup (const uint8_t *str);
uint8_t *uint8_ndup (const uint8_t *str, size_t size);