#include "term.h"

// URI parsing regular expression, as given in RFC 3986, Appendix B.
#define URI_REGEX_STR \
    "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
// Length of a string, or 0 for a NULL pointer. The whole expansion is
// parenthesized so that it can be used inside larger expressions (e.g.
// `NLEN (s) + 1`). Note that `str` is evaluated twice.
#define NLEN(str)   ((str) == NULL ? 0 : strlen ((str)))
// Characters that cause URI data to be rejected by LSUP_term_init().
#define INVALID_URI_CHARS "<>\" {}|\\^`"


// Compiled form of URI_REGEX_STR. Compiled lazily by LSUP_term_init() the
// first time a URI term is validated, and released by term_cleanup().
static regex_t ptn;
// True while `ptn` holds a compiled pattern; guards both regcomp and regfree.
static bool ptn_init = false;


/* Global inline prototypes. */

LSUP_Term *LSUP_uri_new (const char *data);
LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data);


/**
 * Free the global regex struct, if it has been compiled.
 *
 * Registered with atexit() by LSUP_term_init() when the pattern is compiled.
 * The initialization flag is cleared after the pattern is released, so that
 * calling this function before program exit cannot leave a freed pattern
 * marked as usable: the next URI validation simply compiles it again.
 */
void term_cleanup (void)
{
    if (!ptn_init) return;

    regfree (&ptn);
    ptn_init = false;
}


/**
 * Allocate a new term and initialize it.
 *
 * If `type` is LSUP_TERM_UNDEFINED, only the type is recorded and the other
 * arguments are ignored. Otherwise all arguments are handed over to
 * LSUP_term_init().
 *
 * Returns the new term, or NULL if initialization failed. CALLOC_GUARD is
 * expected to return NULL from this function if the allocation fails.
 */
LSUP_Term *
LSUP_term_new (
        LSUP_term_type type, const char *data, char *datatype, char *lang)
{
    LSUP_Term *term;
    CALLOC_GUARD (term, NULL);

    // If undefined, just set the type.
    if (type == LSUP_TERM_UNDEFINED) {
        term->type = type;
        return term;
    }

    if (UNLIKELY (LSUP_term_init (
                    term, type, data, datatype, lang) != LSUP_OK)) {
        // LSUP_term_init() may fail after it has already allocated `data`
        // (e.g. when allocating `datatype` fails), so release the members
        // before the handle. free (NULL) is a no-op.
        free (term->data);
        free (term->datatype);
        free (term);
        return NULL;
    }

    return term;
}


LSUP_Term *
LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
{
    LSUP_Term *term;
    MALLOC_GUARD (term, NULL);

    if (UNLIKELY (LSUP_term_deserialize (sterm, term) != LSUP_OK)) {
        free (term);
        return NULL;
    }

    return term;
}


/**
 * Allocate a new buffer and fill it with the serialized form of a term.
 *
 * Returns the new buffer, or NULL if LSUP_term_serialize() did not return
 * LSUP_OK (which includes the case of a NULL term).
 */
LSUP_Buffer *
LSUP_buffer_new_from_term (const LSUP_Term *term)
{
    LSUP_Buffer *buf;
    MALLOC_GUARD (buf, NULL);

    // The handle starts without any storage attached.
    buf->addr = NULL;

    if (LSUP_term_serialize (term, buf) == LSUP_OK) return buf;

    free (buf);

    return NULL;
}


/**
 * Initialize, or re-initialize, a term in place.
 *
 * `term->data` and `term->datatype` are passed to realloc() and free(), so
 * on entry they must be either NULL or pointers previously allocated for
 * this term. The strings in `data`, `datatype` and `lang` are copied.
 *
 * URI terms are rejected if they contain any of INVALID_URI_CHARS or do not
 * match URI_REGEX_STR.
 *
 * Returns LSUP_OK on success; LSUP_VALUE_ERR if `data` is NULL or is not an
 * acceptable URI; LSUP_ERROR if the URI pattern cannot be compiled;
 * LSUP_MEM_ERR if an allocation fails.
 */
LSUP_rc
LSUP_term_init(
        LSUP_Term *term, LSUP_term_type type,
        const char *data, char *datatype, char *lang)
{
    // Data are mandatory here: LSUP_term_new() handles LSUP_TERM_UNDEFINED
    // on its own, without calling this function.
    if (!data) return LSUP_VALUE_ERR;
    term->type = type;

    // Validate URI.
    if (term->type == LSUP_TERM_URI) {
        // TODO Cheap fix. Should url-encode all invalid chars.
        if (strpbrk (data, INVALID_URI_CHARS) != NULL) {
            fprintf (
                    stderr, "Characters %s are not allowed.\n",
                    INVALID_URI_CHARS);

            return LSUP_VALUE_ERR;
        }

        // Compile the pattern on first use and schedule its release.
        if (UNLIKELY (!ptn_init)) {
            int rc = regcomp (&ptn, URI_REGEX_STR, REG_EXTENDED);
            if (rc != 0) return LSUP_ERROR;
            ptn_init = true;
            atexit (term_cleanup);
        }

        if (regexec (&ptn, data, 0, NULL, 0) != 0) {
            fprintf (stderr, "Error matching URI pattern.\n");

            return LSUP_VALUE_ERR;
        }
    }

    // Go through a temporary so that the old block is not lost if realloc
    // fails.
    size_t data_size = strlen (data) + 1;
    char *data_tmp = realloc (term->data, data_size);
    if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
    term->data = data_tmp;
    memcpy (term->data, data, data_size);

    if (datatype) {
        size_t datatype_size = strlen (datatype) + 1;
        data_tmp = realloc (term->datatype, datatype_size);
        if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
        term->datatype = data_tmp;
        memcpy (term->datatype, datatype, datatype_size);
    } else {
        free (term->datatype);
        term->datatype = NULL;
    }

    if (lang) {
        // TODO validate language and country code
        // `lang` may be shorter than LANG_SIZE, so it must not be read as a
        // fixed-size block. strncpy stops at the terminator and zero-fills
        // the remainder; the last byte is forced to NUL so that the stored
        // tag is always a valid string, even if `lang` is too long.
        strncpy (term->lang, lang, LANG_SIZE - 1);
        term->lang[LANG_SIZE - 1] = '\0';
    } else {
        memset (term->lang, 0, LANG_SIZE);
    }

    return LSUP_OK;
}


/*
 * This function initializes `sterm` with the following byte sequence:
 *
 * - 1 byte for the term type;
 * - the NUL-terminated data string;
 * - if the term has a data type and/or a language tag: the NUL-terminated
 *   data type string (empty, i.e. a single NUL, if the term has a language
 *   tag but no data type);
 * - if the term has a language tag: the NUL-terminated language tag.
 *
 * The index for `data` is always 1. The index for `datatype` is found by
 * the terminating NUL of `data`, and the index for `lang` by the terminating
 * NUL of `datatype`.
 *
 * Returns LSUP_OK on success; LSUP_NOACTION if `term` is NULL;
 * LSUP_VALUE_ERR if the term has no data; LSUP_MEM_ERR if the buffer has no
 * storage after being initialized.
 *
 * Serialized representations of some RDF terms (the values shown for the
 * type byte are illustrative):
 *
 * <http://hello.org>
 *
 * 0      1                size=18
 * | \x01 | http://hello.org\x00 |
 * type   data
 *
 * "hello"
 *
 * 0      1      size=7
 * | \x03 | hello\x00 |
 * type   data
 *
 * "hello"^^xsd:string
 *
 * 0      1           7          size=18
 * | \x03 | hello\x00 | xsd:string\x00 |
 * type   data        datatype
 *
 * (note: the "xsd:" prefix is used for simplification here; it would
 * normally be a fully qualified URI)
 *
 * "hello"@en-US, with an explicit data type
 *
 * 0      1           7               18     size=24
 * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00 |
 * type   data        datatype         lang
 *
 * "hello"@en-US, without a data type
 *
 * 0      1           7      8       size=14
 * | \x03 | hello\x00 | \x00 | en-US\x00 |
 * type   data        (none) lang
 */
LSUP_rc
LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm)
{
    if (UNLIKELY (term == NULL)) return LSUP_NOACTION;
    // A term without data (e.g. one that was never initialized) cannot be
    // represented.
    if (UNLIKELY (term->data == NULL)) return LSUP_VALUE_ERR;

    const size_t data_idx = 1;
    const size_t data_len = strlen (term->data) + 1;
    const size_t lang_len = strlen (term->lang);
    const bool has_datatype = term->datatype != NULL;
    const bool has_lang = lang_len > 0;

    size_t size = data_idx + data_len,
           datatype_idx = 0, datatype_len = 0, lang_idx = 0;

    // The data type slot is also needed as a placeholder when only the
    // language tag is present, otherwise the tag would be lost and terms
    // differing only by language would serialize identically.
    if (has_datatype || has_lang) {
        datatype_idx = size;
        datatype_len = (has_datatype ? strlen (term->datatype) : 0) + 1;
        size += datatype_len;
    }

    if (has_lang) {
        lang_idx = size;
        size += lang_len + 1;
    }

    //TRACE ("Serialized term size: %lu", size);
    LSUP_buffer_init (sterm, size, NULL);
    if (UNLIKELY (sterm->addr == NULL)) return LSUP_MEM_ERR;

    char *out = (char *) sterm->addr;

    // Copy type. The value is converted rather than copied byte-wise, so
    // that the result does not depend on the size or byte order of the type.
    out[0] = (char) term->type;

    // Copy data.
    memcpy (out + data_idx, term->data, data_len);

    // Copy data type, or an empty placeholder.
    if (datatype_len > 0) {
        if (has_datatype)
            memcpy (out + datatype_idx, term->datatype, datatype_len);
        else
            out[datatype_idx] = '\0';
    }

    // Copy lang tag, including its terminator.
    if (has_lang) memcpy (out + lang_idx, term->lang, lang_len + 1);

    return LSUP_OK;
}


/**
 * Populate a term from a serialized buffer.
 *
 * The buffer is read as: one type byte, the NUL-terminated data string and,
 * for literals only, an optional NUL-terminated data type (an empty string
 * means no data type) followed by an optional NUL-terminated language tag.
 * Every string must be terminated within `sterm->size` bytes, and the
 * language tag must fit in a `langtag`.
 *
 * `term` is initialized with LSUP_term_init(), so the same preconditions on
 * its members apply.
 *
 * Returns LSUP_VALUE_ERR if the buffer is missing or malformed; otherwise
 * the return value of LSUP_term_init().
 */
LSUP_rc
LSUP_term_deserialize (const LSUP_Buffer *sterm, LSUP_Term *term)
{
    // At least the type byte and the data terminator are needed.
    if (UNLIKELY (!sterm || !sterm->addr || sterm->size < 2))
        return LSUP_VALUE_ERR;

    char *buf = (char *) sterm->addr;
    char *end = buf + sterm->size;
    char type = buf[0];

    // Look for each terminator inside the buffer only, so that a truncated
    // or corrupted buffer is never read past its end.
    char *data = buf + 1;
    char *nul = memchr (data, '\0', (size_t) (end - data));
    if (UNLIKELY (!nul)) return LSUP_VALUE_ERR;
    char *cur = nul + 1;

    char *datatype = NULL;
    langtag lang = "\00";

    if (type == LSUP_TERM_LITERAL && cur < end) {
        nul = memchr (cur, '\0', (size_t) (end - cur));
        if (UNLIKELY (!nul)) return LSUP_VALUE_ERR;
        // An empty data type is only a placeholder.
        if (nul != cur) datatype = cur;
        cur = nul + 1;

        if (cur < end) {
            nul = memchr (cur, '\0', (size_t) (end - cur));
            // Reject tags that are unterminated or would overflow `lang`.
            if (UNLIKELY (!nul || (size_t) (nul - cur) >= sizeof lang))
                return LSUP_VALUE_ERR;
            memcpy (lang, cur, (size_t) (nul - cur) + 1);
        }
    }

    return LSUP_term_init (term, type, data, datatype, lang);
}


bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
{
    if (term1->type != term2->type)
        return false;

    if (strcmp (term1->data, term2->data) != 0)
        return false;

    if (term1->type == LSUP_TERM_LITERAL) {
        if ((term1->datatype == NULL) != (term2->datatype == NULL)) // XOR
            return false;

        if (
                term1->datatype != NULL &&
                strcmp (term1->datatype, term2->datatype) != 0)
            return false;

        if ((term1->lang == NULL) != (term2->lang == NULL)) // XOR
            return false;

        if (
                term1->lang != NULL &&
                strcmp (term1->lang, term2->lang) != 0)
            return false;
    }

    return true;
}


/**
 * Release the strings owned by a term, leaving the handle itself allocated.
 *
 * Both pointers are reset to NULL after being freed.
 */
void LSUP_term_done (LSUP_Term *term)
{
    // free (NULL) is a no-op, so no guards are needed.
    free (term->data);
    term->data = NULL;

    free (term->datatype);
    term->datatype = NULL;
}


/**
 * Release a term's strings and then the term handle itself.
 *
 * Passing NULL is allowed and does nothing.
 */
void LSUP_term_free (LSUP_Term *term)
{
    if (UNLIKELY (term == NULL)) return;

    LSUP_term_done (term);
    free (term);
}


// Extern inline functions.

LSUP_Key LSUP_term_hash (const LSUP_Term *term);