// term.h
  1. #ifndef LSUP_TERM_H
  2. #define LSUP_TERM_H
  3. #include <assert.h>
  4. #include <regex.h>
  5. #include "xxhash.h"
  6. #include "buffer.h"
  /*
   * URI parsing regular expression. Conforms to RFC 3986 (this is the
   * expression listed in Appendix B of that document).
   */
  #define URI_REGEX_STR \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

  /* Seed passed to the XXH64 hash calls below. TODO Make configurable. */
  #define SEED 0

  /* Size in chars of a language tag (see the `langtag` type). */
  #define LANG_SIZE 8

  /*
   * "NULL" key: a value that is never user-provided. Used to mark special
   * values (e.g. deleted records).
   */
  #define NULL_KEY 0

  /*
   * "NULL" triple: a value that is never user-provided. Used to fill deleted
   * triples in a keyset.
   */
  #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
  18. typedef XXH64_hash_t LSUP_TermHash64;
  19. typedef char langtag[LANG_SIZE];
  /*
   * Kind of a term. The numeric values are the ones the compiler would assign
   * implicitly (0 through 3); they are spelled out here for readability.
   */
  enum LSUP_term_type {
      LSUP_TERM_UNDEFINED = 0,
      LSUP_TERM_URI       = 1,    // Type used by the URI shortcut functions.
      LSUP_TERM_BNODE     = 2,    // Presumably a blank node -- confirm.
      LSUP_TERM_LITERAL   = 3
  };
  typedef enum LSUP_term_type LSUP_term_type;
  26. typedef struct LSUP_Term {
  27. LSUP_term_type type;
  28. // This language variable currently supports a 2-digit ISO 639 language
  29. // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
  30. // See https://tools.ietf.org/html/bcp47#section-2.1
  31. langtag lang;
  32. char *datatype;
  33. char *data;
  34. } LSUP_Term;
  35. /*
  36. * Initialize a pre-allocated term structure.
  37. *
  38. * the structure can be an already initialized term, and can be reused
  39. * without freeing it.
  40. */
  41. LSUP_rc
  42. LSUP_term_init(
  43. LSUP_Term *term, LSUP_term_type type,
  44. const char *data, char *datatype, char *lang);
  45. /**
  46. * @brief Shortcut to initialize a URI.
  47. */
  48. inline LSUP_rc
  49. LSUP_uri_init(LSUP_Term *term, const char *data)
  50. { return LSUP_term_init(term, LSUP_TERM_URI, data, NULL, NULL); }
  51. LSUP_Term *
  52. LSUP_term_new(LSUP_term_type type, const char *data, char *datatype, char *lang);
  53. /**
  54. * @brief Shortcut to create a URI.
  55. */
  56. inline LSUP_Term *
  57. LSUP_uri_new(const char *data)
  58. { return LSUP_term_new(LSUP_TERM_URI, data, NULL, NULL); }
  59. /**
  60. * Generate a random URN with the format: `urn:lsup:<uuid4>`.
  61. */
  62. inline LSUP_Term *
  63. LSUP_uri_random()
  64. {
  65. uuid_t uuid;
  66. uuid_generate_random(uuid);
  67. uuid_str_t uuid_str;
  68. uuid_unparse_lower(uuid, uuid_str);
  69. char uri[UUIDSTR_SIZE + 10];
  70. sprintf(uri, "urn:uuid4:%s", uuid_str);
  71. return LSUP_uri_new(uri);
  72. }
  73. /** Simple ad-hoc serialization function.
  74. *
  75. * The resulting term must be freed with #LSUP_term_done after use.
  76. */
  77. LSUP_rc LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm);
  78. LSUP_rc LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term);
  79. inline LSUP_Key
  80. LSUP_sterm_to_key(const LSUP_Buffer *sterm)
  81. {
  82. if (UNLIKELY (sterm == NULL)) return NULL_KEY;
  83. return XXH64(sterm->addr, sterm->size, SEED);
  84. }
  85. /**
  86. * Hash a term into a key. If NULL is passed, the result is NULL_KEY.
  87. */
  88. inline LSUP_Key
  89. LSUP_term_to_key(const LSUP_Term *term)
  90. {
  91. if (UNLIKELY (term == NULL)) return NULL_KEY;
  92. LSUP_Buffer sterm_s;
  93. LSUP_Buffer *sterm = &sterm_s;
  94. LSUP_term_serialize(term, sterm);
  95. LSUP_Key key = XXH64(sterm->addr, sterm->size, SEED);
  96. LSUP_buffer_done(sterm);
  97. return key;
  98. }
  99. /**
  100. * Compare two terms.
  101. */
  102. bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2);
  103. /*
  104. // TODO Implement when xxhash v0.8 is released with stable xxhash128 function.
  105. inline XXH128_hash_t
  106. LSUP_term_hash128(const LSUP_Term *term);
  107. */
  108. void
  109. LSUP_term_done(LSUP_Term *term);
  110. void
  111. LSUP_term_free(LSUP_Term *term);
  112. #endif