term.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. #ifndef LSUP_TERM_H
  2. #define LSUP_TERM_H
  3. #include <assert.h>
  4. #include <regex.h>
  5. #include "xxhash.h"
  6. #include "buffer.h"
  /** Regular expression used to parse URIs. Conforms to RFC3986. */
  #define URI_REGEX_STR \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

  /** Seed passed to XXH64 when hashing terms. TODO Make configurable. */
  #define SEED 0

  /** Size, in chars, of a language tag. */
  #define LANG_SIZE 8

  /**
   * "NULL" key: a value that is never user-provided. It marks special values
   * (e.g. deleted records).
   */
  #define NULL_KEY 0

  /**
   * "NULL" triple: a value that is never user-provided. It fills the slots of
   * deleted triples in a keyset.
   */
  #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
  18. typedef LSUP_Buffer LSUP_SerTerm;
  19. typedef XXH64_hash_t LSUP_TermHash64;
  20. typedef char langtag[LANG_SIZE];
  /**
   * Kinds of term.
   *
   * The numeric values show up as the type byte in the serialized examples
   * documented for LSUP_term_serialize() (\x01 for a URI, \x03 for a literal),
   * so they are spelled out explicitly here.
   */
  enum LSUP_term_type {
      LSUP_TERM_UNDEFINED = 0,
      LSUP_TERM_URI       = 1,
      LSUP_TERM_BNODE     = 2,
      LSUP_TERM_LITERAL   = 3
  };
  typedef enum LSUP_term_type LSUP_term_type;
  27. typedef struct LSUP_Term {
  28. LSUP_term_type type;
  29. // This language variable currently supports a 2-digit ISO 639 language
  30. // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
  31. // See https://tools.ietf.org/html/bcp47#section-2.1
  32. langtag lang;
  33. char *datatype;
  34. char *data;
  35. } LSUP_Term;
  36. /*
  37. * Initialize a pre-allocated term structure.
  38. *
  39. * the structure can be an already initialized term, and can be reused
  40. * without freeing it.
  41. */
  42. int
  43. LSUP_term_init(
  44. LSUP_Term *term, LSUP_term_type type,
  45. char *data, char *datatype, char *lang);
  46. LSUP_Term *
  47. LSUP_term_new(LSUP_term_type type, char *data, char *datatype, char *lang);
  /**
   * Generate a random URN with the format: `urn:lsup:<uuid4>`.
   *
   * Declared with `(void)` rather than an empty parameter list, so that the
   * compiler treats this as a real prototype and rejects calls that pass
   * arguments.
   *
   * @return The generated string. NOTE(review): ownership of the returned
   *         pointer is not visible in this header; presumably the caller
   *         frees it, but confirm against the implementation.
   */
  char *
  LSUP_term_gen_random_str(void);
  53. /** Simple ad-hoc serialization function.
  54. *
  55. * This function allocates and returns the following byte sequence:
  56. *
  57. * - `sizeof(char)` bytes for the term type;
  58. * - `LANG_SIZE` bytes for the language tag;
  59. * - Arbitrary bytes with NUL-terminated strings for data and datatype.
  60. *
  61. * The index for `data` is consistently `LANG_SIZE + sizeof(char)`. The
  62. * index for `datatype` is found by the terminating NULL for `data`.
  63. *
  64. * Serialized representations of some RDF terms:
  65. *
  66. * <http://hello.org>
  67. *
  68. * 0 1 size=19
  69. * | \x01 | http://hello.org\x00 |
  70. * type data
  71. *
  72. * "hello"
  73. *
  74. * 0 1 size=7
  75. * | \x03 | hello\x00 |
  76. * type data
  77. *
  78. * "hello"^^xsd:string
  79. *
  80. * 0 1 7 size=18
  81. * | \x03 | hello\x00 | xsd:string\x00 |
  82. * type data datatype
  83. *
  84. * (note: the "xsd:" prefix is used for simplification here, it would be
  85. * normally be a fully qualified URI)
  86. *
  87. * "hello"@en-US
  88. *
  89. * 0 1 7 18 size=26
  90. * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
  91. * type data datatype lang
  92. */
  93. int LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm);
  94. int
  95. LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term);
  96. inline LSUP_Key
  97. LSUP_sterm_to_key(const LSUP_SerTerm *sterm)
  98. {
  99. LSUP_Key key = (LSUP_Key)XXH64(sterm->addr, sterm->size, SEED);
  100. return key;
  101. }
  102. /**
  103. * Hash a term into a key. If NULL is passed, the result is NULL_KEY.
  104. */
  105. inline LSUP_Key
  106. LSUP_term_to_key(const LSUP_Term *term)
  107. {
  108. if (term == NULL)
  109. return NULL_KEY;
  110. LSUP_Buffer sterm_s;
  111. LSUP_Buffer *sterm = &sterm_s;
  112. LSUP_term_serialize(term, sterm);
  113. LSUP_Key key = LSUP_sterm_to_key(sterm);
  114. LSUP_buffer_done(sterm);
  115. return key;
  116. }
  117. /**
  118. * Compare two terms.
  119. */
  120. bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2);
  121. /*
  122. // TODO Implement when xxhash v0.8 is released with stable xxhash128 function.
  123. XXH128_hash_t
  124. LSUP_term_hash128(const LSUP_Term *term);
  125. */
  126. void
  127. LSUP_term_done(LSUP_Term *term);
  128. void
  129. LSUP_term_free(LSUP_Term *term);
  130. #endif