term.h 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. #ifndef LSUP_TERM_H
  2. #define LSUP_TERM_H
  3. #include <assert.h>
  4. #include <regex.h>
  5. #include "xxhash.h"
  6. #include "buffer.h"
  /*
   * URI parsing regular expression. Conforms to RFC3986: this is the
   * component-splitting expression listed in Appendix B of that document.
   */
  #define URI_REGEX_STR \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

  /* Hashing seed. TODO Make configurable. */
  #define SEED 0

  /* Size in chars of lang tag. */
  #define LANG_SIZE 8
  12. typedef LSUP_Buffer LSUP_SerTerm;
  13. typedef XXH64_hash_t LSUP_TermHash64;
  14. typedef char langtag[LANG_SIZE];
  /*
   * Kind of term stored in a LSUP_Term.
   *
   * The numeric values are spelled out because they are the same ones the
   * enumeration assigns implicitly (0, 1, 2, 3 in declaration order).
   */
  typedef enum LSUP_term_type {
      LSUP_TERM_UNDEFINED = 0,
      LSUP_TERM_URI       = 1,
      LSUP_TERM_BNODE     = 2,
      LSUP_TERM_LITERAL   = 3
  } LSUP_term_type;
  21. typedef struct LSUP_Term {
  22. LSUP_term_type type;
  23. // This language variable currently supports a 2-digit ISO 639 language
  24. // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
  25. // See https://tools.ietf.org/html/bcp47#section-2.1
  26. langtag lang;
  27. char *datatype;
  28. char *data;
  29. } LSUP_Term;
  30. /*
  31. * Initialize a pre-allocated term structure.
  32. *
  33. * the structure can be an already initialized term, and can be reused
  34. * without freeing it.
  35. */
  36. int
  37. LSUP_term_init(
  38. LSUP_Term *term, LSUP_term_type type,
  39. char *data, char *datatype, char *lang);
  40. LSUP_Term *
  41. LSUP_term_new(LSUP_term_type type, char *data, char *datatype, char *lang);
  /**
   * Generate a random URN with the format: `urn:lsup:<uuid4>`.
   *
   * The function takes no arguments. It is declared with an explicit `void`
   * parameter list so that the compiler rejects calls that pass arguments
   * (an empty `()` list is not a prototype before C23).
   *
   * NOTE(review): ownership of the returned string is not stated here;
   * presumably the caller must free it. Confirm against the implementation.
   */
  char *
  LSUP_term_gen_random_str(void);
  47. /** Simple ad-hoc serialization function.
  48. *
  49. * This function allocates and returns the following byte sequence:
  50. *
  51. * - `sizeof(char)` bytes for the term type;
  52. * - `LANG_SIZE` bytes for the language tag;
  53. * - Arbitrary bytes with NUL-terminated strings for data and datatype.
  54. *
  55. * The index for `data` is consistently `LANG_SIZE + sizeof(char)`. The
  56. * index for `datatype` is found by the terminating NULL for `data`.
  57. *
  58. * Serialized representations of some RDF terms:
  59. *
  60. * <http://hello.org>
  61. *
  62. * 0 1 size=19
  63. * | \x01 | http://hello.org\x00 |
  64. * type data
  65. *
  66. * "hello"
  67. *
  68. * 0 1 size=7
  69. * | \x03 | hello\x00 |
  70. * type data
  71. *
  72. * "hello"^^xsd:string
  73. *
  74. * 0 1 7 size=18
  75. * | \x03 | hello\x00 | xsd:string\x00 |
  76. * type data datatype
  77. *
  78. * (note: the "xsd:" prefix is used for simplification here, it would be
  79. * normally be a fully qualified URI)
  80. *
  81. * "hello"@en-US
  82. *
  83. * 0 1 7 18 size=26
  84. * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
  85. * type data datatype lang
  86. */
  87. int LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm);
  88. int
  89. LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term);
  90. inline LSUP_Key
  91. LSUP_sterm_to_key(const LSUP_SerTerm *sterm)
  92. {
  93. LSUP_Key key = (LSUP_Key)XXH64(sterm->addr, sterm->size, SEED);
  94. return key;
  95. }
  96. /**
  97. * Hash a term into a key. If NULL is passed, the result is NULL_KEY.
  98. */
  99. inline LSUP_Key
  100. LSUP_term_to_key(const LSUP_Term *term)
  101. {
  102. if (term == NULL)
  103. return NULL_KEY;
  104. LSUP_Buffer sterm_s;
  105. LSUP_Buffer *sterm = &sterm_s;
  106. LSUP_term_serialize(term, sterm);
  107. LSUP_Key key = LSUP_sterm_to_key(sterm);
  108. LSUP_buffer_done(sterm);
  109. return key;
  110. }
  111. /**
  112. * Compare two terms.
  113. */
  114. bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2);
  115. /*
  116. // TODO Implement when xxhash v0.8 is released with stable xxhash128 function.
  117. XXH128_hash_t
  118. LSUP_term_hash128(const LSUP_Term *term);
  119. */
  120. void
  121. LSUP_term_done(LSUP_Term *term);
  122. void
  123. LSUP_term_free(LSUP_Term *term);
  124. #endif