/* term.h */
  1. #ifndef LSUP_TERM_H
  2. #define LSUP_TERM_H
  #include <assert.h>
  #include <regex.h>
  #include <stdbool.h>

  #include "xxhash.h"
  #include "buffer.h"
  // URI parsing regular expression. Conforms to RFC3986 (it is the pattern
  // given in Appendix B of that document). Capture groups, per the RFC:
  // 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment.
  #define URI_REGEX_STR \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

  // Seed passed to the hash function when turning serialized terms into keys.
  #define SEED 0 // TODO Make configurable.

  // Size in chars of a language tag buffer (see the `langtag` type).
  #define LANG_SIZE 8
  12. typedef LSUP_Buffer LSUP_SerTerm;
  13. typedef XXH64_hash_t LSUP_TermHash64;
  14. typedef char langtag[LANG_SIZE];
  /**
   * Kind of term stored in an LSUP_Term.
   *
   * The numeric values are spelled out because they show up as the type byte
   * in the serialization examples documented with LSUP_term_serialize()
   * (\x01 for a URI, \x03 for a literal); keeping them explicit prevents an
   * accidental renumbering if members are ever added or reordered.
   */
  typedef enum LSUP_term_type {
      LSUP_TERM_UNDEFINED = 0,
      LSUP_TERM_URI       = 1,
      LSUP_TERM_BNODE     = 2,
      LSUP_TERM_LITERAL   = 3
  } LSUP_term_type;
  21. typedef struct LSUP_Term {
  22. LSUP_term_type type;
  23. // This language variable currently supports a 2-digit ISO 639 language
  24. // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
  25. // See https://tools.ietf.org/html/bcp47#section-2.1
  26. langtag lang;
  27. char *datatype;
  28. char *data;
  29. } LSUP_Term;
  30. /*
  31. * Initialize a pre-allocated term structure.
  32. *
  33. * the structure can be an already initialized term, and can be reused
  34. * without freeing it.
  35. */
  36. int
  37. LSUP_term_init(
  38. LSUP_Term *term, LSUP_term_type type,
  39. char *data, char *datatype, char *lang);
  40. LSUP_Term *
  41. LSUP_term_new(LSUP_term_type type, char *data, char *datatype, char *lang);
  42. /** Simple ad-hoc serialization function.
  43. *
  44. * This function allocates and returns the following byte sequence:
  45. *
  46. * - `sizeof(char)` bytes for the term type;
  47. * - `LANG_SIZE` bytes for the language tag;
  48. * - Arbitrary bytes with NUL-terminated strings for data and datatype.
  49. *
  50. * The index for `data` is consistently `LANG_SIZE + sizeof(char)`. The
  51. * index for `datatype` is found by the terminating NULL for `data`.
  52. *
  53. * Serialized representations of some RDF terms:
  54. *
  55. * <http://hello.org>
  56. *
  57. * 0 1 size=19
  58. * | \x01 | http://hello.org\x00 |
  59. * type data
  60. *
  61. * "hello"
  62. *
  63. * 0 1 size=7
  64. * | \x03 | hello\x00 |
  65. * type data
  66. *
  67. * "hello"^^xsd:string
  68. *
  69. * 0 1 7 size=18
  70. * | \x03 | hello\x00 | xsd:string\x00 |
  71. * type data datatype
  72. *
  73. * (note: the "xsd:" prefix is used for simplification here, it would be
  74. * normally be a fully qualified URI)
  75. *
  76. * "hello"@en-US
  77. *
  78. * 0 1 7 18 size=26
  79. * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
  80. * type data datatype lang
  81. */
  82. int LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm);
  83. int
  84. LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term);
  85. inline LSUP_Key
  86. LSUP_sterm_to_key(const LSUP_SerTerm *sterm)
  87. {
  88. LSUP_Key key = (LSUP_Key)XXH64(sterm->addr, sterm->size, SEED);
  89. return key;
  90. }
  91. /**
  92. * Hash a term into a key. If NULL is passed, the result is NULL_KEY.
  93. */
  94. inline LSUP_Key
  95. LSUP_term_to_key(const LSUP_Term *term)
  96. {
  97. if (term == NULL)
  98. return NULL_KEY;
  99. LSUP_Buffer sterm_s;
  100. LSUP_Buffer *sterm = &sterm_s;
  101. LSUP_term_serialize(term, sterm);
  102. LSUP_Key key = LSUP_sterm_to_key(sterm);
  103. LSUP_buffer_done(sterm);
  104. return key;
  105. }
  106. /**
  107. * Compare two terms.
  108. */
  109. bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2);
  /*
  // TODO Implement when xxhash v0.8 is released with stable xxhash128 function.
  XXH128_hash_t
  LSUP_term_hash128(const LSUP_Term *term);
  */
  115. void
  116. LSUP_term_done(LSUP_Term *term);
  117. void
  118. LSUP_term_free(LSUP_Term *term);
  119. #endif