term.c 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. #include "term.h"
  // URI parsing regular expression. Conforms to RFC3986.
  #define URI_REGEX_STR \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

  // Length of a C string, or 0 for a NULL pointer. The whole expansion is
  // parenthesized so that the macro can be embedded in a larger expression
  // (e.g. `1 + NLEN (s)`). Note that `str` is evaluated twice.
  #define NLEN(str) ((str) == NULL ? 0 : strlen (str))

  // Characters that cause a URI term to be rejected: angle brackets, double
  // quote, space, curly braces, pipe, backslash, caret and backtick.
  #define INVALID_URI_CHARS "<>\" {}|\\^`"
  7. static regex_t ptn;
  8. static bool ptn_init = false;
  9. /* Global inline prototypes. */
  10. LSUP_Term *LSUP_uri_new (const char *data);
  11. LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data);
  12. /**
  13. * Free global regex struct. Register with atexit().
  14. */
  15. void term_cleanup() { if (ptn_init) regfree (&ptn); }
  16. LSUP_Term *
  17. LSUP_term_new (
  18. LSUP_term_type type, const char *data, char *datatype, char *lang)
  19. {
  20. LSUP_Term *term;
  21. CALLOC_GUARD (term, NULL);
  22. // If undefined, just set the type.
  23. if (type == LSUP_TERM_UNDEFINED) term->type = type;
  24. else if (UNLIKELY (LSUP_term_init (
  25. term, type, data, datatype, lang) != LSUP_OK)) {
  26. free (term);
  27. return NULL;
  28. }
  29. return term;
  30. }
  31. LSUP_Term *
  32. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
  33. {
  34. LSUP_Term *term;
  35. MALLOC_GUARD (term, NULL);
  36. if (UNLIKELY (LSUP_term_deserialize (sterm, term) != LSUP_OK)) {
  37. free (term);
  38. return NULL;
  39. }
  40. return term;
  41. }
  42. LSUP_Buffer *
  43. LSUP_buffer_new_from_term (const LSUP_Term *term)
  44. {
  45. LSUP_Buffer *sterm;
  46. MALLOC_GUARD (sterm, NULL);
  47. sterm->addr = NULL;
  48. if (LSUP_term_serialize (term, sterm) != LSUP_OK) {
  49. free (sterm);
  50. return NULL;
  51. }
  52. return sterm;
  53. }
  54. LSUP_rc
  55. LSUP_term_init(
  56. LSUP_Term *term, LSUP_term_type type,
  57. const char *data, char *datatype, char *lang)
  58. {
  59. // This can never be LSUP_TERM_UNDEFINED.
  60. if (!data) return LSUP_VALUE_ERR;
  61. term->type = type;
  62. // Validate URI.
  63. if (term->type == LSUP_TERM_URI) {
  64. // TODO Cheap fix. Should url-encode all invalid chars.
  65. if (strpbrk (data, INVALID_URI_CHARS) != NULL) {
  66. fprintf (
  67. stderr, "Characters %s are not allowed.\n",
  68. INVALID_URI_CHARS);
  69. return LSUP_VALUE_ERR;
  70. }
  71. if (UNLIKELY (!ptn_init)) {
  72. int rc = regcomp (&ptn, URI_REGEX_STR, REG_EXTENDED);
  73. if (rc != 0) return LSUP_ERROR;
  74. ptn_init = true;
  75. atexit (term_cleanup);
  76. }
  77. if (regexec (&ptn, data, 0, NULL, 0) != 0) {
  78. fprintf (stderr, "Error matching URI pattern.\n");
  79. return LSUP_VALUE_ERR;
  80. }
  81. }
  82. char *data_tmp = realloc (term->data, strlen (data) + 1);
  83. if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
  84. term->data = data_tmp;
  85. strcpy (term->data, data);
  86. if (datatype) {
  87. data_tmp = realloc (term->datatype, strlen (datatype) + 1);
  88. if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
  89. term->datatype = data_tmp;
  90. strcpy (term->datatype, datatype);
  91. } else {
  92. free (term->datatype);
  93. term->datatype = NULL;
  94. }
  95. if (lang) {
  96. // TODO validate language and country code
  97. //char lsize = 5 ? lang[2] == "-" : 2;
  98. memcpy (term->lang, lang, LANG_SIZE);
  99. } else {
  100. memset (term->lang, 0, LANG_SIZE);
  101. }
  102. return LSUP_OK;
  103. }
  104. /*
  105. * This function allocates and returns the following byte sequence:
  106. *
  107. * - `sizeof (char)` bytes for the term type;
  108. * - `LANG_SIZE` bytes for the language tag;
  109. * - Arbitrary bytes with NUL-terminated strings for data and datatype.
  110. *
  111. * The index for `data` is consistently `LANG_SIZE + sizeof (char)`. The
  112. * index for `datatype` is found by the terminating NULL for `data`.
  113. *
  114. * Serialized representations of some RDF terms:
  115. *
  116. * <http://hello.org>
  117. *
  118. * 0 1 size=19
  119. * | \x01 | http://hello.org\x00 |
  120. * type data
  121. *
  122. * "hello"
  123. *
  124. * 0 1 size=7
  125. * | \x03 | hello\x00 |
  126. * type data
  127. *
  128. * "hello"^^xsd:string
  129. *
  130. * 0 1 7 size=18
  131. * | \x03 | hello\x00 | xsd:string\x00 |
  132. * type data datatype
  133. *
  134. * (note: the "xsd:" prefix is used for simplification here, it would be
  135. * normally be a fully qualified URI)
  136. *
  137. * "hello"@en-US
  138. *
  139. * 0 1 7 18 size=24
  140. * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00 |
  141. * type data datatype lang
  142. */
  143. LSUP_rc
  144. LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm)
  145. {
  146. size_t size, data_len, datatype_len = 0,
  147. data_idx = 1, datatype_idx = 0, lang_idx = 0;
  148. if (UNLIKELY (term == NULL)) return LSUP_NOACTION;
  149. data_len = strlen (term->data) + 1;
  150. size = data_idx + data_len;
  151. if (term->datatype != NULL) {
  152. datatype_idx = size;
  153. datatype_len = strlen (term->datatype) + 1;
  154. size += datatype_len;
  155. if (strlen (term->lang) > 0) {
  156. lang_idx = size;
  157. size += strlen (term->lang) + 1;
  158. }
  159. }
  160. //TRACE ("Serialized term size: %lu", size);
  161. LSUP_buffer_init (sterm, size, NULL);
  162. // Copy type.
  163. memcpy (sterm->addr, &term->type, 1);
  164. // Copy data.
  165. memcpy (sterm->addr + data_idx, term->data, data_len);
  166. if (term->datatype != NULL) {
  167. // Copy data type.
  168. memcpy (sterm->addr + datatype_idx, term->datatype, datatype_len);
  169. // Copy lang tag.
  170. if (strlen (term->lang) > 0)
  171. strcpy (sterm->addr + lang_idx, term->lang);
  172. }
  173. return LSUP_OK;
  174. }
  175. LSUP_rc
  176. LSUP_term_deserialize (const LSUP_Buffer *sterm, LSUP_Term *term)
  177. {
  178. size_t cur;
  179. char *data, *datatype = NULL;
  180. langtag lang = "\00";
  181. char type = ((char*)(sterm->addr))[0];
  182. cur = 1;
  183. data = (char*)sterm->addr + cur;
  184. cur += strlen (data) + 1;
  185. if (type == LSUP_TERM_LITERAL && cur < sterm->size) {
  186. datatype = (char*)sterm->addr + cur;
  187. cur += strlen (datatype) + 1;
  188. if (strlen (datatype) == 0)
  189. datatype = NULL;
  190. if (cur < sterm->size)
  191. strcpy (lang, sterm->addr + cur);
  192. }
  193. return LSUP_term_init (term, type, data, datatype, lang);
  194. }
  195. bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
  196. {
  197. if (term1->type != term2->type)
  198. return false;
  199. if (strcmp (term1->data, term2->data) != 0)
  200. return false;
  201. if (term1->type == LSUP_TERM_LITERAL) {
  202. if ((term1->datatype == NULL) != (term2->datatype == NULL)) // XOR
  203. return false;
  204. if (
  205. term1->datatype != NULL &&
  206. strcmp (term1->datatype, term2->datatype) != 0)
  207. return false;
  208. if ((term1->lang == NULL) != (term2->lang == NULL)) // XOR
  209. return false;
  210. if (
  211. term1->lang != NULL &&
  212. strcmp (term1->lang, term2->lang) != 0)
  213. return false;
  214. }
  215. return true;
  216. }
  217. void LSUP_term_done (LSUP_Term *term)
  218. {
  219. if (LIKELY (term->data != NULL)) {
  220. free (term->data);
  221. term->data = NULL;
  222. }
  223. if (term->datatype != NULL) {
  224. free (term->datatype);
  225. term->datatype = NULL;
  226. }
  227. }
  228. void LSUP_term_free (LSUP_Term *term)
  229. {
  230. if (LIKELY (term != NULL)) {
  231. LSUP_term_done (term);
  232. free (term);
  233. term = NULL;
  234. }
  235. }
  236. // Extern inline functions.
  237. LSUP_Key LSUP_term_hash (const LSUP_Term *term);