term.c 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. #include "term.h"
  /*
   * Length of a C string, or 0 when the pointer is NULL.
   *
   * The whole expansion is parenthesized so that the macro can be used inside
   * larger expressions (e.g. `NLEN (s) + 1`). Note that the argument is
   * evaluated twice: do not pass expressions with side effects.
   */
  #define NLEN(str) ((str) == NULL ? 0 : strlen ((str)))
  3. static regex_t ptn;
  4. static bool ptn_init = false;
  5. /* Global inline prototypes. */
  6. LSUP_Term *LSUP_uri_new (const char *data);
  7. LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data);
  8. /**
  9. * Free global regex struct. Register with atexit().
  10. */
  11. void term_cleanup() { if (ptn_init) regfree (&ptn); }
  12. LSUP_Term *
  13. LSUP_term_new (
  14. LSUP_term_type type, const char *data, char *datatype, char *lang)
  15. {
  16. LSUP_Term *term;
  17. term = calloc (1, sizeof (*term));
  18. if (UNLIKELY (!term)) return NULL;
  19. // If undefined, just set the type.
  20. if (type == LSUP_TERM_UNDEFINED) term->type = type;
  21. else if (UNLIKELY (LSUP_term_init (
  22. term, type, data, datatype, lang) != LSUP_OK)) {
  23. free (term);
  24. return NULL;
  25. }
  26. return term;
  27. }
  28. LSUP_Term *
  29. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
  30. {
  31. LSUP_Term *term = malloc (sizeof (*term));
  32. if (UNLIKELY (!term)) return NULL;
  33. if (UNLIKELY (LSUP_term_deserialize (sterm, term) != LSUP_OK)) {
  34. free (term);
  35. return NULL;
  36. }
  37. return term;
  38. }
  39. LSUP_Buffer *
  40. LSUP_buffer_new_from_term (const LSUP_Term *term)
  41. {
  42. LSUP_Buffer *sterm = malloc (sizeof (*sterm));
  43. if (UNLIKELY (!sterm)) return NULL;
  44. sterm->addr = NULL;
  45. if (LSUP_term_serialize (term, sterm) != LSUP_OK) {
  46. free (sterm);
  47. return NULL;
  48. }
  49. return sterm;
  50. }
  51. LSUP_rc
  52. LSUP_term_init(
  53. LSUP_Term *term, LSUP_term_type type,
  54. const char *data, char *datatype, char *lang)
  55. {
  56. // This can never be LSUP_TERM_UNDEFINED.
  57. if (!data) return LSUP_VALUE_ERR;
  58. term->type = type;
  59. // Validate URI.
  60. if (term->type == LSUP_TERM_URI) {
  61. if (UNLIKELY (!ptn_init)) {
  62. int rc = regcomp (&ptn, URI_REGEX_STR, REG_EXTENDED);
  63. if (rc != 0) return LSUP_ERROR;
  64. ptn_init = true;
  65. atexit (term_cleanup);
  66. }
  67. if (regexec (&ptn, data, 0, NULL, 0) != 0) {
  68. fprintf (stderr, "Error matching URI pattern.\n");
  69. return LSUP_VALUE_ERR;
  70. }
  71. }
  72. char *data_tmp = realloc (term->data, strlen (data) + 1);
  73. if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
  74. term->data = data_tmp;
  75. strcpy (term->data, data);
  76. if (datatype) {
  77. data_tmp = realloc (term->datatype, strlen (datatype) + 1);
  78. if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
  79. term->datatype = data_tmp;
  80. strcpy (term->datatype, datatype);
  81. } else {
  82. free (term->datatype);
  83. term->datatype = NULL;
  84. }
  85. if (lang) {
  86. // TODO validate language and country code
  87. //char lsize = 5 ? lang[2] == "-" : 2;
  88. memcpy (term->lang, lang, LANG_SIZE);
  89. } else {
  90. memset (term->lang, 0, LANG_SIZE);
  91. }
  92. return LSUP_OK;
  93. }
  94. /*
  95. * This function allocates and returns the following byte sequence:
  96. *
  97. * - `sizeof (char)` bytes for the term type;
  98. * - `LANG_SIZE` bytes for the language tag;
  99. * - Arbitrary bytes with NUL-terminated strings for data and datatype.
  100. *
  101. * The index for `data` is consistently `LANG_SIZE + sizeof (char)`. The
  102. * index for `datatype` is found by the terminating NULL for `data`.
  103. *
  104. * Serialized representations of some RDF terms:
  105. *
  106. * <http://hello.org>
  107. *
  108. * 0 1 size=19
  109. * | \x01 | http://hello.org\x00 |
  110. * type data
  111. *
  112. * "hello"
  113. *
  114. * 0 1 size=7
  115. * | \x03 | hello\x00 |
  116. * type data
  117. *
  118. * "hello"^^xsd:string
  119. *
  120. * 0 1 7 size=18
  121. * | \x03 | hello\x00 | xsd:string\x00 |
  122. * type data datatype
  123. *
  124. * (note: the "xsd:" prefix is used for simplification here, it would be
  125. * normally be a fully qualified URI)
  126. *
  127. * "hello"@en-US
  128. *
  129. * 0 1 7 18 size=26
  130. * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
  131. * type data datatype lang
  132. */
  133. LSUP_rc
  134. LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm)
  135. {
  136. size_t size, data_len, datatype_len = 0,
  137. data_idx = 1, datatype_idx = 0, lang_idx = 0;
  138. if (UNLIKELY (term == NULL)) return LSUP_NOACTION;
  139. data_len = strlen (term->data) + 1;
  140. size = data_idx + data_len;
  141. if (term->datatype != NULL) {
  142. datatype_idx = size;
  143. datatype_len = strlen (term->datatype) + 1;
  144. size += datatype_len;
  145. if (strlen (term->lang) > 0) {
  146. lang_idx = size;
  147. size += LANG_SIZE;
  148. }
  149. }
  150. //TRACE ("Serialized term size: %lu", size);
  151. LSUP_buffer_init (sterm, size, NULL);
  152. // Copy type.
  153. memcpy (sterm->addr, &term->type, 1);
  154. // Copy data.
  155. memcpy (sterm->addr + data_idx, term->data, data_len);
  156. if (term->datatype != NULL) {
  157. // Copy data type.
  158. memcpy (sterm->addr + datatype_idx, term->datatype, datatype_len);
  159. // Copy lang tag.
  160. if (strlen (term->lang) > 0)
  161. memcpy (sterm->addr + lang_idx, term->lang, LANG_SIZE);
  162. }
  163. return LSUP_OK;
  164. }
  165. LSUP_rc
  166. LSUP_term_deserialize (const LSUP_Buffer *sterm, LSUP_Term *term)
  167. {
  168. size_t cur;
  169. char *data, *datatype = NULL;
  170. langtag lang = "\00";
  171. char type = ((char*)(sterm->addr))[0];
  172. cur = 1;
  173. data = (char*)sterm->addr + cur;
  174. cur += strlen (data) + 1;
  175. if (type == LSUP_TERM_LITERAL && cur < sterm->size) {
  176. datatype = (char*)sterm->addr + cur;
  177. cur += strlen (datatype) + 1;
  178. if (strlen (datatype) == 0)
  179. datatype = NULL;
  180. if (cur < sterm->size)
  181. strcpy (lang, sterm->addr + cur);
  182. }
  183. return LSUP_term_init (term, type, data, datatype, lang);
  184. }
  185. bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
  186. {
  187. if (term1->type != term2->type)
  188. return false;
  189. if (strcmp (term1->data, term2->data) != 0)
  190. return false;
  191. if (term1->type == LSUP_TERM_LITERAL) {
  192. if ((term1->datatype == NULL) != (term2->datatype == NULL)) // XOR
  193. return false;
  194. if (
  195. term1->datatype != NULL &&
  196. strcmp (term1->datatype, term2->datatype) != 0)
  197. return false;
  198. if ((term1->lang == NULL) != (term2->lang == NULL)) // XOR
  199. return false;
  200. if (
  201. term1->lang != NULL &&
  202. strcmp (term1->lang, term2->lang) != 0)
  203. return false;
  204. }
  205. return true;
  206. }
  207. void LSUP_term_done (LSUP_Term *term)
  208. {
  209. if (LIKELY (term->data != NULL)) {
  210. free (term->data);
  211. term->data = NULL;
  212. }
  213. if (term->datatype != NULL) {
  214. free (term->datatype);
  215. term->datatype = NULL;
  216. }
  217. }
  218. void LSUP_term_free (LSUP_Term *term)
  219. {
  220. if (LIKELY (term != NULL)) {
  221. LSUP_term_done (term);
  222. free (term);
  223. term = NULL;
  224. }
  225. }
  226. // Extern inline functions.
  227. LSUP_Key LSUP_sterm_to_key (const LSUP_Buffer *sterm);
  228. LSUP_Key LSUP_term_to_key (const LSUP_Term *term);