grammar_ttl.y 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  %include {
  /** @brief Lemon parser grammar for Turtle.
   *
   * The `lemon' parser generator executable must be in your PATH:
   * https://sqlite.org/src/doc/trunk/doc/lemon.html
   *
   * To generate the parser, run: `make parsers'
   *
   * TTL EBNF: https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar
   */

  #include "codec.h"

  /* Namespace IRI of the XML Schema datatypes. This must be a real
   * preprocessor directive: everything in this block is copied verbatim
   * into the generated C file. */
  #define XSD_PFX "http://www.w3.org/2001/XMLSchema#" // FIXME
  }
  /* Prefix for the generated parser entry points (TTLParse, ...). */
  %name TTLParse

  /* Code run by the generated parser when its stack overflows. */
  %stack_overflow {
      log_error ("Stack overflow in TTL parsing. Please jettison the parser.");
  }
  /* Code run when the parser cannot recover from a syntax error. */
  %parse_failure {
      log_error ("TTL parse error. Cannot continue.");
  }

  /* Depth of the parser stack; CHUNK_SIZE is defined outside this file. */
  %stack_size CHUNK_SIZE
  /*
   * Code run on every syntax error. Inside this block Lemon exposes the
   * offending token value as TOKEN, which has the %token_type of this
   * grammar (uint8_t *), i.e. a plain byte string rather than a struct.
   */
  %syntax_error {
      //UNUSED_PARAMETER (yymajor); /* Silence some compiler warnings */
      if (TOKEN && TOKEN[0])
          log_error ("near \"%s\": syntax error", (const char *) TOKEN);
      else
          log_error ("incomplete input");
  }
  /* Terminal names are emitted to C with a T_ prefix. */
  %token_prefix     "T_"

  /* Each terminal carries a byte-string pointer that is released with
   * free() when the parser discards the token. */
  %token_type       { uint8_t * }
  %token_destructor { free ($$); }
  /*
   * Destructor for the NULL-terminated array of object term handles built
   * by the objectList rules (the value type is declared next to those
   * rules). It was previously attached to `objList', a symbol that no rule
   * uses, so it could never run.
   *
   * Lemon only invokes it when the parser itself discards the value, e.g.
   * while unwinding the stack after an error; values that are labelled and
   * used in an action remain the responsibility of that action.
   */
  %destructor objectList {
      if ($$) {
          for (size_t i = 0; $$[i]; i++) {
              LSUP_term_free ($$[i]);
          }
          /* Release the array itself as well as the terms it holds. */
          free ($$);
      }
  }
  /* Value type of every nonterminal that has no %type of its own. */
  %default_type   { uint8_t * }

  /* Extra parser argument, visible in every action as `state'. */
  %extra_argument { LSUP_TTLParserState *state }
  /*
   * Rules.
   */

  /* Start symbol: a sequence of statements closed by the EOF token. */
  turtleDoc   ::= statements EOF .

  /* Zero or more statements (left-recursive). */
  statements  ::= statements statement .
  statements  ::= .

  /* A statement is a directive or a set of triples, then optional
   * whitespace and the EOS token. */
  statement   ::= directive ows EOS .
  statement   ::= triples ows EOS .

  directive   ::= prefixID .
  directive   ::= base .
  /*
   * Prefix directive: registers a prefix (possibly empty) for a namespace
   * IRI in the namespace map held by the parser state.
   *
   * Lemon does not run the token destructor on labelled tokens that an
   * action references, so each action releases its token strings itself.
   * NOTE(review): this assumes LSUP_nsmap_add copies its string arguments,
   * as LSUP_iriref_new evidently does in the resource rule - confirm.
   */
  prefixID    ::= PREFIX WS PFX_NAME(P) COLON IRIREF(N) . {
      LSUP_nsmap_add (state->nsm, P, N);
      free (P);
      free (N);
  }
  prefixID    ::= PREFIX WS COLON IRIREF(N) . {
      LSUP_nsmap_add (state->nsm, "", N);
      free (N);
  }
  /*
   * Base directive: stores the given IRI as the base in the parser state.
   *
   * A document may contain several base directives, so the previous base
   * term is released before it is replaced. The token string is freed
   * after use, as the resource rule does after the same constructor call.
   */
  base        ::= BASE WS IRIREF(D) . {
      if (state->base) LSUP_term_free (state->base);
      state->base = LSUP_iriref_new (D, NULL);
      free (D);
  }
  /*
   * One subject followed by its predicate-object list. The triples are
   * handed to state->it, then both the subject term and the list are
   * released by this action.
   */
  triples     ::= subject(SUBJ) predObjList(POL) . {
      LSUP_spo_list_add_triples (state->it, SUBJ, POL);

      LSUP_term_free (SUBJ);
      LSUP_pred_obj_list_free (POL);
  }
  /*
   * Predicate-object list: `pred obj, obj ; pred obj ...', with an optional
   * trailing semicolon.
   *
   * Actions must not `return': the code is pasted into the reduce routine
   * of the generated parser, and returning from it skips the stack
   * bookkeeping that follows every reduction.
   * NOTE(review): the result of LSUP_pred_obj_list_add is discarded here;
   * confirm whether it is a status code that should be recorded.
   */
  %type predObjList { PredObjList * }
  %destructor predObjList { LSUP_pred_obj_list_free ($$); }

  predObjList(A) ::= predObjList(B) SEMICOLON predicate(P) objectList(O) . {
      LSUP_pred_obj_list_add (B, P, O);
      A = B;
  }
  predObjList(A) ::= predicate(P) objectList(O) . {
      A = LSUP_pred_obj_list_new ();
      LSUP_pred_obj_list_add (A, P, O);
  }
  /* The list must be labelled and passed on explicitly: an unlabelled
   * symbol on the right-hand side is destroyed by its %destructor on
   * reduction, which would leave the result pointing at a freed list. */
  predObjList(A) ::= predObjList(B) SEMICOLON . {
      A = B;
  }
  /*
   * Comma-separated objects of one predicate, kept as a NULL-terminated
   * array of term handles. The first object allocates a two-slot zeroed
   * array (object + terminator); further objects are appended with
   * LSUP_obj_list_add.
   */
  %type objectList { LSUP_Term ** }

  objectList(A) ::= objectList(L) COMMA object(O) . {
      A = LSUP_obj_list_add (L, O);
  }
  objectList(A) ::= object(O) . {
      A = calloc (2, sizeof (*A));
      if (UNLIKELY (!A)) {
          /* Returning from here would leave the reduce routine of the
           * generated parser half way through, so only report it. */
          log_error ("Out of memory building TTL object list.");
          // TODO error handling: no status is propagated to the caller yet.
      } else {
          A[0] = O;
      }
  }
  /*
   * Term-valued nonterminals. They hold term handles, so they get the same
   * value type as resource, literal and collection instead of the default
   * byte-string type.
   */
  %type subject   { LSUP_Term * }
  %type predicate { LSUP_Term * }
  %type object    { LSUP_Term * }
  %type blank     { LSUP_Term * }

  /*
   * Every pass-through rule labels its right-hand side and copies it.
   * Lemon runs the %destructor of an unlabelled right-hand symbol on
   * reduction; `resource' has one, so the bare form would free the term
   * and leave the result pointing at released memory.
   */
  subject(A)   ::= resource(B) . { A = B; }
  subject(A)   ::= blank(B) . { A = B; }
  predicate(A) ::= resource(B) . { A = B; }
  predicate(A) ::= RDF_TYPE . { A = LSUP_iriref_new ("rdf:type", state->nsm); }
  object(A)    ::= resource(B) . { A = B; }
  object(A)    ::= blank(B) . { A = B; }
  object(A)    ::= literal(B) . { A = B; }
  /*
   * Literals: plain string, language-tagged string, explicitly typed
   * string, and the numeric/boolean shorthands, which get an xsd datatype
   * IRI resolved through the namespace map in the parser state.
   *
   * The lexeme is released once the term has been built, because Lemon
   * does not run the token destructor on labelled tokens used in an action.
   * NOTE(review): this assumes LSUP_term_new copies its data argument, as
   * LSUP_iriref_new evidently does in the resource rule - confirm. The
   * language tag and the datatype terms are left untouched because their
   * ownership cannot be told from this file.
   */
  %type literal { LSUP_Term * }
  //%destructor literal { LSUP_term_free ($$); } // Destroyed with PO list.

  literal(A)  ::= STRING(D) . {
      A = LSUP_term_new (LSUP_TERM_LITERAL, D, NULL);
      free (D);
  }
  literal(A)  ::= STRING(D) LANGTAG(L) . {
      A = LSUP_term_new (LSUP_TERM_LT_LITERAL, D, L);
      free (D);
  }
  literal(A)  ::= STRING(D) DTYPE_MARKER resource(M) . {
      A = LSUP_term_new (LSUP_TERM_LITERAL, D, M);
      free (D);
  }
  literal(A)  ::= INTEGER(D) . {
      A = LSUP_term_new (
              LSUP_TERM_LITERAL, D,
              LSUP_iriref_new ("xsd:integer", state->nsm));
      free (D);
  }
  literal(A)  ::= DOUBLE(D) . {
      A = LSUP_term_new (
              LSUP_TERM_LITERAL, D,
              LSUP_iriref_new ("xsd:double", state->nsm));
      free (D);
  }
  literal(A)  ::= DECIMAL(D) . {
      A = LSUP_term_new (
              LSUP_TERM_LITERAL, D,
              LSUP_iriref_new ("xsd:decimal", state->nsm));
      free (D);
  }
  literal(A)  ::= BOOLEAN(D) . {
      A = LSUP_term_new (
              LSUP_TERM_LITERAL, D,
              LSUP_iriref_new ("xsd:boolean", state->nsm));
      free (D);
  }
  /*
   * Blank nodes: a labelled node, an empty `[]', a `[ ... ]' node with its
   * own predicate-object list, a collection, or the empty collection `()'
   * which is represented by rdf:nil.
   *
   * NOTE(review): the node label string is not released after the term is
   * created; confirm whether LSUP_term_new copies it.
   */
  blank(A)    ::= nodeID(D) . {
      A = LSUP_term_new (LSUP_TERM_BNODE, D, NULL);
  }
  blank(A)    ::= LBRACKET ows RBRACKET . {
      A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
  }
  blank(A)    ::= LBRACKET predObjList(L) RBRACKET . {
      A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
      /* Same call shape as in the triples rule: target first, then the
       * subject and its predicate-object list. */
      LSUP_spo_list_add_triples (state->it, A, L);
      LSUP_pred_obj_list_free (L);
  }
  blank       ::= collection .
  blank(A)    ::= LPAREN ows RPAREN . {
      A = LSUP_iriref_new ("rdf:nil", state->nsm);
  }
  /* The value of "collection" is the subject of the first collection item. */
  %type collection { LSUP_Term * }

  /* The collection triples are added to the graph by this action. */
  collection(HEAD) ::= LPAREN ows itemList(ITEMS) ows RPAREN . {
      HEAD = LSUP_bnode_add_collection (state->it, ITEMS);
  }
  /*
   * Whitespace-separated members of a collection, kept as a NULL-terminated
   * array of term handles. The first member allocates a two-slot zeroed
   * array (member + terminator); further members are appended with
   * LSUP_obj_list_add.
   */
  %type itemList { LSUP_Term ** }
  // Freed when the item list in the collection gets added to the graph.
  %destructor itemList {}

  itemList(A) ::= itemList(L) WS object(O) . { A = LSUP_obj_list_add (L, O); }
  itemList(A) ::= object(O) . {
      A = calloc (2, sizeof (*A));
      if (UNLIKELY (!A)) {
          /* Returning from here would leave the reduce routine of the
           * generated parser half way through, so only report it. */
          log_error ("Out of memory building TTL collection item list.");
          // TODO error handling: no status is propagated to the caller yet.
      } else {
          A[0] = O;
      }
  }
  /*
   * Resources: either an IRI reference, made absolute against the current
   * base when one is set, or a qualified name resolved through the
   * namespace map in the parser state.
   */
  %type resource { LSUP_Term * }
  %destructor resource { LSUP_term_free ($$); }

  resource(A) ::= IRIREF(D) . {
      /* Term handles are pointers, like every other term in this file. */
      LSUP_Term *rel_iri = LSUP_iriref_new (D, NULL);
      free (D);
      if (state->base) {
          A = LSUP_iriref_absolute (rel_iri, state->base);
          LSUP_term_free (rel_iri);
      } else {
          A = rel_iri;
      }
  }
  resource(A) ::= qname(D) . {
      A = LSUP_iriref_new (D, state->nsm);
      /* The qname string is heap-allocated by the qname rules and is no
       * longer needed, exactly as with the token string above. */
      free (D);
  }
  /*
   * Qualified name: builds a freshly allocated "prefix:name" (or ":name")
   * string. The buffer holds both parts, the colon and the terminator.
   *
   * The source tokens are labelled and used here, so Lemon does not destroy
   * them; they are released once their text has been copied.
   */
  qname(A)    ::= PFX_NAME(P) COLON IDNAME(D) . {
      A = malloc (strlen ((char *) P) + strlen ((char *) D) + 2);
      if (UNLIKELY (!A)) log_error ("Out of memory building TTL qname.");
      else sprintf ((char *) A, "%s:%s", P, D);
      free (P);
      free (D);
  }
  qname(A)    ::= COLON IDNAME(D) . {
      A = malloc (strlen ((char *) D) + 2);
      if (UNLIKELY (!A)) log_error ("Out of memory building TTL qname.");
      else sprintf ((char *) A, ":%s", D);
      free (D);
  }
  /* Blank node label: the IDNAME string is passed through unchanged. */
  nodeID(LABEL) ::= BNODE_PFX IDNAME(NAME) . {
      LABEL = NAME;
  }

  /* Optional whitespace. */
  ows         ::= WS .
  ows         ::= .
  174. /*
  175. * This has been adapted from
  176. * https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar :
  177. [1] turtleDoc ::= statement*
  178. [2] statement ::= directive '.' | triples '.' | ws+
  179. [3] directive ::= prefixID | base
  180. [4] prefixID ::= '@prefix' ws+ prefixName? ':' uriref
  181. [5] base ::= '@base' ws+ uriref
  182. [6] triples ::= subject predicateObjectList
  183. [7] predicateObjectList ::= verb objectList ( ';' verb objectList )* ( ';')?
  184. [8] objectList ::= object ( ',' object)*
  185. [9] verb ::= predicate | 'a'
  186. [10] comment ::= '#' ( [^#xA#xD] )*
  187. [11] subject ::= resource | blank
  188. [12] predicate ::= resource
  189. [13] object ::= resource | blank | literal
  190. [14] literal ::= quotedString ( '@' language )? | datatypeString | integer | double | decimal | boolean
  191. [15] datatypeString ::= quotedString '^^' resource
  192. [16] integer ::= ('-' | '+') ? [0-9]+
  193. [17] double ::= ('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent )
  194. [18] decimal ::= ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ )
  195. [19] exponent ::= [eE] ('-' | '+')? [0-9]+
  196. [20] boolean ::= 'true' | 'false'
  197. [21] blank ::= nodeID | '[]' | '[' predicateObjectList ']' | collection
  198. [22] itemList ::= object+
  199. [23] collection ::= '(' itemList? ')'
  200. [24] ws ::= #x9 | #xA | #xD | #x20 | comment
  201. [25] resource ::= uriref | qname
  202. [26] nodeID ::= '_:' name
  203. [27] qname ::= prefixName? ':' name?
  204. [28] uriref ::= '<' relativeURI '>'
  205. [29] language ::= [a-z]+ ('-' [a-z0-9]+ )*
  206. [30] nameStartChar ::= [A-Z] | "_" | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
  207. [31] nameChar ::= nameStartChar | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
  208. [32] name ::= nameStartChar nameChar*
  209. [33] prefixName ::= ( nameStartChar - '_' ) nameChar*
  210. [34] relativeURI ::= ucharacter*
  211. [35] quotedString ::= string | longString
  212. [36] string ::= #x22 scharacter* #x22
  213. [37] longString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22
  214. [38] character ::= '\u' hex hex hex hex |
  215. '\U' hex hex hex hex hex hex hex hex |
  216. '\\' |
  217. [#x20-#x5B] | [#x5D-#x10FFFF]
  218. [39] echaracter ::= character | '\t' | '\n' | '\r'
  219. [40] hex ::= [#x30-#x39] | [#x41-#x46]
  220. [41] ucharacter ::= ( character - #x3E ) | '\>'
  221. [42] scharacter ::= ( echaracter - #x22 ) | '\"'
  222. [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD
  223. */