grammar_ttl.y 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. %include {
  2. /** @brief Lemon parser grammar for Turtle.
  3. *
  4. * The `lemon' parser generator executable must be in your PATH:
  5. * https://sqlite.org/src/doc/trunk/doc/lemon.html
  6. *
  7. * To generate the parser, run: `make parsers'
  8. *
  9. * TTL EBNF: https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar
  10. */
  11. #include "codec.h"
  12. }
  13. %name TTLParse
  14. %stack_overflow {
  15. log_error ("Stack oveflow in TTL parsing. Please jettison the parser.");
  16. }
  17. %parse_failure {
  18. log_error ("TTL parse error. Cannot continue.");
  19. }
  20. %stack_size CHUNK_SIZE
  21. %syntax_error {
  22. //UNUSED_PARAMETER (yymajor); /* Silence some compiler warnings */
  23. if (TOKEN[0]) log_error ("near \"%T\": syntax error", &TOKEN);
  24. else log_error ("incomplete input");
  25. }
  26. %token_prefix "T_"
  27. %token_type { char * }
  28. %token_destructor { free ($$); }
  29. /* NULL-terminated array of object term handles. */
  30. %type objList { LSUP_Term ** }
  31. %destructor objList {
  32. for (size_t i = 0; $$[i]; i++) {
  33. LSUP_term_free ($$[i]);
  34. }
  35. }
  36. %default_type { char * }
  37. %extra_argument { LSUP_TTLParserState *state }
  38. /*
  39. * Rules.
  40. */
  41. turtleDoc ::= statements EOF .
  42. statements ::= statements statement .
  43. statements ::= .
  44. statement ::= directive ows EOS .
  45. statement ::= triples ows EOS .
  46. directive ::= prefixID .
  47. directive ::= base .
  48. prefixID ::= PREFIX WS PFX_NAME(P) COLON IRIREF(N) . {
  49. LSUP_nsmap_add (state->nsm, P, N);
  50. }
  51. prefixID ::= PREFIX WS COLON IRIREF(N) . {
  52. LSUP_nsmap_add (state->nsm, "", N);
  53. }
  54. base ::= BASE WS IRIREF(D) . {
  55. state->base = LSUP_iriref_new (D, NULL);
  56. }
  57. triples ::= subject(S) predObjList(L) . {
  58. LSUP_spo_list_add_triples (state->it, S, L);
  59. LSUP_term_free (S);
  60. LSUP_pred_obj_list_free (L);
  61. }
  62. %type predObjList { LSUP_PredObjList * }
  63. %destructor predObjList { LSUP_pred_obj_list_free ($$); }
  64. predObjList(A) ::= predObjList(A) SEMICOLON predicate(P) objectList(O) . {
  65. return LSUP_pred_obj_list_add (A, P, O);
  66. }
  67. predObjList(A) ::= predicate(P) objectList(O) . {
  68. A = LSUP_pred_obj_list_new();
  69. return LSUP_pred_obj_list_add (A, P, O);
  70. }
  71. predObjList ::= predObjList SEMICOLON .
  72. %type objectList { LSUP_Term ** }
  73. objectList(A) ::= objectList(L) COMMA object(O) . {
  74. A = LSUP_obj_list_add (L, O);
  75. }
  76. objectList(A) ::= object(O) . {
  77. A = calloc (sizeof (*A), 2);
  78. //if (UNLIKELY (!A)) return LSUP_MEM_ERR; // TODO error handling
  79. A[0] = O;
  80. }
  81. %type subject { LSUP_Term * }
  82. subject ::= resource .
  83. subject ::= blank .
  84. %type predicate { LSUP_Term * }
  85. predicate ::= resource .
  86. predicate(A)::= RDF_TYPE . { A = LSUP_iriref_new ("rdf:type", state->nsm); }
  87. %type object { LSUP_Term * }
  88. object ::= resource .
  89. object ::= blank .
  90. object ::= literal .
  91. %type literal { LSUP_Term * }
  92. literal(A) ::= STRING(D) . {
  93. A = LSUP_term_new (LSUP_TERM_LITERAL, D, NULL);
  94. }
  95. literal(A) ::= STRING(D) LANGTAG(L) . {
  96. A = LSUP_term_new (LSUP_TERM_LT_LITERAL, D, L);
  97. }
  98. literal(A) ::= STRING(D) DTYPE_MARKER resource(M) . {
  99. A = LSUP_term_new (LSUP_TERM_LITERAL, D, M);
  100. }
  101. literal(A) ::= INTEGER(D) . {
  102. A = LSUP_term_new (
  103. LSUP_TERM_LITERAL, D,
  104. LSUP_iriref_new ("xsd:integer", state->nsm));
  105. }
  106. literal(A) ::= DOUBLE(D) . {
  107. A = LSUP_term_new (
  108. LSUP_TERM_LITERAL, D,
  109. LSUP_iriref_new ("xsd:double", state->nsm));
  110. }
  111. literal(A) ::= DECIMAL(D) . {
  112. A = LSUP_term_new (
  113. LSUP_TERM_LITERAL, D,
  114. LSUP_iriref_new ("xsd:decimal", state->nsm));
  115. }
  116. literal(A) ::= BOOLEAN(D) . {
  117. A = LSUP_term_new (
  118. LSUP_TERM_LITERAL, D,
  119. LSUP_iriref_new ("xsd:boolean", state->nsm));
  120. }
  121. %type blank { LSUP_Term * }
  122. blank(A) ::= nodeID(D) . {
  123. A = LSUP_term_new (LSUP_TERM_BNODE, D, NULL);
  124. }
  125. blank(A) ::= LBRACKET ows RBRACKET . {
  126. A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
  127. }
  128. blank(A) ::= LBRACKET predObjList(L) RBRACKET . {
  129. A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
  130. LSUP_spo_list_add_triples (state->it, A, L);
  131. LSUP_pred_obj_list_free (L);
  132. }
  133. blank ::= collection .
  134. blank(A) ::= LPAREN ows RPAREN . {
  135. A = LSUP_iriref_new ("rdf:nil", state->nsm);
  136. }
  137. // "collection" is the subject of the first collection item.
  138. %type collection { LSUP_Term * }
  139. // Collection triples are added here to the graph.
  140. collection(A) ::= LPAREN ows itemList(L) ows RPAREN . {
  141. A = LSUP_bnode_add_collection (state->it, L);
  142. }
  143. %type itemList { LSUP_Term ** }
  144. // Freed when the item list in the collection gets added to the graph.
  145. %destructor itemList {}
  146. itemList(A) ::= itemList(L) WS object(O) . { A = LSUP_obj_list_add (L, O); }
  147. itemList(A) ::= object(O) . {
  148. A = calloc (sizeof (*A), 2);
  149. //if (UNLIKELY (!A)) return LSUP_MEM_ERR; // TODO error handling
  150. A[0] = O;
  151. }
  152. %type resource { LSUP_Term * }
  153. resource(A) ::= IRIREF(D) . {
  154. LSUP_Term *rel_iri = LSUP_iriref_new (D, NULL);
  155. free (D);
  156. if (state->base) {
  157. A = LSUP_iriref_absolute (rel_iri, state->base);
  158. LSUP_term_free (rel_iri);
  159. } else {
  160. A = rel_iri;
  161. }
  162. }
  163. resource(A) ::= qname(D) . { A = LSUP_iriref_new (D, state->nsm); }
  164. qname(A) ::= PFX_NAME(P) COLON IDNAME(D) . {
  165. A = malloc (strlen (P) + strlen (D) + 2);
  166. sprintf (A, "%s:%s", P, D);
  167. }
  168. qname(A) ::= COLON IDNAME(D) . {
  169. A = malloc (strlen (D) + 2);
  170. sprintf (A, ":%s", D);
  171. }
  172. nodeID(A) ::= BNODE_PFX IDNAME(D) . { A = D; }
  173. ows ::= WS.
  174. ows ::=.
  175. /*
  176. * This has been adapted from
  177. * https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar :
  178. [1] turtleDoc ::= statement*
  179. [2] statement ::= directive '.' | triples '.' | ws+
  180. [3] directive ::= prefixID | base
  181. [4] prefixID ::= '@prefix' ws+ prefixName? ':' uriref
  182. [5] base ::= '@base' ws+ uriref
  183. [6] triples ::= subject predicateObjectList
  184. [7] predicateObjectList ::= verb objectList ( ';' verb objectList )* ( ';')?
  185. [8] objectList ::= object ( ',' object)*
  186. [9] verb ::= predicate | 'a'
  187. [10] comment ::= '#' ( [^#xA#xD] )*
  188. [11] subject ::= resource | blank
  189. [12] predicate ::= resource
  190. [13] object ::= resource | blank | literal
  191. [14] literal ::= quotedString ( '@' language )? | datatypeString | integer | double | decimal | boolean
  192. [15] datatypeString ::= quotedString '^^' resource
  193. [16] integer ::= ('-' | '+') ? [0-9]+
  194. [17] double ::= ('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent )
  195. [18] decimal ::= ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ )
  196. [19] exponent ::= [eE] ('-' | '+')? [0-9]+
  197. [20] boolean ::= 'true' | 'false'
  198. [21] blank ::= nodeID | '[]' | '[' predicateObjectList ']' | collection
  199. [22] itemList ::= object+
  200. [23] collection ::= '(' itemList? ')'
  201. [24] ws ::= #x9 | #xA | #xD | #x20 | comment
  202. [25] resource ::= uriref | qname
  203. [26] nodeID ::= '_:' name
  204. [27] qname ::= prefixName? ':' name?
  205. [28] uriref ::= '<' relativeURI '>'
  206. [29] language ::= [a-z]+ ('-' [a-z0-9]+ )*
  207. [30] nameStartChar ::= [A-Z] | "_" | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
  208. [31] nameChar ::= nameStartChar | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
  209. [32] name ::= nameStartChar nameChar*
  210. [33] prefixName ::= ( nameStartChar - '_' ) nameChar*
  211. [34] relativeURI ::= ucharacter*
  212. [35] quotedString ::= string | longString
  213. [36] string ::= #x22 scharacter* #x22
  214. [37] longString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22
  215. [38] character ::= '\u' hex hex hex hex |
  216. '\U' hex hex hex hex hex hex hex hex |
  217. '\\' |
  218. [#x20-#x5B] | [#x5D-#x10FFFF]
  219. [39] echaracter ::= character | '\t' | '\n' | '\r'
  220. [40] hex ::= [#x30-#x39] | [#x41-#x46]
  221. [41] ucharacter ::= ( character - #x3E ) | '\>'
  222. [42] scharacter ::= ( echaracter - #x22 ) | '\"'
  223. [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD
  224. */