123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265 |
- %include {
- /** @brief Lemon parser grammar for Turtle.
- *
- * The `lemon' parser generator executable must be in your PATH:
- * https://sqlite.org/src/doc/trunk/doc/lemon.html
- *
- * To generate the parser, run: `make parsers'
- *
- * TTL EBNF: https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar
- */
- #include "codec.h"
- /* XML Schema namespace IRI. Must be a real preprocessor directive (#define). */
- #define XSD_PFX "http://www.w3.org/2001/XMLSchema#" // FIXME
- }
- /* Generated parser functions are prefixed with TTLParse instead of Parse. */
- %name TTLParse
- /* Runs when the parser stack is exhausted. */
- %stack_overflow {
-     log_error ("Stack overflow in TTL parsing. Please jettison the parser.");
- }
- /* Runs when the parser gives up after an error it cannot recover from. */
- %parse_failure {
- log_error ("TTL parse error. Cannot continue.");
- }
- /* Parser stack depth. CHUNK_SIZE is not defined in this file (presumably it comes from codec.h; TODO confirm). */
- %stack_size CHUNK_SIZE
- /*
-  * Runs on every syntax error. TOKEN is the value of the offending token;
-  * per %token_type it is a plain `uint8_t *`, not a struct, so it is tested
-  * and printed directly. A NULL or empty token is reported as incomplete input.
-  * NOTE(review): assumes log_error takes a printf-style format; confirm.
-  */
- %syntax_error {
-     //UNUSED_PARAMETER (yymajor); /* Silence some compiler warnings */
-     if (TOKEN && TOKEN[0]) {
-         log_error ("near \"%s\": syntax error", (const char *) TOKEN);
-     } else {
-         log_error ("incomplete input");
-     }
- }
- /* Terminal names in the generated header get a T_ prefix. */
- %token_prefix "T_"
- /* Every terminal carries a byte string as its value. */
- %token_type { uint8_t * }
- /*
-  * Tokens the parser discards are released with free(). Lemon does not run
-  * this for a token that an action refers to by label; such actions must
-  * release the token themselves.
-  */
- %token_destructor { free ($$); }
- /*
-  * Release an object list that the parser discards (e.g. during error
-  * recovery). The list is a NULL-terminated array of object term handles;
-  * its type is declared next to the objectList rules.
-  * The destructor is attached to `objectList`, the symbol the grammar
-  * actually defines, and frees the array as well as the terms in it.
-  */
- %destructor objectList {
-     if ($$) {
-         for (size_t i = 0; $$[i]; i++) {
-             LSUP_term_free ($$[i]);
-         }
-         free ($$);
-     }
- }
- /* Non-terminals without their own %type carry a byte string. */
- %default_type { uint8_t * }
- /* Extra argument of the generated parse function; actions see it as `state`. */
- %extra_argument { LSUP_TTLParserState *state }
- /*
- * Rules.
- */
- /* A document is a possibly empty list of statements followed by the EOF token. */
- turtleDoc ::= statements EOF .
- /* Left-recursive statement list; the empty alternative ends the recursion. */
- statements ::= statements statement .
- statements ::= .
- /* A statement is a directive or a block of triples, then optional whitespace and an EOS token. */
- statement ::= directive ows EOS .
- statement ::= triples ows EOS .
- /* The two directive forms: prefix declaration and base declaration. */
- directive ::= prefixID .
- directive ::= base .
- /* Prefix declaration with a prefix name: pass prefix P and namespace N to LSUP_nsmap_add on state->nsm. */
- // NOTE(review): P and N are labelled, so %token_destructor does not free them.
- // Confirm whether LSUP_nsmap_add copies or adopts the strings.
- prefixID ::= PREFIX WS PFX_NAME(P) COLON IRIREF(N) . {
- LSUP_nsmap_add (state->nsm, P, N);
- }
- /* Prefix declaration without a prefix name: N is registered under the empty string. */
- prefixID ::= PREFIX WS COLON IRIREF(N) . {
- LSUP_nsmap_add (state->nsm, "", N);
- }
- /*
-  * Base declaration: store the IRI as the base used to resolve later IRI
-  * references. A base set by an earlier declaration is released first.
-  */
- base ::= BASE WS IRIREF(D) . {
-     if (state->base) LSUP_term_free (state->base);
-     state->base = LSUP_iriref_new (D, NULL);
-     // D is labelled, so the parser will not free it; release it here as the
-     // IRIREF `resource` rule does after building its term.
-     free (D);
- }
- /*
-  * One subject with its predicate-object list: both are handed to
-  * LSUP_spo_list_add_triples together with state->it, then released.
-  */
- triples ::= subject(S) predObjList(L) . {
- LSUP_spo_list_add_triples (state->it, S, L);
- // S and L are labelled, so the parser does not destroy them; free them here.
- LSUP_term_free (S);
- LSUP_pred_obj_list_free (L);
- }
- /* List of predicates, each with its own object list, for one subject. */
- %type predObjList { PredObjList * }
- %destructor predObjList { LSUP_pred_obj_list_free ($$); }
- /*
-  * Append another predicate and its objects to an existing list. A is both
-  * the incoming and the resulting list, so it is updated in place.
-  * A reduce action must not `return`: that would skip the parser's own
-  * bookkeeping after the action.
-  * NOTE(review): the result of LSUP_pred_obj_list_add is ignored; check it
-  * once its error contract is confirmed.
-  */
- predObjList(A) ::= predObjList(A) SEMICOLON predicate(P) objectList(O) . {
-     LSUP_pred_obj_list_add (A, P, O);
- }
- /* First predicate of a subject: create the list, then add the pair. */
- predObjList(A) ::= predicate(P) objectList(O) . {
-     A = LSUP_pred_obj_list_new();
-     LSUP_pred_obj_list_add (A, P, O);
- }
- /*
-  * Trailing semicolon: the list passes through unchanged. The label is
-  * required; an unlabelled predObjList on the right-hand side would be
-  * destroyed by its %destructor and leave a dangling value behind.
-  */
- predObjList(A) ::= predObjList(A) SEMICOLON .
- /* NULL-terminated array of object terms sharing one predicate. */
- %type objectList { LSUP_Term ** }
- /* Append one more comma-separated object to the list. */
- objectList(A) ::= objectList(L) COMMA object(O) . {
-     A = LSUP_obj_list_add (L, O);
- }
- /* First object: start a list with room for one term plus the NULL terminator. */
- objectList(A) ::= object(O) . {
-     // calloc takes (count, size); the zeroed second slot is the terminator.
-     A = calloc (2, sizeof (*A));
-     if (UNLIKELY (!A)) {
-         // A reduce action cannot return a status code, so the failure is
-         // logged and A stays NULL. TODO error handling
-         log_error ("Memory allocation error in TTL object list.");
-     } else {
-         A[0] = O;
-     }
- }
- /*
-  * Subjects, predicates and objects are term handles.
-  *
-  * `resource` has a %destructor, and Lemon destroys every right-hand-side
-  * symbol that an action does not refer to by label. The resource
-  * alternatives therefore label the term and forward it explicitly;
-  * left unlabelled, the term would be freed while still in use.
-  * `blank` and `literal` have no destructor and pass through unchanged.
-  */
- %type subject { LSUP_Term * }
- %type predicate { LSUP_Term * }
- %type object { LSUP_Term * }
- subject(A) ::= resource(R) . { A = R; }
- subject ::= blank .
- predicate(A) ::= resource(R) . { A = R; }
- /* The RDF_TYPE token stands for the rdf:type predicate. */
- predicate(A)::= RDF_TYPE . { A = LSUP_iriref_new ("rdf:type", state->nsm); }
- object(A) ::= resource(R) . { A = R; }
- object ::= blank .
- object ::= literal .
- /* Literal terms. Each rule builds the term with LSUP_term_new from the token text D. */
- // NOTE(review): D and L are labelled tokens, so the parser does not free
- // them; confirm whether LSUP_term_new copies or adopts its arguments.
- %type literal { LSUP_Term * }
- //%destructor literal { LSUP_term_free ($$); } // Destroyed with PO list.
- /* Plain string: no third argument. */
- literal(A) ::= STRING(D) . {
- A = LSUP_term_new (LSUP_TERM_LITERAL, D, NULL);
- }
- /* String followed by a language tag L. */
- literal(A) ::= STRING(D) LANGTAG(L) . {
- A = LSUP_term_new (LSUP_TERM_LT_LITERAL, D, L);
- }
- /* String followed by a datatype marker and a datatype resource M. */
- // NOTE(review): M is labelled, so its %destructor does not run here;
- // confirm who releases it.
- literal(A) ::= STRING(D) DTYPE_MARKER resource(M) . {
- A = LSUP_term_new (LSUP_TERM_LITERAL, D, M);
- }
- /*
-  * Numeric and boolean tokens: the datatype is an IRI built from an "xsd:"
-  * prefixed name and state->nsm.
-  * NOTE(review): presumably requires the "xsd" prefix to be present in
-  * state->nsm; confirm.
-  */
- literal(A) ::= INTEGER(D) . {
- A = LSUP_term_new (
- LSUP_TERM_LITERAL, D,
- LSUP_iriref_new ("xsd:integer", state->nsm));
- }
- literal(A) ::= DOUBLE(D) . {
- A = LSUP_term_new (
- LSUP_TERM_LITERAL, D,
- LSUP_iriref_new ("xsd:double", state->nsm));
- }
- literal(A) ::= DECIMAL(D) . {
- A = LSUP_term_new (
- LSUP_TERM_LITERAL, D,
- LSUP_iriref_new ("xsd:decimal", state->nsm));
- }
- literal(A) ::= BOOLEAN(D) . {
- A = LSUP_term_new (
- LSUP_TERM_LITERAL, D,
- LSUP_iriref_new ("xsd:boolean", state->nsm));
- }
- /* Labelled blank node: the term is built from the node ID string D. */
- // NOTE(review): D is not freed here; confirm whether LSUP_term_new copies
- // or adopts it.
- blank(A) ::= nodeID(D) . {
-     A = LSUP_term_new (LSUP_TERM_BNODE, D, NULL);
- }
- /* Empty brackets: a blank node created without a label. */
- blank(A) ::= LBRACKET ows RBRACKET . {
-     A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
- }
- /*
-  * Brackets around a predicate-object list: a new blank node becomes the
-  * subject of every pair in L.
-  */
- blank(A) ::= LBRACKET predObjList(L) RBRACKET . {
-     A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
-     // Same three-argument form as in the `triples` rule: iterator,
-     // subject, predicate-object list.
-     LSUP_spo_list_add_triples (state->it, A, L);
-     // L is labelled, so it must be released explicitly.
-     LSUP_pred_obj_list_free (L);
- }
- blank ::= collection .
- /* Empty parentheses: the value is the rdf:nil IRI. */
- blank(A) ::= LPAREN ows RPAREN . {
-     A = LSUP_iriref_new ("rdf:nil", state->nsm);
- }
- // "collection" is the subject of the first collection item.
- %type collection { LSUP_Term * }
- // Collection triples are added here to the graph.
- // Non-empty parenthesised item list: the value is what
- // LSUP_bnode_add_collection returns for state->it and the item list L.
- // NOTE(review): L is not freed in this action; per the itemList comment it
- // is released when the list is added to the graph, presumably inside this
- // call. Confirm.
- collection(A) ::= LPAREN ows itemList(L) ows RPAREN . {
- A = LSUP_bnode_add_collection (state->it, L);
- }
- /* NULL-terminated array of the terms inside a collection. */
- %type itemList { LSUP_Term ** }
- // Freed when the item list in the collection gets added to the graph.
- %destructor itemList {}
- /* Append one more whitespace-separated item to the list. */
- itemList(A) ::= itemList(L) WS object(O) . { A = LSUP_obj_list_add (L, O); }
- /* First item: start a list with room for one term plus the NULL terminator. */
- itemList(A) ::= object(O) . {
-     // calloc takes (count, size); the zeroed second slot is the terminator.
-     A = calloc (2, sizeof (*A));
-     if (UNLIKELY (!A)) {
-         // A reduce action cannot return a status code, so the failure is
-         // logged and A stays NULL. TODO error handling
-         log_error ("Memory allocation error in TTL item list.");
-     } else {
-         A[0] = O;
-     }
- }
- /* IRI terms, written either as an IRI reference or as a prefixed name. */
- %type resource { LSUP_Term * }
- %destructor resource { LSUP_term_free ($$); }
- /*
-  * IRI reference. The token text is turned into a term and then freed.
-  * When a base has been set, the term is made absolute against it and the
-  * intermediate term is released.
-  */
- resource(A) ::= IRIREF(D) . {
-     // LSUP_iriref_new yields a term handle, so the local must be a pointer.
-     LSUP_Term *rel_iri = LSUP_iriref_new (D, NULL);
-     free (D);
-     if (state->base) {
-         A = LSUP_iriref_absolute (rel_iri, state->base);
-         LSUP_term_free (rel_iri);
-     } else {
-         A = rel_iri;
-     }
- }
- /* Prefixed name: the term is built from the "prefix:name" string and state->nsm. */
- resource(A) ::= qname(D) . {
-     A = LSUP_iriref_new (D, state->nsm);
-     // D was allocated by the qname rule and, being labelled, is not freed
-     // by the parser; release it as the IRIREF rule does.
-     free (D);
- }
- /*
-  * Prefixed name with a prefix: the value is a newly allocated
-  * "prefix:name" string. Two extra bytes hold the colon and the terminator.
-  * P and D are labelled tokens, which the parser does not free, so they are
-  * released here once their text has been copied.
-  */
- qname(A) ::= PFX_NAME(P) COLON IDNAME(D) . {
-     A = malloc (strlen ((char *) P) + strlen ((char *) D) + 2);
-     if (UNLIKELY (!A)) {
-         log_error ("Memory allocation error in TTL prefixed name.");
-     } else {
-         sprintf ((char *) A, "%s:%s", (char *) P, (char *) D);
-     }
-     free (P);
-     free (D);
- }
- /* Prefixed name with an empty prefix: the value is ":name". */
- qname(A) ::= COLON IDNAME(D) . {
-     A = malloc (strlen ((char *) D) + 2);
-     if (UNLIKELY (!A)) {
-         log_error ("Memory allocation error in TTL prefixed name.");
-     } else {
-         sprintf ((char *) A, ":%s", (char *) D);
-     }
-     free (D);
- }
- /* Blank node ID: the IDNAME string after the BNODE_PFX token becomes the value. */
- nodeID(A) ::= BNODE_PFX IDNAME(D) . { A = D; }
- /* Optional whitespace: one WS token or nothing. */
- ows ::= WS.
- ows ::=.
- /*
- * This has been adapted from
- * https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar :
- [1] turtleDoc ::= statement*
- [2] statement ::= directive '.' | triples '.' | ws+
- [3] directive ::= prefixID | base
- [4] prefixID ::= '@prefix' ws+ prefixName? ':' uriref
- [5] base ::= '@base' ws+ uriref
- [6] triples ::= subject predicateObjectList
- [7] predicateObjectList ::= verb objectList ( ';' verb objectList )* ( ';')?
- [8] objectList ::= object ( ',' object)*
- [9] verb ::= predicate | 'a'
- [10] comment ::= '#' ( [^#xA#xD] )*
- [11] subject ::= resource | blank
- [12] predicate ::= resource
- [13] object ::= resource | blank | literal
- [14] literal ::= quotedString ( '@' language )? | datatypeString | integer | double | decimal | boolean
- [15] datatypeString ::= quotedString '^^' resource
- [16] integer ::= ('-' | '+') ? [0-9]+
- [17] double ::= ('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent )
- [18] decimal ::= ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ )
- [19] exponent ::= [eE] ('-' | '+')? [0-9]+
- [20] boolean ::= 'true' | 'false'
- [21] blank ::= nodeID | '[]' | '[' predicateObjectList ']' | collection
- [22] itemList ::= object+
- [23] collection ::= '(' itemList? ')'
- [24] ws ::= #x9 | #xA | #xD | #x20 | comment
- [25] resource ::= uriref | qname
- [26] nodeID ::= '_:' name
- [27] qname ::= prefixName? ':' name?
- [28] uriref ::= '<' relativeURI '>'
- [29] language ::= [a-z]+ ('-' [a-z0-9]+ )*
- [30] nameStartChar ::= [A-Z] | "_" | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
- [31] nameChar ::= nameStartChar | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
- [32] name ::= nameStartChar nameChar*
- [33] prefixName ::= ( nameStartChar - '_' ) nameChar*
- [34] relativeURI ::= ucharacter*
- [35] quotedString ::= string | longString
- [36] string ::= #x22 scharacter* #x22
- [37] longString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22
- [38] character ::= '\u' hex hex hex hex |
- '\U' hex hex hex hex hex hex hex hex |
- '\\' |
- [#x20-#x5B] | [#x5D-#x10FFFF]
- [39] echaracter ::= character | '\t' | '\n' | '\r'
- [40] hex ::= [#x30-#x39] | [#x41-#x46]
- [41] ucharacter ::= ( character - #x3E ) | '\>'
- [42] scharacter ::= ( echaracter - #x22 ) | '\"'
- [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD
- */
|