%include {

/** @brief Lemon parser grammar for Turtle.
 *
 * The `lemon' parser generator executable must be in your PATH:
 * https://sqlite.org/src/doc/trunk/doc/lemon.html
 *
 * To generate the parser, run: `make parsers'
 *
 * TTL EBNF: https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar
 *
 * Memory ownership: token values are strings released with free() (see
 * %token_destructor below). Lemon only runs a destructor automatically for
 * right-hand-side symbols that carry no alias; an action that names a symbol
 * with an alias is responsible for releasing it or handing it over.
 */

#include "codec.h"

}

%name TTLParse

%stack_overflow {
    log_error ("Stack overflow in TTL parsing. Please jettison the parser.");
}

%parse_failure {
    log_error ("TTL parse error. Cannot continue.");
}

%stack_size CHUNK_SIZE

/*
%syntax_error {
    if (TOKEN[0])
        log_error ("near \"%T\": syntax error", &TOKEN);
    else
        log_error ("incomplete input");
}
*/

%token_prefix "T_"

%token_type { char * }
%token_destructor { free ($$); }

%default_type { char * }

%extra_argument { LSUP_TTLParserState *state }

%left WS .
%left EOS .
%left SEMICOLON .
%left COMMA .
%nonassoc PFX .
%nonassoc COLON .


/*
 * Rules.
 */

turtleDoc ::= statements EOF .

statements ::= statements statement .
statements ::= .

statement ::= directive ows EOS .
statement ::= triples ows EOS .

directive ::= prefixID .
directive ::= base .

prefixID ::= PREFIX WS PFX(P) ows IRIREF(N) . {
    LSUP_nsmap_add (state->nsm, P, N);

    /*
     * NOTE(review): assumes LSUP_nsmap_add copies both strings (the rule
     * below passes it a string literal as prefix) -- confirm.
     */
    free (P);
    free (N);
}
prefixID ::= PREFIX WS COLON ows IRIREF(N) . {
    LSUP_nsmap_add (state->nsm, "", N);

    /* NOTE(review): same assumption as above -- confirm. */
    free (N);
}

base ::= BASE WS IRIREF(D) . {
    /*
     * A document may carry several @base directives: release the previous
     * base term before replacing it.
     * NOTE(review): assumes state->base is owned by the parser state and
     * that no other object keeps a reference to it -- confirm.
     */
    if (state->base) {
        LSUP_term_free (state->base);
    }
    state->base = LSUP_iriref_new (D, NULL);

    /* The IRIREF rule for "resource" frees its token the same way. */
    free (D);
}

// WS before predicate is optional because pred has leading WS already.
triples ::= subject(S) predObjList(L) . [EOS] {
    size_t ct = LSUP_spo_list_add_triples (state->it, S, L);
    state->ct += ct;
    log_trace ("Added %zu triples.", ct);

    LSUP_term_free (S);
    LSUP_pred_obj_list_free (L);
}

%type predObjList { LSUP_PredObjList * }
%destructor predObjList { LSUP_pred_obj_list_free ($$); }
predObjList(A) ::= predicate(P) WS objectList(O) . [SEMICOLON] {
    A = LSUP_pred_obj_list_new();
    LSUP_pred_obj_list_add (A, P, O);
}
predObjList(A) ::= predObjList(L) SEMICOLON predicate(P) WS objectList(O) . [SEMICOLON] {
    LSUP_pred_obj_list_add (L, P, O);
    A = L;
}

/* NULL-terminated array of object term handles. */
%type objectList { LSUP_Term ** }
/*
 * Only runs when the parser discards an object list that no action consumed
 * (error unwinding, parser teardown): release every term, then the array.
 */
%destructor objectList {
    if ($$) {
        for (size_t i = 0; $$[i]; i++) {
            LSUP_term_free ($$[i]);
        }
        free ($$);
    }
}
objectList(A) ::= objectList(L) COMMA object(O) . [COMMA] {
    A = LSUP_obj_list_add (L, O);
}
objectList(A) ::= object(O) . [COMMA] {
    /* Two slots: the first object plus the NULL terminator. */
    A = calloc (2, sizeof (*A));
    A[0] = O;
}

%type subject { LSUP_Term * }
subject ::= resource .
subject ::= blank .

%type predicate { LSUP_Term * }
// Leading WS is counted as part of the RDF_TYPE ('a') token.
predicate ::= resource .
predicate(A)::= RDF_TYPE . { A = LSUP_iriref_new ("rdf:type", state->nsm); }

%type object { LSUP_Term * }
object ::= resource .
object ::= blank .
object ::= literal .

/*
 * NOTE(review): the literal and blank node rules below free their token
 * strings after building the term. This assumes LSUP_term_new copies its
 * data and language tag arguments, as LSUP_iriref_new is seen to do in the
 * IRIREF rule for "resource" -- confirm.
 */
%type literal { LSUP_Term * }
literal(A) ::= STRING(D) . {
    A = LSUP_term_new (LSUP_TERM_LITERAL, D, NULL);
    free (D);
}
literal(A) ::= STRING(D) LANGTAG(L) . {
    A = LSUP_term_new (LSUP_TERM_LT_LITERAL, D, L);
    free (D);
    free (L);
}
literal(A) ::= STRING(D) DTYPE_MARKER resource(M) . {
    /* M is handed to the new literal as its datatype; not released here. */
    A = LSUP_term_new (LSUP_TERM_LITERAL, D, M);
    free (D);
}
literal(A) ::= INTEGER(D) . {
    A = LSUP_term_new (
            LSUP_TERM_LITERAL, D,
            LSUP_iriref_new ("xsd:integer", state->nsm));
    free (D);
}
literal(A) ::= DOUBLE(D) . {
    A = LSUP_term_new (
            LSUP_TERM_LITERAL, D,
            LSUP_iriref_new ("xsd:double", state->nsm));
    free (D);
}
literal(A) ::= DECIMAL(D) . {
    A = LSUP_term_new (
            LSUP_TERM_LITERAL, D,
            LSUP_iriref_new ("xsd:decimal", state->nsm));
    free (D);
}
literal(A) ::= BOOLEAN(D) . {
    A = LSUP_term_new (
            LSUP_TERM_LITERAL, D,
            LSUP_iriref_new ("xsd:boolean", state->nsm));
    free (D);
}

%type blank { LSUP_Term * }
blank(A) ::= nodeID(D) . {
    A = LSUP_term_new (LSUP_TERM_BNODE, D, NULL);
    free (D);
}
blank(A) ::= LBRACKET ows RBRACKET . {
    A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
}
blank(A) ::= LBRACKET ows predObjList(L) ows RBRACKET . {
    /* The new blank node is the subject of every triple in the list. */
    A = LSUP_term_new (LSUP_TERM_BNODE, NULL, NULL);
    state->ct += LSUP_spo_list_add_triples (state->it, A, L);

    LSUP_pred_obj_list_free (L);
}
blank ::= collection .
blank(A) ::= LPAREN ows RPAREN . {
    A = LSUP_iriref_new ("rdf:nil", state->nsm);
}

// "collection" is the subject of the first collection item.
%type collection { LSUP_Term * }
// Collection triples are added here to the graph.
collection(A) ::= LPAREN ows itemList(L) ows RPAREN . {
    A = LSUP_bnode_add_collection (state->it, L);
}

%type itemList { LSUP_Term ** }
// Freed when the item list in the collection gets added to the graph.
%destructor itemList {}
itemList(A) ::= itemList(L) WS object(O) . { A = LSUP_obj_list_add (L, O); }
itemList(A) ::= object(O) . {
    /* Two slots: the first item plus the NULL terminator. */
    A = calloc (2, sizeof (*A));
    A[0] = O;
}

%type resource { LSUP_Term * }
resource(A) ::= IRIREF(D) . {
    LSUP_Term *rel_iri = LSUP_iriref_new (D, NULL);
    free (D);
    if (state->base) {
        /* Resolve against the current @base and drop the relative term. */
        A = LSUP_iriref_absolute (rel_iri, state->base);
        LSUP_term_free (rel_iri);
    } else {
        A = rel_iri;
    }
}
resource(A) ::= qname(D) . {
    A = LSUP_iriref_new (D, state->nsm);
    /* D was allocated by a qname rule; the IRIREF rule above frees its
     * string the same way after building the term. */
    free (D);
}

/*
 * qname and nodeID values are heap strings. These destructors only run when
 * the parser discards a value that no action consumed (error unwinding,
 * parser teardown); every rule using them names them with an alias.
 */
%destructor qname { free ($$); }
qname(A) ::= PFX(P) IDNAME(D) . {
    /* Room for prefix, colon, local name and the terminating NUL. */
    size_t len = strlen (P) + strlen (D) + 2;
    A = malloc (len);
    snprintf (A, len, "%s:%s", P, D);

    /* Both tokens were copied into A. */
    free (P);
    free (D);
}
qname(A) ::= COLON IDNAME(D) . {
    /* Room for the colon, local name and the terminating NUL. */
    size_t len = strlen (D) + 2;
    A = malloc (len);
    snprintf (A, len, ":%s", D);

    free (D);
}
qname(A) ::= COLON . { A = strndup (":", 2); }

%destructor nodeID { free ($$); }
/* Ownership of the IDNAME string moves to the nodeID value. */
nodeID(A) ::= BNODE_PFX IDNAME(D) . { A = D; }

ows ::= WS.
ows ::=.

/*
 * This has been adapted from
 * https://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar :

[1]  turtleDoc ::= statement*
[2]  statement ::= directive '.' | triples '.' | ws+
[3]  directive ::= prefixID | base
[4]  prefixID ::= '@prefix' ws+ prefixName? ':' uriref
[5]  base ::= '@base' ws+ uriref
[6]  triples ::= subject predicateObjectList
[7]  predicateObjectList ::= verb objectList ( ';' verb objectList )* ( ';')?
[8]  objectList ::= object ( ',' object)*
[9]  verb ::= predicate | 'a'
[10] comment ::= '#' ( [^#xA#xD] )*
[11] subject ::= resource | blank
[12] predicate ::= resource
[13] object ::= resource | blank | literal
[14] literal ::= quotedString ( '@' language )? | datatypeString | integer | double | decimal | boolean
[15] datatypeString ::= quotedString '^^' resource
[16] integer ::= ('-' | '+') ? [0-9]+
[17] double ::= ('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent )
[18] decimal ::= ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ )
[19] exponent ::= [eE] ('-' | '+')? [0-9]+
[20] boolean ::= 'true' | 'false'
[21] blank ::= nodeID | '[]' | '[' predicateObjectList ']' | collection
[22] itemList ::= object+
[23] collection ::= '(' itemList? ')'
[24] ws ::= #x9 | #xA | #xD | #x20 | comment
[25] resource ::= uriref | qname
[26] nodeID ::= '_:' name
[27] qname ::= prefixName? ':' name?
[28] uriref ::= '<' relativeURI '>'
[29] language ::= [a-z]+ ('-' [a-z0-9]+ )*
[30] nameStartChar ::= [A-Z] | "_" | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
[31] nameChar ::= nameStartChar | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
[32] name ::= nameStartChar nameChar*
[33] prefixName ::= ( nameStartChar - '_' ) nameChar*
[34] relativeURI ::= ucharacter*
[35] quotedString ::= string | longString
[36] string ::= #x22 scharacter* #x22
[37] longString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22
[38] character ::= '\u' hex hex hex hex | '\U' hex hex hex hex hex hex hex hex | '\\' | [#x20-#x5B] | [#x5D-#x10FFFF]
[39] echaracter ::= character | '\t' | '\n' | '\r'
[40] hex ::= [#x30-#x39] | [#x41-#x46]
[41] ucharacter ::= ( character - #x3E ) | '\>'
[42] scharacter ::= ( echaracter - #x22 ) | '\"'
[43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD

*/