|
@@ -96,8 +96,8 @@ static int lex (ParseIterator *it, YYCTYPE **token_p)
|
|
|
|
|
|
|
|
|
// Character classes.
|
|
|
- EOL = [\x0A\x0D];
|
|
|
- NCWS = [\x09\x20] | EOL;
|
|
|
+ EOL = [\n\r];
|
|
|
+ NCWS = [\t\x20] | EOL;
|
|
|
HEX = [\x30-\x39\x41-\x46];
|
|
|
CHAR_BASE = "\\u" HEX{4} | "\\U" HEX{8} | '\\'
|
|
|
| [\U0000005D-\U0010FFFF];
|
|
@@ -117,18 +117,10 @@ static int lex (ParseIterator *it, YYCTYPE **token_p)
|
|
|
LCHAR = ECHAR | ([\\] ["]) | [\t\n\r];
|
|
|
|
|
|
// Constructs.
|
|
|
- COMMENT = '#' ( [^\x0A\x0D] )*;
|
|
|
+ COMMENT = '#' ( [^\n\r] )*;
|
|
|
WS = NCWS+ | COMMENT;
|
|
|
INTEGER = ('-' | '+')? [0-9]+;
|
|
|
EXPONENT = [eE] INTEGER;
|
|
|
- LANGUAGE = [a-z]+ ('-' [a-z0-9]+)*;
|
|
|
- REL_IRI = UCHAR*;
|
|
|
- IRIREF = '<' REL_IRI '>';
|
|
|
- NAME = NSTART_CHAR NAME_CHAR*;
|
|
|
- PFX = PSTART_CHAR NAME_CHAR* ':';
|
|
|
- //LSTRING = [\x22]{3} LCHAR* [\x22]{3};
|
|
|
- STRING = [\x22] SCHAR* [\x22];
|
|
|
- LANGTAG = '@' LANGUAGE;
|
|
|
DOUBLE = ('-' | '+') ? ([0-9]+ '.' [0-9]* EXPONENT
|
|
|
| '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT);
|
|
|
DECIMAL = ('-' | '+')?
|
|
@@ -168,7 +160,7 @@ cont: // Continue token parsing.
|
|
|
return T_BOOLEAN;
|
|
|
}
|
|
|
|
|
|
- IRIREF {
|
|
|
+ '<' UCHAR* '>' {
|
|
|
*token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
|
|
|
log_trace ("URI data: %s", *token_p);
|
|
|
|
|
@@ -180,14 +172,14 @@ cont: // Continue token parsing.
|
|
|
return T_RDF_TYPE;
|
|
|
}
|
|
|
|
|
|
- NAME {
|
|
|
+ NSTART_CHAR NAME_CHAR* {
|
|
|
*token_p = unescape_unicode (it->tok, YYCURSOR - it->tok);
|
|
|
- log_trace ("name: %s", *token_p);
|
|
|
+ log_trace ("ID name: %s", *token_p);
|
|
|
|
|
|
return T_IDNAME;
|
|
|
}
|
|
|
|
|
|
- PFX {
|
|
|
+ PSTART_CHAR NAME_CHAR* ':' {
|
|
|
*token_p = uint8_ndup (it->tok, YYCURSOR - it->tok - 1);
|
|
|
log_trace ("Prefix name: '%s'", *token_p);
|
|
|
|
|
@@ -196,7 +188,7 @@ cont: // Continue token parsing.
|
|
|
|
|
|
[\x22]{3} { goto lchar; }
|
|
|
|
|
|
- STRING {
|
|
|
+ [\x22] SCHAR* [\x22] {
|
|
|
*token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
|
|
|
log_trace ("Long string: %s", *token_p);
|
|
|
|
|
@@ -208,7 +200,7 @@ cont: // Continue token parsing.
|
|
|
return T_WS;
|
|
|
}
|
|
|
|
|
|
- LANGTAG {
|
|
|
+ '@' [a-z]+ ('-' [a-z0-9]+)* {
|
|
|
*token_p = uint8_ndup (it->tok + 1, YYCURSOR - it->tok);
|
|
|
log_trace ("Lang tag: %s", *token_p);
|
|
|
|