|
@@ -42,6 +42,19 @@ typedef struct {
|
|
|
} ParseIterator;
|
|
|
|
|
|
|
|
|
+// Translate the character that follows a backslash into the character it denotes. TODO: the opposite of this is in codec_nt.c; find a better place for both.
|
|
|
+static inline char unescape_char(const char c) { // c: the character found right after the backslash.
|
|
|
+ switch (c) {
|
|
|
+ case 't': return '\t';
|
|
|
+ case 'b': return '\b';
|
|
|
+ case 'n': return '\n';
|
|
|
+ case 'r': return '\r';
|
|
|
+ case 'f': return '\f';
|
|
|
+ default: return c; // Any other character is returned unchanged.
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
static int fill(ParseIterator *it)
|
|
|
{
|
|
|
if (it->eof) {
|
|
@@ -107,31 +120,40 @@ static YYCTYPE *unescape_unicode (const YYCTYPE *esc_str, size_t size)
|
|
|
size_t j = 0;
|
|
|
YYCTYPE tmp_chr[5];
|
|
|
for (size_t i = 0; i < size;) {
|
|
|
- if (memcmp (esc_str + i, "\\u", 2) == 0) {
|
|
|
- i += 2; // backslash + 'u'
|
|
|
+ if (esc_str[i] == '\\') {
|
|
|
+ i++; // Skip over '\\'
|
|
|
+
|
|
|
+ // 4-hex sequence.
|
|
|
+ if (esc_str[i] == 'u') {
|
|
|
+ i ++; // Skip over 'u'
|
|
|
+
|
|
|
+ // Use tmp_chr to hold the hex string for the code point.
|
|
|
+ memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
|
|
|
+ tmp_chr[4] = '\0';
|
|
|
|
|
|
- // Use tmp_chr to hold the hex string representing the code point.
|
|
|
- memcpy(tmp_chr, esc_str + i, sizeof (tmp_chr) - 1);
|
|
|
- tmp_chr[4] = '\0';
|
|
|
+ uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
|
|
|
+ TRACE ("tmp_val: %d\n", tmp_val);
|
|
|
|
|
|
- uint32_t tmp_val = strtol ((char*)tmp_chr, NULL, 16);
|
|
|
- TRACE ("tmp_val: %d\n", tmp_val);
|
|
|
+ // Reuse tmp_chr to hold the byte values for the code point.
|
|
|
+ int nbytes = utf8_encode (tmp_val, tmp_chr);
|
|
|
|
|
|
- // Reuse tmp_chr to hold the byte values for the code point.
|
|
|
- int nbytes = utf8_encode (tmp_val, tmp_chr);
|
|
|
+ // Copy bytes into destination.
|
|
|
+ memcpy (uc_str + j, tmp_chr, nbytes);
|
|
|
+ TRACE ("UC byte value: %x %x\n", uc_str[j], uc_str[j + 1]);
|
|
|
|
|
|
- // Copy bytes into destination.
|
|
|
- memcpy (uc_str + j, tmp_chr, nbytes);
|
|
|
- TRACE ("UC byte value: %x %x\n", uc_str[j], uc_str[j + 1]);
|
|
|
+ j += nbytes;
|
|
|
+ i += 4;
|
|
|
|
|
|
- j += nbytes;
|
|
|
- i += 4;
|
|
|
+ // 8-hex sequence.
|
|
|
+ } else if (esc_str[i] == 'U') {
|
|
|
+ i ++; // Skip over 'U'
|
|
|
+ fprintf (
|
|
|
+ stderr,
|
|
|
+ "UTF-16 sequence unescaping not yet implemented.\n");
|
|
|
+ return NULL; // TODO encode UTF-16
|
|
|
|
|
|
- } else if (memcmp (esc_str + i, "\\U", 2) == 0) {
|
|
|
- fprintf (
|
|
|
- stderr,
|
|
|
- "UTF-16 sequence unescaping not yet implemented.\n");
|
|
|
- return NULL; // TODO encode UTF-16
|
|
|
+ // Unescape other escaped characters.
|
|
|
+ } else uc_str[j++] = unescape_char(esc_str[i++]);
|
|
|
} else {
|
|
|
// Copy ASCII char verbatim.
|
|
|
uc_str[j++] = esc_str[i++];
|
|
@@ -254,7 +276,7 @@ loop:
|
|
|
}
|
|
|
|
|
|
BNODE {
|
|
|
- YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 1);
|
|
|
+ YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
|
|
|
|
|
|
TRACE ("BNode data: %s\n", data);
|
|
|
|