#ifndef _VOLK_PARSER_COMMON_H #define _VOLK_PARSER_COMMON_H #include "volksdata/codec.h" /** @brief TTL is UTF-8 encoded. * * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar * * `char` should be considered to be UTF-8 throughout this library, however, * setting YYCTYPE to char generates case labels outside of the char range. */ #define YYCTYPE uint8_t #define YYCURSOR it->cur #define YYMARKER it->mar #define YYLIMIT it->lim #define YYFILL fill(it) == 0 typedef struct { FILE * fh; ///< Input file handle. const char * sh; ///< Input string. Exclusive with fh. size_t buf_size; ///< Initial allocation for buffer. YYCTYPE * buf, ///< Start of buffer. * lim, ///< Position after the last available ///< input character (YYLIMIT). * cur, ///< Next input character to be read (YYCURSOR) * mar, ///< Most recent match (YYMARKER) * tok, ///< Start of current token. * bol; ///< Address of the beginning of the ///< current line (for debugging). unsigned line; ///< Current line no. (for debugging). unsigned ct; ///< Number of statements parsed. bool eof; ///< if we have reached EOF. /*!stags:re2c format = "YYCTYPE *@@;"; */ } ParseIterator; static int fill(ParseIterator *it); /** @brief Initialize parser. * * @param[in] it iterator handle to be initialized. * * @param[in] fh Open file handle to read from. This is exclusive with sh. If * both fh and sh are provided, fh has precedence. * * @param[in] sh String to read from. This is exclusive with fh. */ static void parse_init (ParseIterator *it, FILE *fh, const char *sh) { if(fh) { // Stream handling. It engages YYFILL and reads by chunks. it->fh = fh; it->sh = NULL; it->buf_size = CHUNK_SIZE; it->buf = malloc(it->buf_size); if (!it->buf) log_error ("Error allocating lexer buffer."); it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1; it->bol = it->buf; it->eof = false; it->lim[0] = 0; } else { // String handling. Uses the provided string as the buffer. it->fh = NULL; it->sh = sh; it->buf_size = strlen(sh) + 1; it->buf = NULL; it->cur = it->tok = (YYCTYPE*)it->sh; it->lim = it->mar = it->cur + it->buf_size - 1; it->bol = it->cur; it->eof = true; } it->line = 1; it->ct = 0; /*!stags:re2c format = "it->@@ = NULL; "; */ } int fill(ParseIterator *it) { log_debug ("Filling codec buffer @ %p.", it->buf); if (it->eof) return 1; size_t shift = it->tok - it->buf; size_t used = it->lim - it->tok; // If buffer is too small for the lexeme, double the capacity. if (shift < 1) { YYCTYPE *old_buf = it->buf; shift += it->buf_size; it->buf_size *= 2; LOG_DEBUG ("Reallocating buffer to %zu bytes.", it->buf_size); it->buf = realloc (it->buf, it->buf_size); if (!it->buf) { log_error ("Memory allocation error."); return -1; } // Move all relative points if address changed. size_t reloc_off = it->buf - old_buf; it->cur += reloc_off; it->tok += reloc_off; it->lim += reloc_off; it->mar += reloc_off; } else { LOG_DEBUG("Shifting bytes: %zu", shift); memmove (it->buf, it->tok, used); LOG_TRACE ("Limit offset before reading data: %zu", it->lim - it->tok); it->lim -= shift; it->cur -= shift; it->mar -= shift; it->tok -= shift; } it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh); /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */ LOG_TRACE ("Cursor offset from last token: %zu", it->cur - it->tok); LOG_TRACE ("Limit offset from last token: %zu", it->lim - it->tok); it->lim[0] = 0; it->eof = it->lim < it->buf + it->buf_size - 1; return 0; } #endif // _VOLK_PARSER_COMMON_H