parser_common.h 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. #ifndef _VOLK_PARSER_COMMON_H
  2. #define _VOLK_PARSER_COMMON_H
  3. #include "volksdata/codec.h"
  4. /** @brief TTL is UTF-8 encoded.
  5. *
  6. * @sa https://www.w3.org/TeamSubmission/turtle/#sec-grammar
  7. *
  8. * `char` should be considered to be UTF-8 throughout this library, however,
  9. * setting YYCTYPE to char generates case labels outside of the char range.
  10. */
  11. #define YYCTYPE uint8_t
  12. #define YYCURSOR it->cur
  13. #define YYMARKER it->mar
  14. #define YYLIMIT it->lim
  15. #define YYFILL fill(it) == 0
  16. typedef struct {
  17. FILE * fh; ///< Input file handle.
  18. const char * sh; ///< Input string. Exclusive with fh.
  19. size_t buf_size; ///< Initial allocation for buffer.
  20. YYCTYPE * buf, ///< Start of buffer.
  21. * lim, ///< Position after the last available
  22. ///< input character (YYLIMIT).
  23. * cur, ///< Next input character to be read (YYCURSOR)
  24. * mar, ///< Most recent match (YYMARKER)
  25. * tok, ///< Start of current token.
  26. * bol; ///< Address of the beginning of the
  27. ///< current line (for debugging).
  28. unsigned line; ///< Current line no. (for debugging).
  29. unsigned ct; ///< Number of statements parsed.
  30. bool eof; ///< if we have reached EOF.
  31. /*!stags:re2c format = "YYCTYPE *@@;"; */
  32. } ParseIterator;
  33. static int fill(ParseIterator *it);
  34. /** @brief Initialize parser.
  35. *
  36. * @param[in] it iterator handle to be initialized.
  37. *
  38. * @param[in] fh Open file handle to read from. This is exclusive with sh. If
  39. * both fh and sh are provided, fh has precedence.
  40. *
  41. * @param[in] sh String to read from. This is exclusive with fh.
  42. */
  43. static void parse_init (ParseIterator *it, FILE *fh, const char *sh)
  44. {
  45. if(fh) {
  46. // Stream handling. It engages YYFILL and reads by chunks.
  47. it->fh = fh;
  48. it->sh = NULL;
  49. it->buf_size = CHUNK_SIZE;
  50. it->buf = malloc(it->buf_size);
  51. if (!it->buf) log_error ("Error allocating lexer buffer.");
  52. it->cur = it->mar = it->tok = it->lim = it->buf + it->buf_size - 1;
  53. it->bol = it->buf;
  54. it->eof = false;
  55. it->lim[0] = 0;
  56. } else {
  57. // String handling. Uses the provided string as the buffer.
  58. it->fh = NULL;
  59. it->sh = sh;
  60. it->buf_size = strlen(sh) + 1;
  61. it->buf = NULL;
  62. it->cur = it->tok = (YYCTYPE*)it->sh;
  63. it->lim = it->mar = it->cur + it->buf_size - 1;
  64. it->bol = it->cur;
  65. it->eof = true;
  66. }
  67. it->line = 1;
  68. it->ct = 0;
  69. /*!stags:re2c format = "it->@@ = NULL; "; */
  70. }
  71. int
  72. fill(ParseIterator *it)
  73. {
  74. log_debug ("Filling codec buffer @ %p.", it->buf);
  75. if (it->eof) return 1;
  76. size_t shift = it->tok - it->buf;
  77. size_t used = it->lim - it->tok;
  78. // If buffer is too small for the lexeme, double the capacity.
  79. if (shift < 1) {
  80. YYCTYPE *old_buf = it->buf;
  81. shift += it->buf_size;
  82. it->buf_size *= 2;
  83. LOG_DEBUG ("Reallocating buffer to %zu bytes.", it->buf_size);
  84. it->buf = realloc (it->buf, it->buf_size);
  85. if (!it->buf) {
  86. log_error ("Memory allocation error.");
  87. return -1;
  88. }
  89. // Move all relative points if address changed.
  90. size_t reloc_off = it->buf - old_buf;
  91. it->cur += reloc_off;
  92. it->tok += reloc_off;
  93. it->lim += reloc_off;
  94. it->mar += reloc_off;
  95. } else {
  96. LOG_DEBUG("Shifting bytes: %zu", shift);
  97. memmove (it->buf, it->tok, used);
  98. LOG_TRACE ("Limit offset before reading data: %zu", it->lim - it->tok);
  99. it->lim -= shift;
  100. it->cur -= shift;
  101. it->mar -= shift;
  102. it->tok -= shift;
  103. }
  104. it->lim += fread (it->lim, 1, it->buf_size - used - 1, it->fh);
  105. /*!stags:re2c format = "if (it->@@) it->@@ -= shift; "; */
  106. LOG_TRACE ("Cursor offset from last token: %zu", it->cur - it->tok);
  107. LOG_TRACE ("Limit offset from last token: %zu", it->lim - it->tok);
  108. it->lim[0] = 0;
  109. it->eof = it->lim < it->buf + it->buf_size - 1;
  110. return 0;
  111. }
  112. #endif // _VOLK_PARSER_COMMON_H