Forráskód Böngészése

Re-add size to unescape_unicode; fix NT tests.

Stefano Cossu 2 éve
szülő
commit
39727f5b06
6 módosított fájl, 31 hozzáadás és 36 törlés
  1. 3 4
      include/codec.h
  2. 0 10
      include/codec/parser_ttl.h
  3. 1 2
      src/codec.c
  4. 12 6
      src/codec/lexer_nt.re
  5. 11 13
      src/codec/lexer_ttl.re
  6. 4 1
      test/test_codec_nt.c

+ 3 - 4
include/codec.h

@@ -273,15 +273,14 @@ unescape_char (const char c)
 }
 
 /** @brief Replace \uxxxx and \Uxxxxxxxx with Unicode bytes.
- *
- * TODO This does not encode UTF-16 yet. An UTF-16 version should change
- * signature because it cannot output a plain NUL-terminated string.
  *
  * @param[in] esc_str Escaped string.
  *
+ * @param[in] size Maximum number of characters to scan, à la strncpy().
+ *
  * @return String with escape sequences replaced by Unicode bytes.
  */
-uint8_t *unescape_unicode (const uint8_t *esc_str);
+uint8_t *unescape_unicode (const uint8_t *esc_str, size_t size);
 
 
 /** @brief Add an object to an object list.

+ 0 - 10
include/codec/parser_ttl.h

@@ -4,16 +4,6 @@
 #include "codec.h"
 
 
-// Parser interface. Required because Lemon doesn't export these in the header
-// automatically.
-/*
-void *TTLParseAlloc( void*(*malloc)(size_t));
-void TTLParseFree(void *pParser, void(*free)(void*) );
-void TTLParse(void *pParser, int tokenCode, uint8_t *token, ...);
-void TTLParseTrace(FILE *stream, char *zPrefix);
-*/
-
-
 /** @brief Parse a single term.
  *
  * @param[in] rep N-Triples representation as a character string.

+ 1 - 2
src/codec.c

@@ -1,8 +1,7 @@
 #include "codec.h"
 
-uint8_t *unescape_unicode (const uint8_t *esc_str)
+uint8_t *unescape_unicode (const uint8_t *esc_str, size_t size)
 {
-    size_t size = strlen ((char *)esc_str);
     uint8_t *data = malloc (size + 1);
 
     size_t j = 0;

+ 12 - 6
src/codec/lexer_nt.re

@@ -63,12 +63,11 @@ static void parse_init(ParseIterator *it, FILE *fh)
 }
 
 
-// Parser interface. Required because Lemon doesn't export these in the header
-// automatically.
+// Parser interface. Required here to silence linters.
 void *NTParseAlloc();
 void NTParse();
 void NTParseFree();
-
+void NTParseTrace();
 
 // Lexer.
 
@@ -126,20 +125,21 @@ loop:
     }
 
     IRIREF {
-        YYCTYPE *data = unescape_unicode (it->tok + 1);
+        YYCTYPE *data = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
 
         log_debug ("URI data: %s", data);
 
         *term = LSUP_iriref_new ((char*)data, NULL);
         free (data);
 
+        if (!UNLIKELY (term)) return -1;
         return T_IRIREF;
     }
 
     LITERAL {
         // Only unescape Unicode from data.
         size_t size = lit_data_e - it->tok - 2;
-        YYCTYPE *data = unescape_unicode (it->tok + 1);
+        YYCTYPE *data = unescape_unicode (it->tok + 1, size);
         log_trace ("Literal data: %s", data);
 
         char *metadata = NULL;
@@ -174,17 +174,19 @@ loop:
         free (data);
         free (metadata);
 
+        if (!UNLIKELY (term)) return -1;
         return T_LITERAL;
     }
 
     BNODE {
-        YYCTYPE *data = unescape_unicode (it->tok + 2);
+        YYCTYPE *data = unescape_unicode (it->tok + 2, YYCURSOR - it->tok - 2);
 
         log_debug ("BNode data: %s", data);
 
         *term = LSUP_term_new (LSUP_TERM_BNODE, (char*)data, NULL);
         free (data);
 
+        if (!UNLIKELY (term)) return -1;
         return T_BNODE;
     }
 
@@ -255,6 +257,10 @@ LSUP_nt_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
     ParseIterator parse_it;
     parse_init (&parse_it, fh);
 
+#ifdef DEBUG
+    NTParseTrace (stdout, "NT Parser > ");
+#endif
+
     void *parser = NTParseAlloc (malloc);
 
     LSUP_rc rc;

+ 11 - 13
src/codec/lexer_ttl.re

@@ -77,17 +77,11 @@ static void parse_init (ParseIterator *it, FILE *fh)
 }
 
 
-// Parser interface. Required because Lemon doesn't export these in the header
-// automatically.
-/*
-void *TTLParseAlloc( void*(*malloc)(size_t));
-void TTLParseFree(void *pParser, void(*free)(void*) );
-void TTLParse(void *pParser, int tokenCode, YYCTYPE *token, ...);
-void TTLParseTrace(FILE *stream, char *zPrefix);
-*/
+// Parser interface. Required here to silence linters.
 void *TTLParseAlloc();
 void TTLParse();
 void TTLParseFree();
+void TTLParseTrace();
 
 // Lexer.
 
@@ -170,35 +164,35 @@ loop:
     }
 
     IRIREF {
-        *token_p = unescape_unicode (it->tok + 1);
+        *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
         log_debug ("URI data: %s", *token_p);
 
         return T_IRIREF;
     }
 
     PFX_NAME {
-        *token_p = unescape_unicode (it->tok);
+        *token_p = unescape_unicode (it->tok, YYCURSOR - it->tok - 1);
         log_debug ("Prefix name: %s", *token_p);
 
         return T_PFX_NAME;
     }
 
     NAME {
-        *token_p = unescape_unicode (it->tok);
+        *token_p = unescape_unicode (it->tok, YYCURSOR - it->tok - 1);
         log_debug ("name: %s", *token_p);
 
         return T_IDNAME;
     }
 
     LSTRING {
-        *token_p = unescape_unicode (it->tok + 3);
+        *token_p = unescape_unicode (it->tok + 3, YYCURSOR - it->tok - 4);
         log_debug ("Long string: %s", *token_p);
 
         return T_STRING;
     }
 
     STRING {
-        *token_p = unescape_unicode (it->tok + 1);
+        *token_p = unescape_unicode (it->tok + 1, YYCURSOR - it->tok - 2);
         log_debug ("Long string: %s", *token_p);
 
         return T_STRING;
@@ -323,6 +317,10 @@ LSUP_ttl_parse_doc (FILE *fh, LSUP_Graph **gr_p, size_t *ct, char **err_p)
 
     YYCTYPE *token;
 
+#ifdef DEBUG
+    TTLParseTrace (stdout, "TTL Parser > ");
+#endif
+
     for (;;) {
         int ttype = lex (&parse_it, &token);
 

+ 4 - 1
test/test_codec_nt.c

@@ -139,7 +139,7 @@ test_encode_nt_term()
     EXPECT_STR_EQ (out, end_nt[0]);
 
     for (int i = 0; i < TERM_CT - 2; i++) {
-        log_debug ("Test encoding term #%d of %d.", i, TERM_CT - 2);
+        log_info ("Test encoding term #%d of %d.", i, TERM_CT - 2);
         EXPECT_PASS (nt_codec.encode_term (terms[i], NULL, &out));
         EXPECT_STR_EQ (out, end_nt[i]);
     }
@@ -160,6 +160,7 @@ test_encode_nt_term()
 
 static int test_encode_nt_graph()
 {
+    log_info ("Test encoding graph document.");
     LSUP_Graph *gr = LSUP_graph_new (
             LSUP_iriref_new (NULL, NULL), LSUP_STORE_HTABLE, NULL, NULL, 0);
     if (!gr) return LSUP_MEM_ERR;
@@ -195,7 +196,9 @@ static int test_encode_nt_graph()
 static int
 test_decode_nt_term()
 {
+    log_info ("Test decoding terms.");
     for (int i = 0; i < TERM_CT - 2; i++) {
+        log_debug ("Decoding term %d/%d.", i, TERM_CT - 3);
         LSUP_Term *term;
         EXPECT_PASS (nt_codec.decode_term (start_nt[i], NULL, &term));
         LSUP_term_free (term);