Browse Source

Pass tests.

Stefano Cossu 4 years ago
parent
commit
d6cbb46aa6
4 changed files with 97 additions and 29 deletions
  1. 68 9
      include/codec_base.h
  2. 12 8
      src/codec_nt.c
  3. 7 7
      src/store_mdb.c
  4. 10 5
      test/test_codec_nt.c

+ 68 - 9
include/codec_base.h

@@ -4,6 +4,15 @@
 #include "graph.h"
 
 
+/** @brief Codec iterator type.
+ *
+ * This structure holds state data including input and output for encoding and
+ * decoding RDF. Normally it should not be inspected or manipulated directly,
+ * but rather passed to codec iteration functions for processing RDF.
+ *
+ * NOTE: This should be used as an opaque handle, however it is exposed here
+ * for easier inclusion into each codec.
+ */
 typedef struct codec_iter_t {
     char *              rep;        // String representation of a RDF fragment.
     LSUP_Triple *       trp;        // RDF fragment being encoded or decoded.
@@ -39,13 +48,55 @@ typedef LSUP_rc (*term_dec_fn_t)(
         const char *rep, const LSUP_NSMap *nsm, LSUP_Term **term);
 
 
-/** @brief Graph encoder callback type.
+/** @brief Initialize a graph encoding loop.
  *
- * @return LSUP_OK on successful encoding; <0 for other errors.
+ * This prototype is to be implemented by graph encoding loops. It should
+ * create an iterator and perform all initial setup for finding triples.
+ *
+ * @param[in] gr The graph to be encoded. The graph's namespace map is used by the
+ * codec for namespace prefixing. The graph may only be freed after the loop is
+ * finalized.
+ *
+ * @return A codec iterator handle to be passed to a #gr_codec_iter_fn_t
+ * function and, eventually, to a #gr_codec_done_fn_t function.
  */
-typedef LSUP_CodecIterator * (*gr_codec_init_fn_t)(const LSUP_Graph *gr);
+typedef LSUP_CodecIterator * (*gr_encode_init_fn_t)(const LSUP_Graph *gr);
 
-typedef LSUP_rc (*gr_codec_iter_fn_t)(LSUP_CodecIterator *it);
+
+/** @brief Initialize a graph decoding loop.
+ *
+ * This prototype is to be implemented by graph decoding loops. It should
+ * create an iterator and perform all initial setup for tokenizing text into
+ * processing units.
+ *
+ * @param[in] rep The RDF string to be decoded. Any namespace prefixes defined in
+ *  this string will be used for decoding.
+ *
+ * @return A codec iterator handle to be passed to a #gr_codec_iter_fn_t
+ * function and, eventually, to a #gr_codec_done_fn_t function.
+ */
+typedef LSUP_CodecIterator * (*gr_decode_init_fn_t)(const char *rep);
+
+
+/** @brief Perform one encoding or decoding iteration.
+ *
+ * This prototype is used for both encoding and decoding functions. It should
+ * perform all the steps to either encode one or more triples into a complete
+ * RDF fragment representing a complete triple or a set thereof, or to parse
+ * a RDF string into one or more complete triples.
+ *
+ * @param[in] it Iterator handle.
+ *
+ * @param[out] res Handle to be populated with the data obtained from encoding
+ * (a string) or decoding (a NULL-terminated array of triples). This pointer
+ * must be passed initialized (it may be NULL) and should be eventually
+ * freed manually at the end of the loop (it is reallocated at each iteration,
+ * so memory from a previous iteration may be overwritten with new data).
+ *
+ * @return LSUP_OK if a new token was processed; LSUP_END if the end of the
+ *  loop was reached.
+ */
+typedef LSUP_rc (*gr_codec_iter_fn_t)(LSUP_CodecIterator *it, void **res);
 
 typedef void (*gr_codec_done_fn_t)(LSUP_CodecIterator *it);
 
@@ -66,12 +117,20 @@ typedef LSUP_rc (*gr_dec_fn_t)(const char *rep, LSUP_Graph **gr);
  * - name: A brief (16-char max), human-readable name to identify the codec.
  * - mimetype: MIME type associated with the codec.
  * - extension: File extension associated with the serialized file.
+ *
  * - term_encoder: Callback function for encoding a single term.
  * - term_decoder: Callback function for decoding a single term.
  *
- * There is no validation enforced, but at least the name, mimetype and
- * extension, as well as one or more encoding functions and their respective
- * decoding functions, should be defined in a codec.
+ * - gr_encode_init: Initialize a graph encoding loop.
+ * - gr_encode_iter: Run one iteration of encoding on one or more triples.
+ * - gr_encode_done: Finalize the encoding loop and free the support data.
+ *
+ * - gr_decode_init: Initialize a graph decoding loop.
+ * - gr_decode_iter: Run one iteration of decoding on one or more text lines.
+ * - gr_decode_done: Finalize the decoding loop and free the support data.
+ *
+ * For documentation on the individual encoding and decoding callbacks, see the
+ * related function prototypes.
  */
 typedef struct codec_t {
     char                name[16];       // Name of the codec.
@@ -83,12 +142,12 @@ typedef struct codec_t {
     term_dec_fn_t       term_decoder;   // Term decoder function.
 
     // Graph encoding.
-    gr_codec_init_fn_t  gr_encode_init; // Graph encoder initialization.
+    gr_encode_init_fn_t gr_encode_init; // Graph encoder initialization.
     gr_codec_iter_fn_t  gr_encode_iter; // Graph encoder iteration.
     gr_codec_done_fn_t  gr_encode_done; // Graph encoder finalization.
 
     // Graph decoding.
-    gr_codec_init_fn_t  gr_decode_init; // Graph decoder initialization.
+    gr_decode_init_fn_t gr_decode_init; // Graph decoder initialization.
     gr_codec_iter_fn_t  gr_decode_iter; // Graph decoder iteration.
     gr_codec_done_fn_t  gr_decode_done; // Graph decoder finalization.
 } LSUP_Codec;

+ 12 - 8
src/codec_nt.c

@@ -56,7 +56,7 @@ term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
             if (strlen (term->lang) > 0)
                 buf_len += strlen(term->lang) + 1; // Room for @
 
-            TRACE ("nt rep length: %lu\n", buf_len);
+            //TRACE ("nt rep length: %lu\n", buf_len);
 
             tmp = realloc (out, buf_len);
             if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
@@ -125,7 +125,7 @@ gr_to_nt_init (const LSUP_Graph *gr)
 
 
 static LSUP_rc
-gr_to_nt_iter (LSUP_CodecIterator *it) {
+gr_to_nt_iter (LSUP_CodecIterator *it, void **res) {
     LSUP_rc rc = LSUP_graph_iter_next (it->gr_it, it->trp);
     if (rc != LSUP_OK) return rc;
 
@@ -134,12 +134,15 @@ gr_to_nt_iter (LSUP_CodecIterator *it) {
     term_to_nt (it->trp->o, it->nsm, &it->str_o);
 
     char *tmp = realloc (
-            it->rep, strlen (it->str_s) + strlen (it->str_p)
+            *res, strlen (it->str_s) + strlen (it->str_p)
             + strlen (it->str_o) + 6);
-    if (UNLIKELY (!tmp)) return LSUP_MEM_ERR;
+    if (UNLIKELY (!tmp)) {
+        *res = NULL;
+        return LSUP_MEM_ERR;
+    }
 
-    it->rep = tmp;
-    sprintf (it->rep, "%s %s %s .\n", it->str_s, it->str_p, it->str_o);
+    sprintf (tmp, "%s %s %s .\n", it->str_s, it->str_p, it->str_o);
+    *res = tmp;
 
     it->cur++;
 
@@ -161,7 +164,7 @@ gr_to_nt_done (LSUP_CodecIterator *it)
 
 
 static LSUP_CodecIterator *
-nt_to_gr_init (const LSUP_Graph *gr)
+nt_to_gr_init (const char *rep)
 {
     // TODO
     return NULL;
@@ -169,7 +172,8 @@ nt_to_gr_init (const LSUP_Graph *gr)
 
 
 static LSUP_rc
-nt_to_gr_iter (LSUP_CodecIterator *it) {
+nt_to_gr_iter (LSUP_CodecIterator *it, void **res)
+{
     // TODO
     return LSUP_NOT_IMPL_ERR;
 }

+ 7 - 7
src/store_mdb.c

@@ -388,7 +388,7 @@ LSUP_mdbstore_add_iter (MDBIterator *it, const LSUP_SerTriple *sspo)
     for (int i = 0; i < 3; i++) {
         LSUP_Buffer *st = LSUP_striple_pos (sspo, i);
 
-#ifdef DEBUG
+#ifdef DEBUG3
         printf ("Inserting term: ");
         LSUP_buffer_print (st);
         printf ("\n");
@@ -642,7 +642,7 @@ mdbiter_next_key (LSUP_MDBIterator *it)
         data.mv_size = KLEN;
 
         while (rc == LSUP_NORESULT) {
-            TRACE (STR, "begin ctx loop.");
+            //TRACE (STR, "begin ctx loop.");
             // If ctx is specified, look if the matching triple is associated
             // with it. If not, move on to the next triple.
             // The loop normally exits when a triple with matching ctx is found
@@ -654,11 +654,11 @@ mdbiter_next_key (LSUP_MDBIterator *it)
 
             if (db_rc == MDB_SUCCESS) {
                 rc = LSUP_OK;
-                TRACE (STR, "Triple found for context.");
+                //TRACE (STR, "Triple found for context.");
             }
 
             else if (db_rc == MDB_NOTFOUND) {
-                TRACE (STR, "No triples found for context.");
+                //TRACE (STR, "No triples found for context.");
                 if (it->rc == MDB_NOTFOUND) rc = LSUP_END;
                 else it->iter_op_fn (it);
 
@@ -953,14 +953,14 @@ it_next_1bound (MDBIterator *it)
     // Ensure next block within the same page is not beyond the last.
     if (it->i < it->data.mv_size / DBL_KLEN - 1) {
         it->i ++;
-        TRACE ("Increasing page cursor to %lu.", it->i);
-        TRACE ("it->rc: %d", it->rc);
+        //TRACE ("Increasing page cursor to %lu.", it->i);
+        //TRACE ("it->rc: %d", it->rc);
 
     } else {
         // If the last block in the page is being yielded,
         // move cursor to beginning of next page.
         it->i = 0;
-        TRACE ("Reset page cursor to %lu.", it->i);
+        //TRACE ("Reset page cursor to %lu.", it->i);
         it->rc = mdb_cursor_get (
                 it->cur, &it->key, &it->data, MDB_NEXT_MULTIPLE);
         TRACE ("it->rc: %d", it->rc);

+ 10 - 5
test/test_codec_nt.c

@@ -97,7 +97,8 @@ static int test_encode_nt_graph()
     LSUP_Graph *gr = LSUP_graph_new (LSUP_STORE_MEM);
     if (!gr) return LSUP_MEM_ERR;
 
-    LSUP_Triple trp[7];
+    LSUP_Triple trp[8];
+    memset (trp, 0, sizeof (LSUP_Triple) * 8); // Last NULL is a sentinel
 
     LSUP_triple_init (trp + 0, terms[0], terms[1], terms[2]);
     LSUP_triple_init (trp + 1, terms[0], terms[1], terms[3]);
@@ -108,17 +109,21 @@ static int test_encode_nt_graph()
     LSUP_triple_init (trp + 6, terms[7], terms[1], terms[6]);
 
     size_t ins;
-    LSUP_graph_add_trp (gr, trp, 7, &ins);
+    LSUP_graph_add_trp (gr, trp, &ins);
 
     char *out = calloc (1, 1);
     LSUP_CodecIterator *it = nt_codec.gr_encode_init (gr);
     ASSERT (it != NULL, "Error creating codec iterator!");
 
-    while (nt_codec.gr_encode_iter (it) != LSUP_END) {
-        out = realloc (out, strlen(out) + strlen (it->rep) + 1);
-        out = strcat (out, it->rep);
+    char *tmp = NULL;
+    LSUP_rc rc;
+    while ((rc = nt_codec.gr_encode_iter (it, (void**)&tmp)) != LSUP_END) {
+        ASSERT (rc >= 0, "Encoding step failed!");
+        out = realloc (out, strlen(out) + strlen (tmp) + 1);
+        out = strcat (out, tmp);
     }
     nt_codec.gr_encode_done (it);
+    free (tmp);
     LSUP_graph_free (gr);
     //printf("Serialized graph: %s\n", out);