Browse Source

Graph copy function; fix mem leak in graph_set_uri.

Stefano Cossu 3 years ago
parent
commit
1a4343c071
5 changed files with 219 additions and 147 deletions
  1. 2 0
      TODO.md
  2. 10 0
      include/term.h
  3. 1 1
      src/graph.c
  4. 176 146
      src/term.c
  5. 30 0
      test/test_term.c

+ 2 - 0
TODO.md

@@ -23,6 +23,7 @@
 - *D* Namespaced IRIs
 - *D* Relative IRIs
 - *P* Turtle serialization / deserialization
+- *P* Full UTF-8 support
 - *P* Extended tests
     - *P* C API
     - *P* Python API
@@ -35,6 +36,7 @@
     - Extract unique terms and 2-term tuples
 - NQ codec
 - TriG codec
+- UTF-16 support
 
 
 ## Long-term

+ 10 - 0
include/term.h

@@ -268,6 +268,16 @@ LSUP_bnode_new (const char *data)
 { return LSUP_term_new (LSUP_TERM_BNODE, data, NULL); }
 
 
+/** @brief Copy a term.
+ *
+ * The copy is built with LSUP_term_new() from the type and data of the source
+ * term, plus the type-specific metadata selected by the implementation. It is
+ * a handle of its own: the caller releases it with LSUP_term_free(),
+ * independently of the source term.
+ *
+ * @param[in] src The term to copy. It is only read.
+ *
+ * @return A new duplicate term handle.
+ */
+LSUP_Term *
+LSUP_term_copy (const LSUP_Term *src);
+
+
 /** @brief Deserialize a buffer into a term.
  *
  * @param[in] sterm Buffer to convert into a term. It must be a valid

+ 1 - 1
src/graph.c

@@ -227,7 +227,7 @@ LSUP_graph_set_uri (LSUP_Graph *gr, LSUP_Term *uri)
     }
 
     LSUP_term_free (gr->uri);
-    gr->uri = LSUP_iriref_new (uri->data, LSUP_iriref_nsm (uri));
+    gr->uri = uri;
 
     return LSUP_OK;
 }

+ 176 - 146
src/term.c

@@ -75,6 +75,22 @@ LSUP_term_new (
 }
 
 
+LSUP_Term *
+LSUP_term_copy (const LSUP_Term *src)
+{
+    /*
+     * Select the type-specific value handed to LSUP_term_new() as metadata:
+     * the namespace map for IRIs, the datatype term for literals, the
+     * language tag for language-tagged literals, and nothing for other types.
+     */
+    void *metadata = NULL;
+
+    if (LSUP_IS_IRI (src))
+        metadata = (void *) LSUP_iriref_nsm (src);
+    else if (src->type == LSUP_TERM_LITERAL)
+        metadata = (void *) src->datatype;
+    else if (src->type == LSUP_TERM_LT_LITERAL)
+        /*
+         * Pass the address of the tag buffer. term_init() in this file reads
+         * the metadata of a language-tagged literal as a C string, so the
+         * pointer must point at the tag; copying the tag bytes into the
+         * pointer variable itself would produce a wild pointer.
+         */
+        metadata = (void *) src->lang;
+
+    return LSUP_term_new (src->type, src->data, metadata);
+}
+
+
 LSUP_Term *
 LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
 {
@@ -232,151 +248,6 @@ LSUP_term_serialize (const LSUP_Term *term)
 }
 
 
-LSUP_rc
-term_init (
-        LSUP_Term *term, LSUP_TermType type,
-        const char *data, void *metadata)
-{
-    if (UNLIKELY (!LSUP_uri_ptn)) {
-        log_error ("Environment not initialized. Did you call LSUP_init()?");
-        return LSUP_ERROR;
-    }
-    // This can never be LSUP_TERM_UNDEFINED.
-    if (type <= LSUP_TERM_UNDEFINED || type > MAX_VALID_TERM_TYPE) {
-        log_error ("%d is not a valid term type.", type);
-        return LSUP_VALUE_ERR;
-    }
-
-    term->type = type;
-
-    if (data) {
-        // Validate IRI.
-        if (LSUP_IS_IRI (term)) {
-            char *fquri;
-
-            // Find fully qualified IRI to parse.
-            if (term->type == LSUP_TERM_NS_IRIREF) {
-                if (LSUP_nsmap_normalize_uri (
-                    metadata, data, &fquri) != LSUP_OK
-                ) {
-                    log_error ("Error normalizing IRI data.");
-
-                    return LSUP_VALUE_ERR;
-                }
-                log_debug ("Fully qualified IRI: %s", fquri);
-            } else fquri = (char *) data;
-
-            if (strpbrk (fquri, invalid_uri_chars) != NULL) {
-                log_error (
-                        "Characters %s are not allowed. Got: %s\n",
-                        invalid_uri_chars, fquri);
-
-                return LSUP_VALUE_ERR;
-            }
-
-            // Capture interesting IRI parts.
-            regmatch_t matches[11];
-            if (UNLIKELY (regexec (LSUP_uri_ptn, fquri, 11, matches, 0) != 0)) {
-                fprintf (stderr, "Error matching URI pattern.\n");
-
-                return LSUP_VALUE_ERR;
-            }
-            if (term->type == LSUP_TERM_NS_IRIREF) free (fquri);
-
-            MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
-
-            term->iri_info->prefix = matches[1];
-            term->iri_info->path = matches[5];
-            term->iri_info->frag = matches[10];
-            term->iri_info->nsm = metadata;
-        }
-
-        term->data = strdup (data);
-
-    } else {
-        // No data. Make up a random UUID or URI if allowed.
-        if (type == LSUP_TERM_IRIREF || type == LSUP_TERM_BNODE) {
-            uuid_t uuid;
-            uuid_generate_random (uuid);
-
-            uuid_str_t uuid_str;
-            uuid_unparse_lower (uuid, uuid_str);
-
-            if (type == LSUP_TERM_IRIREF) {
-                term->data = malloc (UUID4_URN_SIZE);
-                snprintf (
-                        term->data, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
-
-                MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
-
-                // Allocate IRI match patterns manually.
-                term->iri_info->prefix.rm_so = 0;
-                term->iri_info->prefix.rm_eo = 4;
-                term->iri_info->path.rm_so = 4;
-                term->iri_info->path.rm_eo = UUIDSTR_SIZE + 6;
-                term->iri_info->frag.rm_so = -1;
-                term->iri_info->frag.rm_eo = -1;
-                term->iri_info->nsm = NULL;
-
-            } else term->data = strdup (uuid_str);
-        } else {
-            log_error ("No data provided for term.");
-            return LSUP_VALUE_ERR;
-        }
-    }
-
-    if (term->type == LSUP_TERM_LT_LITERAL) {
-        if (!metadata) {
-            log_warn ("Lang tag is NULL. Creating a non-tagged literal.");
-            term->type = LSUP_TERM_LITERAL;
-        } else {
-            char *lang_str = (char *) metadata;
-            log_trace("Lang string: %s", lang_str);
-            // Lang tags longer than 7 characters will be truncated.
-            strncpy(term->lang, lang_str, sizeof (term->lang) - 1);
-            if (strlen (term->lang) < 1) {
-                log_error ("Lang tag cannot be an empty string.");
-                return LSUP_VALUE_ERR;
-            }
-            term->lang[7] = '\0';
-        }
-    }
-
-    if (term->type == LSUP_TERM_LITERAL) {
-        term->datatype = metadata;
-        if (! term->datatype) term->datatype = LSUP_default_datatype;
-        log_trace ("Storing data type: %s", term->datatype->data);
-
-        if (! LSUP_IS_IRI (term->datatype )) {
-            log_error (
-                    "Literal data tpe is not a IRI: %s",
-                    term->datatype ->data);
-
-            return LSUP_VALUE_ERR;
-        }
-
-        uint32_t dtype_hash = LSUP_term_hash (term->datatype);
-
-        LSUP_Term *tmp = (LSUP_Term *) LSUP_tcache_get (dtype_hash);
-        if (!tmp) LSUP_tcache_add (dtype_hash, term->datatype);
-        else if (term->datatype != tmp) {
-            LSUP_term_free (term->datatype);
-            term->datatype = tmp;
-        }
-
-        log_trace ("Datatype address: %p", term->datatype);
-        log_trace ("Datatype hash: %lu", LSUP_term_hash (term->datatype));
-
-    } else if (term->type == LSUP_TERM_BNODE) {
-        // TODO This is not usable for global skolemization.
-        term->bnode_id = XXH64 (
-                term->data, strlen (term->data) + 1, HASH_SEED);
-    }
-
-    return LSUP_OK;
-}
-
-
 LSUP_Key
 LSUP_term_hash (const LSUP_Term *term)
 {
@@ -515,7 +386,17 @@ LSUP_triple_serialize (const LSUP_Triple *spo)
 LSUP_rc
 LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
 {
-    // TODO validate term types.
+    /* FIXME TRP_DUMMY is a problem here.
+    if (! LSUP_IS_IRI (s) && s->type != LSUP_TERM_BNODE) {
+        log_error ("Subject is not of a valid term type: %d", s->type);
+        return LSUP_VALUE_ERR;
+    }
+    if (! LSUP_IS_IRI (p)) {
+        log_error ("Predicate is not of a valid term type: %d", p->type);
+        return LSUP_VALUE_ERR;
+    }
+    */
+
     spo->s = s;
     spo->p = p;
     spo->o = o;
@@ -579,6 +460,155 @@ LSUP_tcache_get (const LSUP_Key key)
 }
 
 
+/*
+ * Internal functions.
+ */
+
+/** @brief Populate an already allocated term structure.
+ *
+ * @param[in,out] term Term structure to fill in.
+ *
+ * @param[in] type Term type. It must be greater than LSUP_TERM_UNDEFINED and
+ *  not greater than MAX_VALID_TERM_TYPE.
+ *
+ * @param[in] data Term data; a copy is stored in the term. For IRI types the
+ *  fully qualified IRI is checked against invalid_uri_chars and matched with
+ *  LSUP_uri_ptn. If NULL, a random UUID-based value is generated for
+ *  LSUP_TERM_IRIREF and LSUP_TERM_BNODE; for any other type it is an error.
+ *
+ * @param[in] metadata Type-dependent value: namespace map for IRI types,
+ *  language tag string for LSUP_TERM_LT_LITERAL (NULL downgrades the term to
+ *  LSUP_TERM_LITERAL), datatype term for LSUP_TERM_LITERAL (NULL selects
+ *  LSUP_default_datatype). If an equal datatype is already in the term cache,
+ *  the one passed here is freed and the cached one is used instead.
+ *
+ * @return LSUP_OK on success; LSUP_ERROR if LSUP_uri_ptn is not set;
+ *  LSUP_VALUE_ERR on invalid input; LSUP_MEM_ERR on allocation failure.
+ */
+LSUP_rc
+term_init (
+        LSUP_Term *term, LSUP_TermType type,
+        const char *data, void *metadata)
+{
+    if (UNLIKELY (!LSUP_uri_ptn)) {
+        log_error ("Environment not initialized. Did you call LSUP_init()?");
+        return LSUP_ERROR;
+    }
+    // This can never be LSUP_TERM_UNDEFINED.
+    if (type <= LSUP_TERM_UNDEFINED || type > MAX_VALID_TERM_TYPE) {
+        log_error ("%d is not a valid term type.", type);
+        return LSUP_VALUE_ERR;
+    }
+
+    term->type = type;
+
+    if (data) {
+        // Validate IRI.
+        if (LSUP_IS_IRI (term)) {
+            char *fquri;
+
+            // Find fully qualified IRI to parse. For namespace-prefixed IRIs
+            // this is a heap string owned by this function; otherwise it is
+            // just an alias of the data passed in.
+            if (term->type == LSUP_TERM_NS_IRIREF) {
+                if (LSUP_nsmap_normalize_uri (
+                    metadata, data, &fquri) != LSUP_OK
+                ) {
+                    log_error ("Error normalizing IRI data.");
+
+                    return LSUP_VALUE_ERR;
+                }
+                log_debug ("Fully qualified IRI: %s", fquri);
+            } else fquri = (char *) data;
+
+            if (strpbrk (fquri, invalid_uri_chars) != NULL) {
+                log_error (
+                        "Characters %s are not allowed. Got: %s\n",
+                        invalid_uri_chars, fquri);
+
+                // Do not leak the normalized IRI on the error path.
+                if (term->type == LSUP_TERM_NS_IRIREF) free (fquri);
+
+                return LSUP_VALUE_ERR;
+            }
+
+            // Capture interesting IRI parts.
+            regmatch_t matches[11];
+            if (UNLIKELY (regexec (LSUP_uri_ptn, fquri, 11, matches, 0) != 0)) {
+                fprintf (stderr, "Error matching URI pattern.\n");
+
+                // Do not leak the normalized IRI on the error path.
+                if (term->type == LSUP_TERM_NS_IRIREF) free (fquri);
+
+                return LSUP_VALUE_ERR;
+            }
+            // Only the match offsets are kept from here on.
+            if (term->type == LSUP_TERM_NS_IRIREF) free (fquri);
+
+            MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
+
+            term->iri_info->prefix = matches[1];
+            term->iri_info->path = matches[5];
+            term->iri_info->frag = matches[10];
+            term->iri_info->nsm = metadata;
+        }
+
+        term->data = strdup (data);
+        if (UNLIKELY (!term->data)) return LSUP_MEM_ERR;
+
+    } else {
+        // No data. Make up a random UUID or URI if allowed.
+        if (type == LSUP_TERM_IRIREF || type == LSUP_TERM_BNODE) {
+            uuid_t uuid;
+            uuid_generate_random (uuid);
+
+            uuid_str_t uuid_str;
+            uuid_unparse_lower (uuid, uuid_str);
+
+            if (type == LSUP_TERM_IRIREF) {
+                term->data = malloc (UUID4_URN_SIZE);
+                // snprintf() must not be handed a NULL buffer.
+                if (UNLIKELY (!term->data)) return LSUP_MEM_ERR;
+                snprintf (
+                        term->data, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
+
+                MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
+
+                // Allocate IRI match patterns manually.
+                term->iri_info->prefix.rm_so = 0;
+                term->iri_info->prefix.rm_eo = 4;
+                term->iri_info->path.rm_so = 4;
+                term->iri_info->path.rm_eo = UUIDSTR_SIZE + 6;
+                term->iri_info->frag.rm_so = -1;
+                term->iri_info->frag.rm_eo = -1;
+                term->iri_info->nsm = NULL;
+
+            } else {
+                term->data = strdup (uuid_str);
+                // The blank node ID below is hashed from this string.
+                if (UNLIKELY (!term->data)) return LSUP_MEM_ERR;
+            }
+        } else {
+            log_error ("No data provided for term.");
+            return LSUP_VALUE_ERR;
+        }
+    }
+
+    if (term->type == LSUP_TERM_LT_LITERAL) {
+        if (!metadata) {
+            // Falls through to the plain literal handling below, which then
+            // picks the default datatype.
+            log_warn ("Lang tag is NULL. Creating a non-tagged literal.");
+            term->type = LSUP_TERM_LITERAL;
+        } else {
+            char *lang_str = (char *) metadata;
+            log_trace("Lang string: %s", lang_str);
+            // Lang tags longer than 7 characters will be truncated.
+            strncpy(term->lang, lang_str, sizeof (term->lang) - 1);
+            // strncpy() leaves the buffer unterminated when it truncates, so
+            // terminate it before the tag is read back.
+            term->lang[sizeof (term->lang) - 1] = '\0';
+            if (term->lang[0] == '\0') {
+                log_error ("Lang tag cannot be an empty string.");
+                return LSUP_VALUE_ERR;
+            }
+        }
+    }
+
+    if (term->type == LSUP_TERM_LITERAL) {
+        term->datatype = metadata;
+        if (! term->datatype) term->datatype = LSUP_default_datatype;
+        log_trace ("Storing data type: %s", term->datatype->data);
+
+        if (! LSUP_IS_IRI (term->datatype )) {
+            log_error (
+                    "Literal data tpe is not a IRI: %s",
+                    term->datatype ->data);
+
+            return LSUP_VALUE_ERR;
+        }
+
+        // NOTE(review): LSUP_term_hash() returns LSUP_Key and the cache
+        // functions take LSUP_Key; confirm that a uint32_t does not truncate
+        // the key here.
+        uint32_t dtype_hash = LSUP_term_hash (term->datatype);
+
+        // Keep a single instance per datatype: reuse the cached term if there
+        // is one, dropping the duplicate that was passed in.
+        LSUP_Term *tmp = (LSUP_Term *) LSUP_tcache_get (dtype_hash);
+        if (!tmp) LSUP_tcache_add (dtype_hash, term->datatype);
+        else if (term->datatype != tmp) {
+            LSUP_term_free (term->datatype);
+            term->datatype = tmp;
+        }
+
+        log_trace ("Datatype address: %p", term->datatype);
+        log_trace ("Datatype hash: %lu", LSUP_term_hash (term->datatype));
+
+    } else if (term->type == LSUP_TERM_BNODE) {
+        // TODO This is not usable for global skolemization.
+        term->bnode_id = XXH64 (
+                term->data, strlen (term->data) + 1, HASH_SEED);
+    }
+
+    return LSUP_OK;
+}
+
+
 /*
  * Extern inline functions.
  */

+ 30 - 0
test/test_term.c

@@ -157,6 +157,36 @@ static int test_literal()
     return 0;
 }
 
+/*
+ * Copy one term of each kind that carries different metadata (IRI, default
+ * literal, typed literal, language-tagged literal) and verify that every copy
+ * compares equal to its source.
+ */
+static int test_term_copy()
+{
+    enum { N_TERMS = 4 };
+    LSUP_Term *sources[N_TERMS];
+    LSUP_Term *copies[N_TERMS];
+    int i;
+
+    // Build the source terms one statement at a time to keep a fixed order.
+    sources[0] = LSUP_iriref_new ("http://hello.org", NULL);
+    sources[1] = LSUP_literal_new ("hello", NULL);
+    sources[2] = LSUP_literal_new (
+            "hello", LSUP_iriref_new ("urn:mydatatype:string", NULL));
+    sources[3] = LSUP_lt_literal_new ("hello", "en-US");
+
+    for (i = 0; i < N_TERMS; i++)
+        copies[i] = LSUP_term_copy (sources[i]);
+
+    for (i = 0; i < N_TERMS; i++) {
+        ASSERT (LSUP_term_equals (sources[i], copies[i]), "Term mismatch!");
+    }
+
+    // Each source and its copy are separate handles and are freed separately.
+    for (i = 0; i < N_TERMS; i++) {
+        LSUP_term_free (sources[i]);
+        LSUP_term_free (copies[i]);
+    }
+
+    return 0;
+}
+
+
 static int test_term_serialize_deserialize()
 {
     LSUP_Term *uri = LSUP_iriref_new ("http://hello.org", NULL);