Browse Source

Subclass terms.

Stefano Cossu 3 years ago
parent
commit
979cb4f408
8 changed files with 323 additions and 78 deletions
  1. 2 1
      Makefile
  2. 2 3
      TODO.md
  3. 2 2
      cpython/py_graph.h
  4. 37 1
      cpython/py_lsup_rdf.c
  5. 186 4
      cpython/py_term.h
  6. 3 15
      include/term.h
  7. 44 37
      src/term.c
  8. 47 15
      test/cpython_test.py

+ 2 - 1
Makefile

@@ -59,7 +59,8 @@ valgrind:
 	valgrind \
 	--leak-check=full --show-leak-kinds=all --track-origins=yes \
 	--log-file=/tmp/lsup_valgrind.log \
-	./bin/test
+	./bin/test && \
+	echo "Memcheck complete. Valgrind log is at /tmp/lsup_valgrind.log"
 
 
 memcheck: test valgrind

+ 2 - 3
TODO.md

@@ -12,15 +12,14 @@
 - *D* Better error handling
 - *D* Logging
 - *D* Store graph
-- *W* Python bindings
+- *D* Python bindings
     - *D* Basic module framework
     - *D* term, triple, graph modules
     - *D* Codec integration
     - *D* Graph remove and lookup ops
     - *D* Namespace module
-    - *D* Lookup methods
     - *D* Tests (basic)
-    - *P* Subclass term types
+    - *D* Subclass term types
 - *P* Turtle serialization / deserialization
 - *P* Extended tests
     - *P* C API

+ 2 - 2
cpython/py_graph.h

@@ -311,8 +311,8 @@ inline static int build_trp_pattern (PyObject *args, LSUP_Term *spo[])
     }
 
     spo[0] = s_obj != Py_None ? ((TermObject *)s_obj)->ob_struct : NULL;
-    spo[1] = s_obj != Py_None ? ((TermObject *)p_obj)->ob_struct : NULL;
-    spo[2] = s_obj != Py_None ? ((TermObject *)o_obj)->ob_struct : NULL;
+    spo[1] = p_obj != Py_None ? ((TermObject *)p_obj)->ob_struct : NULL;
+    spo[2] = o_obj != Py_None ? ((TermObject *)o_obj)->ob_struct : NULL;
 
     return 0;
 }

+ 37 - 1
cpython/py_lsup_rdf.c

@@ -59,7 +59,13 @@ static PyModuleDef term_mod = {
 PyMODINIT_FUNC
 PyInit_term()
 {
-    if (PyType_Ready (&TermType) < 0) return NULL;
+    if (
+        PyType_Ready (&TermType) < 0
+        || PyType_Ready (&IRIRefType) < 0
+        || PyType_Ready (&LiteralType) < 0
+        || PyType_Ready (&LTLiteralType) < 0
+        || PyType_Ready (&BNodeType) < 0
+    ) return NULL;
 
     PyObject *m = PyModule_Create(&term_mod);
     if (m == NULL) return NULL;
@@ -69,6 +75,8 @@ PyInit_term()
         || PyModule_AddIntConstant (m, "TERM_IRIREF", LSUP_TERM_IRIREF) < 0
         || PyModule_AddIntConstant (m, "TERM_BNODE", LSUP_TERM_BNODE) < 0
         || PyModule_AddIntConstant (m, "TERM_LITERAL", LSUP_TERM_LITERAL) < 0
+        || PyModule_AddIntConstant (
+                m, "TERM_LT_LITERAL", LSUP_TERM_LT_LITERAL) < 0
     ) return NULL;
 
     Py_INCREF(&TermType);
@@ -78,6 +86,34 @@ PyInit_term()
         return NULL;
     }
 
+    Py_INCREF(&IRIRefType);
+    if (PyModule_AddObject(m, "IRIRef", (PyObject *) &IRIRefType) < 0) {
+        Py_DECREF(&IRIRefType);
+        Py_DECREF(m);
+        return NULL;
+    }
+
+    Py_INCREF(&LiteralType);
+    if (PyModule_AddObject(m, "Literal", (PyObject *) &LiteralType) < 0) {
+        Py_DECREF(&LiteralType);
+        Py_DECREF(m);
+        return NULL;
+    }
+
+    Py_INCREF(&LTLiteralType);
+    if (PyModule_AddObject(m, "LTLiteral", (PyObject *) &LTLiteralType) < 0) {
+        Py_DECREF(&LTLiteralType);
+        Py_DECREF(m);
+        return NULL;
+    }
+
+    Py_INCREF(&BNodeType);
+    if (PyModule_AddObject(m, "BNode", (PyObject *) &BNodeType) < 0) {
+        Py_DECREF(&BNodeType);
+        Py_DECREF(m);
+        return NULL;
+    }
+
     return m;
 }
 

+ 186 - 4
cpython/py_term.h

@@ -7,6 +7,7 @@
 #include <structmember.h>
 
 #include "term.h"
+#include "py_namespace.h"
 
 
 typedef struct {
@@ -40,6 +41,93 @@ Term_init (TermObject *self, PyObject *args, PyObject *kwargs)
     return 0;
 }
 
+
+/*
+ * tp_init for term.IRIRef.
+ *
+ * Python signature: IRIRef([data[, nsm]])
+ *
+ * data: IRI string. When omitted, NULL is passed to LSUP_uri_new().
+ * nsm:  optional NSMap instance. It is only type-checked for now; it is not
+ *       yet forwarded to the term (see TODO below).
+ *
+ * Returns 0 on success, -1 with a Python exception set on failure.
+ */
+static int
+Term_iriref_init (TermObject *self, PyObject *args, PyObject *kwargs)
+{
+    char *data = NULL;
+    // The "O" format unit stores a borrowed PyObject *, so the receiving
+    // variable must be a PyObject * rather than a native LSUP_NSMap *.
+    PyObject *nsm_obj = NULL;
+
+    static char *kwlist[] = {"data", "nsm", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords (
+            args, kwargs, "|sO", kwlist, &data, &nsm_obj))
+        return -1;
+
+    if (nsm_obj && !PyObject_TypeCheck (nsm_obj, &NSMapType)) {
+        PyErr_SetString (PyExc_TypeError, "nsm is not a NSMap type.");
+        return -1;
+    }
+
+    // TODO Add nsm parameter.
+    self->ob_struct = LSUP_uri_new (data);
+    if (!self->ob_struct) {
+        PyErr_SetString (PyExc_ValueError, "Could not create term.");
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * tp_init for term.Literal.
+ *
+ * Python signature: Literal(data, /, datatype=None)
+ *
+ * The first argument is positional-only (its keyword name is empty).
+ * datatype may be omitted or None; in both cases NULL is handed to
+ * LSUP_term_new().
+ *
+ * Returns 0 on success, -1 with a Python exception set on failure.
+ */
+static int
+Term_literal_init (TermObject *self, PyObject *args, PyObject *kwargs)
+{
+    static char *kwlist[] = {"", "datatype", NULL};
+    char *lexical = NULL;
+    char *dtype = NULL;
+
+    int parsed = PyArg_ParseTupleAndKeywords (
+            args, kwargs, "s|z", kwlist, &lexical, &dtype);
+    if (!parsed) return -1;
+
+    self->ob_struct = LSUP_term_new (LSUP_TERM_LITERAL, lexical, dtype);
+    if (self->ob_struct) return 0;
+
+    PyErr_SetString (PyExc_ValueError, "Could not create term.");
+    return -1;
+}
+
+
+/*
+ * tp_init for term.LTLiteral.
+ *
+ * Takes exactly two positional arguments: the literal string and the
+ * language tag (str or None). Keyword arguments are not consulted.
+ *
+ * Returns 0 on success, -1 with a Python exception set on failure.
+ */
+static int
+Term_lt_literal_init (TermObject *self, PyObject *args, PyObject *kwargs)
+{
+    char *lexical = NULL;
+    char *tag = NULL;
+
+    int parsed = PyArg_ParseTuple (args, "sz", &lexical, &tag);
+    if (!parsed) return -1;
+
+    self->ob_struct = LSUP_term_new (LSUP_TERM_LT_LITERAL, lexical, tag);
+    if (self->ob_struct) return 0;
+
+    PyErr_SetString (PyExc_ValueError, "Could not create term.");
+    return -1;
+}
+
+
+/*
+ * tp_init for term.BNode.
+ *
+ * Python signature: BNode([data])
+ *
+ * data: blank node label. When omitted, NULL is passed to LSUP_term_new()
+ *       (presumably letting the library pick a label -- confirm against
+ *       LSUP_term_init).
+ *
+ * Returns 0 on success, -1 with a Python exception set on failure.
+ */
+static int
+Term_bnode_init (TermObject *self, PyObject *args, PyObject *kwargs)
+{
+    // Must start out NULL: with the optional "|s" format the parser leaves
+    // the variable untouched when the argument is not supplied.
+    char *data = NULL;
+    static char *kwlist[] = {"data", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords (args, kwargs, "|s", kwlist, &data))
+        return -1;
+
+    self->ob_struct = LSUP_term_new (LSUP_TERM_BNODE, data, NULL);
+    if (!self->ob_struct) {
+        PyErr_SetString (PyExc_ValueError, "Could not create term.");
+        return -1;
+    }
+
+    return 0;
+}
+
+
 static void
 Term_dealloc (TermObject *self)
 {
@@ -82,6 +170,20 @@ Term_get_datatype (TermObject *self, void *closure)
     return datatype;
 }
 
+/*
+ * Getter for LTLiteral.datatype.
+ *
+ * This is the same value for all language-tagged literals: the string held
+ * by LSUP_default_datatype.
+ *
+ * Returns a new reference to a str, or NULL with an exception set if the
+ * string object could not be created.
+ */
+static PyObject *
+LTLiteral_get_datatype (TermObject *self, void *closure)
+{
+    const LSUP_Term *dtype = LSUP_default_datatype;
+
+    // PyUnicode_FromString() already returns a new (owned) reference. An
+    // additional Py_INCREF would leak one reference per attribute access
+    // and would dereference NULL if the allocation failed.
+    return PyUnicode_FromString (dtype->data);
+}
+
 static PyObject *
 Term_get_lang (TermObject *self, void *closure)
 {
@@ -98,20 +200,56 @@ Term_get_lang (TermObject *self, void *closure)
 
 
 static PyGetSetDef Term_getsetters[] = {
-    {"type", (getter) Term_get_type, NULL, "Term type.", NULL},
-    {"data", (getter) Term_get_data, NULL, "Term data.", NULL},
+    {"_type", (getter) Term_get_type, NULL, "Term type.", NULL},
+    {"_data", (getter) Term_get_data, NULL, "Term data.", NULL},
     {
-        "datatype", (getter) Term_get_datatype,
+        "_datatype", (getter) Term_get_datatype,
         NULL, "Literal term data type.", NULL
     },
     {
-        "lang", (getter) Term_get_lang,
+        "_lang", (getter) Term_get_lang,
         NULL, "Literal term language tag.", NULL
     },
     {NULL}
 };
 
 
+/* Read-only attributes exposed on term.IRIRef. */
+static PyGetSetDef IRIRef_getsetters[] = {
+    {
+        .name = "data",
+        .get = (getter) Term_get_data,
+        .doc = "IRI string.",
+    },
+    {.name = NULL}  // Sentinel.
+};
+
+
+/* Read-only attributes exposed on term.Literal. */
+static PyGetSetDef Literal_getsetters[] = {
+    {
+        .name = "data",
+        .get = (getter) Term_get_data,
+        .doc = "Literal data.",
+    },
+    {
+        .name = "datatype",
+        .get = (getter) Term_get_datatype,
+        .doc = "Data type.",
+    },
+    {.name = NULL}  // Sentinel.
+};
+
+
+/*
+ * Read-only attributes exposed on term.LTLiteral. "datatype" uses the
+ * LTLiteral-specific getter instead of the generic Term one.
+ */
+static PyGetSetDef LTLiteral_getsetters[] = {
+    {
+        .name = "data",
+        .get = (getter) Term_get_data,
+        .doc = "Literal data.",
+    },
+    {
+        .name = "datatype",
+        .get = (getter) LTLiteral_get_datatype,
+        .doc = "Data type.",
+    },
+    {
+        .name = "lang",
+        .get = (getter) Term_get_lang,
+        .doc = "Language tag.",
+    },
+    {.name = NULL}  // Sentinel.
+};
+
+
+/* Read-only attributes exposed on term.BNode. */
+static PyGetSetDef BNode_getsetters[] = {
+    {
+        .name = "data",
+        .get = (getter) Term_get_data,
+        .doc = "Blank node label.",
+    },
+    {.name = NULL}  // Sentinel.
+};
+
+
 static PyObject *
 Term_richcmp (PyObject *obj1, PyObject *obj2, int op);
 
@@ -137,6 +275,50 @@ PyTypeObject TermType = {
 };
 
 
+/*
+ * Python type term.IRIRef: a subclassable subtype of Term that supplies its
+ * own initializer and attribute table.
+ */
+PyTypeObject IRIRefType = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    .tp_base = &TermType,
+    .tp_name = "term.IRIRef",
+    .tp_doc = "RDF IRI reference.",
+    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
+    .tp_getset = IRIRef_getsetters,
+    .tp_init = (initproc) Term_iriref_init,
+};
+
+
+/*
+ * Python type term.Literal: a subclassable subtype of Term that supplies its
+ * own initializer and attribute table.
+ */
+PyTypeObject LiteralType = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    .tp_base = &TermType,
+    .tp_name = "term.Literal",
+    .tp_doc = "RDF Literal.",
+    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
+    .tp_getset = Literal_getsetters,
+    .tp_init = (initproc) Term_literal_init,
+};
+
+
+/*
+ * Python type term.LTLiteral: a subclassable subtype of Literal (not of Term
+ * directly) that supplies its own initializer and attribute table.
+ */
+PyTypeObject LTLiteralType = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    .tp_base = &LiteralType,
+    .tp_name = "term.LTLiteral",
+    .tp_doc = "RDF language-tagged Literal.",
+    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
+    .tp_getset = LTLiteral_getsetters,
+    .tp_init = (initproc) Term_lt_literal_init,
+};
+
+
+/*
+ * Python type term.BNode: a subclassable subtype of Term that supplies its
+ * own initializer and attribute table.
+ */
+PyTypeObject BNodeType = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    .tp_name = "term.BNode",
+    .tp_doc = "RDF Blank Node.",
+    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
+    .tp_base = &TermType,
+    .tp_init = (initproc) Term_bnode_init,
+    .tp_getset = BNode_getsetters,
+};
+
+
 static PyObject *
 Term_richcmp (PyObject *obj1, PyObject *obj2, int op)
 {

+ 3 - 15
include/term.h

@@ -139,19 +139,6 @@ LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
 inline LSUP_Term *
 LSUP_uri_new (const char *data)
 {
-    if (!data) {
-        uuid_t uuid;
-        uuid_generate_random (uuid);
-
-        uuid_str_t uuid_str;
-        uuid_unparse_lower (uuid, uuid_str);
-
-        char uri[UUID4_URN_SIZE];
-        snprintf (uri, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
-
-        data = uri;
-    }
-
     return LSUP_term_new (LSUP_TERM_IRIREF, data, NULL);
 }
 
@@ -192,8 +179,9 @@ LSUP_term_serialize (const LSUP_Term *term);
 /**
  * @brief Shortcut to initialize a URI.
  */
-LSUP_rc
-LSUP_uri_init (LSUP_Term *term, const char *data);
+inline LSUP_rc
+LSUP_uri_init (LSUP_Term *term, const char *data)
+{ return LSUP_term_init (term, LSUP_TERM_IRIREF, data, NULL); }
 
 
 /** @brief Hash a buffer.

+ 44 - 37
src/term.c

@@ -105,11 +105,11 @@ LSUP_term_init(
         LSUP_Term *term, LSUP_TermType type,
         const char *data, void *metadata)
 {
-    // This can never be LSUP_TERM_UNDEFINED.
-    if (!data) {
-        log_error ("No data provided for term.");
-        return LSUP_VALUE_ERR;
+    if (UNLIKELY (!LSUP_uri_ptn)) {
+        log_error ("Environment not initialized. Did you call LSUP_init()?");
+        return LSUP_ERROR;
     }
+    // This can never be LSUP_TERM_UNDEFINED.
     if (type <= LSUP_TERM_UNDEFINED || type > MAX_VALID_TERM_TYPE) {
         log_error ("%d is not a valid term type.", type);
         return LSUP_VALUE_ERR;
@@ -117,27 +117,54 @@ LSUP_term_init(
 
     term->type = type;
 
-    // Validate URI.
-    if (term->type == LSUP_TERM_IRIREF) {
-        if (strpbrk (data, invalid_uri_chars) != NULL) {
-            log_error (
-                    "Characters %s are not allowed. Got: %s\n",
-                    invalid_uri_chars, data);
+    char *data_tmp;
+    if (data) {
+        // Validate URI.
+        if (term->type == LSUP_TERM_IRIREF) {
+            if (strpbrk (data, invalid_uri_chars) != NULL) {
+                log_error (
+                        "Characters %s are not allowed. Got: %s\n",
+                        invalid_uri_chars, data);
 
-            return LSUP_VALUE_ERR;
+                return LSUP_VALUE_ERR;
+            }
+
+            if (regexec (LSUP_uri_ptn, data, 0, NULL, 0) != 0) {
+                fprintf (stderr, "Error matching URI pattern.\n");
+
+                return LSUP_VALUE_ERR;
+            }
         }
 
-        if (regexec (LSUP_uri_ptn, data, 0, NULL, 0) != 0) {
-            fprintf (stderr, "Error matching URI pattern.\n");
+        data_tmp = realloc (term->data, strlen (data) + 1);
+        if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
+
+        strcpy (data_tmp, data);
 
+    } else {
+        // No data. Make up a random UUID or URI if allowed.
+        if (type == LSUP_TERM_IRIREF || type == LSUP_TERM_BNODE) {
+            uuid_t uuid;
+            uuid_generate_random (uuid);
+
+            uuid_str_t uuid_str;
+            uuid_unparse_lower (uuid, uuid_str);
+
+            if (type == LSUP_TERM_IRIREF) {
+                data_tmp = realloc (term->data, UUID4_URN_SIZE);
+                if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
+                snprintf (data_tmp, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
+                term->data = data_tmp;
+            } else {
+                data_tmp = realloc (term->data, sizeof(uuid_str));
+                strcpy(data_tmp, uuid_str);
+            }
+        } else {
+            log_error ("No data provided for term.");
             return LSUP_VALUE_ERR;
         }
     }
-
-    char *data_tmp = realloc (term->data, strlen (data) + 1);
-    if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
     term->data = data_tmp;
-    strcpy (term->data, data);
 
     if (term->type == LSUP_TERM_LT_LITERAL) {
         // Lang tags longer than 7 characters will be truncated.
@@ -168,26 +195,6 @@ LSUP_term_init(
 }
 
 
-LSUP_rc
-LSUP_uri_init (LSUP_Term *term, const char *data)
-{
-    if (!data) {
-        uuid_t uuid;
-        uuid_generate_random (uuid);
-
-        uuid_str_t uuid_str;
-        uuid_unparse_lower (uuid, uuid_str);
-
-        char uri[UUIDSTR_SIZE + 10];
-        sprintf (uri, "urn:uuid4:%s", uuid_str);
-
-        data = uri;
-    }
-
-    return LSUP_term_init (term, LSUP_TERM_IRIREF, data, NULL);
-}
-
-
 LSUP_Key
 LSUP_term_hash (const LSUP_Term *term)
 {

+ 47 - 15
test/cpython_test.py

@@ -3,21 +3,22 @@ import unittest
 from os import path
 
 from lsup_rdf import env_init, term, triple, graph
+from lsup_rdf.term import IRIRef, Literal, LTLiteral, BNode
 
 TEST_DIR = path.realpath(path.dirname(__file__))
 
 
 class TestTerm(unittest.TestCase):
     def setUp(self):
-        self.s1 = term.Term(term.TERM_IRIREF, "urn:s:1")
-        self.p1 = term.Term(term.TERM_IRIREF, "urn:p:1")
-        self.o1 = term.Term(term.TERM_IRIREF, "urn:o:1")
-        self.s2 = term.Term(term.TERM_IRIREF, "urn:s:2")
-        self.p2 = term.Term(term.TERM_IRIREF, "urn:p:2")
-        self.o2 = term.Term(term.TERM_IRIREF, "urn:o:2")
-        self.s3 = term.Term(term.TERM_IRIREF, "urn:s:3")
-        self.p3 = term.Term(term.TERM_IRIREF, "urn:p:3")
-        self.o3 = term.Term(term.TERM_IRIREF, "urn:o:3")
+        self.s1 = IRIRef("urn:s:1")
+        self.p1 = IRIRef("urn:p:1")
+        self.o1 = IRIRef("urn:o:1")
+        self.s2 = IRIRef("urn:s:2")
+        self.p2 = IRIRef("urn:p:2")
+        self.o2 = IRIRef("urn:o:2")
+        self.s3 = IRIRef("urn:s:3")
+        self.p3 = IRIRef("urn:p:3")
+        self.o3 = IRIRef("urn:o:3")
 
         self.trp = [
             triple.Triple(self.s1, self.p1, self.o1),
@@ -26,12 +27,43 @@ class TestTerm(unittest.TestCase):
         self.t3 = triple.Triple(self.s3, self.p3, self.o3)
         self.t4 = triple.Triple(self.s1, self.p1, self.o1)
 
-    def test_term(self):
-        s1 = term.Term(term.TERM_IRIREF, "urn:s:1")
-
-        self.assertTrue(isinstance(s1, term.Term))
-        self.assertEqual(s1.data, "urn:s:1")
-        self.assertEqual(s1.type, term.TERM_IRIREF)
+    def test_iriref(self):
+        # An IRIRef is a Term that exposes `data` but none of the
+        # literal-specific attributes.
+        uri = IRIRef("urn:s:1")
+
+        # assertIsInstance reports the actual type on failure.
+        self.assertIsInstance(uri, term.Term)
+        self.assertEqual(uri.data, 'urn:s:1')
+        self.assertEqual(uri._type, term.TERM_IRIREF)
+        self.assertFalse(hasattr(uri, 'datatype'))
+        self.assertFalse(hasattr(uri, 'lang'))
+
+    def test_literal(self):
+        # A Literal created without an explicit datatype reports the
+        # xsd:string IRI and has no `lang` attribute.
+        lit = Literal('Hello')
+
+        # assertIsInstance reports the actual type on failure.
+        self.assertIsInstance(lit, term.Term)
+        self.assertEqual(lit.data, 'Hello')
+        self.assertEqual(lit._type, term.TERM_LITERAL)
+        self.assertEqual(
+                lit.datatype, 'http://www.w3.org/2001/XMLSchema#string')
+        self.assertFalse(hasattr(lit, 'lang'))
+
+    def test_lt_literal(self):
+        # A language-tagged literal exposes `data`, `datatype` and `lang`.
+        lt_lit = LTLiteral('Hola', 'es-ES')
+
+        # assertIsInstance reports the actual type on failure.
+        self.assertIsInstance(lt_lit, term.Term)
+        self.assertEqual(lt_lit.data, 'Hola')
+        self.assertEqual(lt_lit._type, term.TERM_LT_LITERAL)
+        self.assertEqual(
+                lt_lit.datatype, 'http://www.w3.org/2001/XMLSchema#string')
+        self.assertEqual(lt_lit.lang, 'es-ES')
+
+    def test_bnode(self):
+        # A BNode is a Term that exposes `data` (its label) but none of the
+        # literal-specific attributes.
+        bn = BNode('1234')
+
+        # assertIsInstance reports the actual type on failure.
+        self.assertIsInstance(bn, term.Term)
+        self.assertEqual(bn.data, '1234')
+        self.assertEqual(bn._type, term.TERM_BNODE)
+        self.assertFalse(hasattr(bn, 'datatype'))
+        self.assertFalse(hasattr(bn, 'lang'))
 
     def test_graph(self):
         gr = graph.Graph(graph.STORE_MEM)