Pārlūkot izejas kodu

Update Python modules; improve build script; fix bugs.

Stefano Cossu 3 gadi atpakaļ
vecāks
revīzija
b7c3aad35e
8 mainītis faili ar 191 papildinājumiem un 113 dzēšanām
  1. 1 0
      Makefile
  2. 26 23
      cpython/py_graph.h
  3. 0 8
      cpython/py_lsup_rdf.c
  4. 107 73
      cpython/py_term.h
  5. 2 2
      include/term.h
  6. 48 1
      setup.py
  7. 1 1
      src/graph.c
  8. 6 5
      test/cpython_test.py

+ 1 - 0
Makefile

@@ -151,6 +151,7 @@ profile: bin/profile
 		echo "Profile dump written at $(CALLGRIND_DUMP)"
 
 
+# Install the Python package for the current user, then run its test script.
+.PHONY: py_test
 py_test:
 	pip3 install --user . && \
 	python3 test/cpython_test.py

+ 26 - 23
cpython/py_graph.h

@@ -45,11 +45,7 @@ StringIterator_next (StringIteratorObject *it_obj)
         return NULL;
     }
 
-    PyObject *rdf_obj = PyUnicode_FromString ((char*)it_obj->line);
-    if (UNLIKELY (!rdf_obj)) return NULL;
-
-    Py_INCREF (rdf_obj);
-    return (rdf_obj);
+    return PyUnicode_FromString ((char *) it_obj->line);
 }
 
 
@@ -136,20 +132,27 @@ static int
 Graph_init (GraphObject *self, PyObject *args, PyObject *kwargs)
 {
     unsigned char store_type;
-    PyObject *uri_obj;
-    LSUP_Term *uri;
+    PyObject *uri_obj = NULL;
+    LSUP_Term *uri = NULL, *src_uri = NULL;
 
-    static char *kwlist[] = {"", "uri", NULL};
+    static char *kwlist[] = {"", "uri_obj", NULL};
 
     if (!PyArg_ParseTupleAndKeywords (
             args, kwargs, "b|O", kwlist, &store_type, &uri_obj))
         return -1;
 
-    if (uri_obj)
-        if (!PyObject_TypeCheck (uri, &TermType)) {
+    if (uri_obj) {
+        if (!PyObject_TypeCheck (uri_obj, &TermType)) {
             PyErr_SetString (PyExc_TypeError, "uri is not a Term type.");
             return -1;
         }
+        src_uri = ((TermObject *) uri_obj)->ob_struct;
+        if (! LSUP_IS_IRI (src_uri)) {  // Validate the source before copying.
+            PyErr_SetString (PyExc_TypeError, "uri is not a IRIREF type.");
+            return -1;
+        }
+        uri = LSUP_iriref_new (src_uri->data, LSUP_iriref_nsm (src_uri));
+
     } else uri = LSUP_iriref_new (NULL, NULL);
 
     self->ob_struct = LSUP_graph_new (uri, (LSUP_store_type) store_type);
@@ -157,6 +160,8 @@ Graph_init (GraphObject *self, PyObject *args, PyObject *kwargs)
         PyErr_SetString (PyExc_ValueError, "Could not create graph.");
         return -1;
     }
+    LSUP_Term *uri2 = LSUP_graph_uri (self->ob_struct);
+    log_debug("Graph URI (%p): %s", uri2, uri2->data);
 
     return 0;
 }
@@ -172,12 +177,11 @@ Graph_dealloc (GraphObject *self)
 static PyObject *
 Graph_get_uri (GraphObject *self, void *closure)
 {
-    PyObject *uri = PyUnicode_FromString (
-            LSUP_graph_uri (self->ob_struct)->data);
-    if ( UNLIKELY (!uri)) return NULL;
+    LSUP_Term *uri = LSUP_graph_uri (self->ob_struct);
+    log_debug("Graph URI address: %p", uri);
+    log_debug("Graph URI: %s", uri->data);
 
-    Py_INCREF(uri);
-    return uri;
+    return PyUnicode_FromString (uri->data);
 }
 
 
@@ -190,7 +194,7 @@ Graph_set_uri (GraphObject *self, PyObject *value, void *closure)
     }
 
     LSUP_rc rc = LSUP_graph_set_uri (
-            self->ob_struct, ((TermObject*)value)->ob_struct->data);
+            self->ob_struct, ((TermObject*)value)->ob_struct);
 
     return rc == LSUP_OK ? 0 : -1;
 }
@@ -294,7 +298,7 @@ Graph_new_from_rdf (PyTypeObject *cls, PyObject *args)
         return NULL;
     }
 
-    Py_INCREF(res);
+    Py_INCREF (res);
 
     return (PyObject *) res;
 }
@@ -427,6 +431,8 @@ Graph_add (PyObject *self, PyObject *triples)
 
         if (db_rc == LSUP_OK) rc = LSUP_OK;
         if (UNLIKELY (db_rc < 0)) {
+            PyErr_SetString (
+                    PyExc_ValueError, "Unknown error while adding triples.");
             rc = -1;
             goto finally;
         }
@@ -435,12 +441,9 @@ Graph_add (PyObject *self, PyObject *triples)
 finally:
     LSUP_graph_add_done (it);
 
-    PyObject *ret = PyLong_FromSize_t (LSUP_graph_iter_cur (it));
+    if (rc == LSUP_OK)
+        return PyLong_FromSize_t (LSUP_graph_iter_cur (it));
 
-    if (rc == LSUP_OK) {
-        Py_INCREF (ret);
-        return ret;
-    }
     return NULL;
 }
 
@@ -494,7 +497,7 @@ static PyObject *Graph_lookup (PyObject *self, PyObject *args)
     // Initialize the generator object.
     it_obj = PyObject_New (
             GraphIteratorObject, &GraphIteratorType);
-    if (UNLIKELY (!it_obj)) goto finally;
+    if (UNLIKELY (!it_obj)) return PyErr_NoMemory();
 
     it_obj->it = it;
     it_obj->spo = TRP_DUMMY;

+ 0 - 8
cpython/py_lsup_rdf.c

@@ -63,7 +63,6 @@ PyInit_term()
         PyType_Ready (&TermType) < 0
         || PyType_Ready (&IRIRefType) < 0
         || PyType_Ready (&LiteralType) < 0
-        || PyType_Ready (&LTLiteralType) < 0
         || PyType_Ready (&BNodeType) < 0
     ) return NULL;
 
@@ -100,13 +99,6 @@ PyInit_term()
         return NULL;
     }
 
-    Py_INCREF(&LTLiteralType);
-    if (PyModule_AddObject(m, "LTLiteral", (PyObject *) &LTLiteralType) < 0) {
-        Py_DECREF(&LTLiteralType);
-        Py_DECREF(m);
-        return NULL;
-    }
-
     Py_INCREF(&BNodeType);
     if (PyModule_AddObject(m, "BNode", (PyObject *) &BNodeType) < 0) {
         Py_DECREF(&BNodeType);

+ 107 - 73
cpython/py_term.h

@@ -46,7 +46,7 @@ static int
 Term_iriref_init (TermObject *self, PyObject *args, PyObject *kwargs)
 {
     char *data = NULL;
-    LSUP_NSMap *nsm = NULL;
+    NSMapObject *nsm = NULL;
 
     static char *kwlist[] = {"data", "nsm", NULL};
 
@@ -73,33 +73,21 @@ Term_iriref_init (TermObject *self, PyObject *args, PyObject *kwargs)
 static int
 Term_literal_init (TermObject *self, PyObject *args, PyObject *kwargs)
 {
-    char *data = NULL, *datatype = NULL;
+    char *data = NULL, *datatype = NULL, *lang = NULL;
 
-    static char *kwlist[] = {"", "datatype", NULL};
+    static char *kwlist[] = {"", "datatype", "lang", NULL};
 
     if (!PyArg_ParseTupleAndKeywords (
-            args, kwargs, "s|z", kwlist, &data, &datatype))
+            args, kwargs, "s|zz", kwlist, &data, &datatype, &lang))
         return -1;
 
-    self->ob_struct = LSUP_term_new (LSUP_TERM_LITERAL, data, datatype);
-    if (!self->ob_struct) {
-        PyErr_SetString (PyExc_ValueError, "Could not create term.");
-        return -1;
+    if (lang)
+        self->ob_struct = LSUP_lt_literal_new (data, lang);
+    else {
+        LSUP_Term *dtype = (datatype) ? LSUP_iriref_new (datatype, NULL) : NULL;
+        self->ob_struct = LSUP_literal_new (data, dtype);
     }
 
-    return 0;
-}
-
-
-static int
-Term_lt_literal_init (TermObject *self, PyObject *args, PyObject *kwargs)
-{
-    char *data = NULL, *lang = NULL;
-
-    if (!PyArg_ParseTuple (args, "sz", &data, &lang))
-        return -1;
-
-    self->ob_struct = LSUP_term_new (LSUP_TERM_LT_LITERAL, data, lang);
     if (!self->ob_struct) {
         PyErr_SetString (PyExc_ValueError, "Could not create term.");
         return -1;
@@ -139,63 +127,98 @@ Term_dealloc (TermObject *self)
 static PyObject *
 Term_get_type (TermObject *self, void *closure)
 {
-    PyObject *type = PyLong_FromLong (self->ob_struct->type);
-
-    Py_INCREF (type);
-    return type;
+    return PyLong_FromLong (self->ob_struct->type);
 }
 
 
 static PyObject *
 Term_get_data (TermObject *self, void *closure)
+{ return PyUnicode_FromString (self->ob_struct->data); }
+
+
+static PyObject *
+Term_iriref_get_nsm (TermObject *self, void *closure)
 {
-    PyObject *data = PyUnicode_FromString (self->ob_struct->data);
+    LSUP_Term *term = self->ob_struct;
+    if (!LSUP_IS_IRI(term))
+        Py_RETURN_NONE;
 
-    Py_INCREF (data);
-    return data;
+    LSUP_NSMap *nsm = LSUP_iriref_nsm (term);
+    if (!nsm) Py_RETURN_NONE;
+
+    NSMapObject *nsm_obj = PyObject_New (NSMapObject, &NSMapType);
+    if (UNLIKELY (!nsm_obj)) return PyErr_NoMemory();
+
+    nsm_obj->ob_struct = nsm;
+
+    Py_INCREF (nsm_obj);
+    return (PyObject *) nsm_obj;
 }
 
 
 static PyObject *
-Term_get_datatype (TermObject *self, void *closure)
+Term_iriref_get_prefix (TermObject *self, void *closure)
 {
-    if (!self->ob_struct->datatype) Py_RETURN_NONE;
+    LSUP_Term *term = self->ob_struct;
+    if (! LSUP_IS_IRI (term))
+        Py_RETURN_NONE;
 
-    const LSUP_Term *dtype = LSUP_tcache_get (self->ob_struct->datatype);
-    if (!dtype) Py_RETURN_NONE;
+    return PyUnicode_FromString (LSUP_iriref_prefix (term));
+}
 
-    PyObject *datatype = PyUnicode_FromString (dtype->data);
 
-    Py_INCREF (datatype);
-    return datatype;
+static PyObject *
+Term_iriref_get_path (TermObject *self, void *closure)
+{
+    LSUP_Term *term = self->ob_struct;
+    if (! LSUP_IS_IRI (term))
+        Py_RETURN_NONE;
+
+    return PyUnicode_FromString (LSUP_iriref_path (term));
 }
 
-/*
- * This is the same value for all language-tagged literals.
- */
+
 static PyObject *
-LTLiteral_get_datatype (TermObject *self, void *closure)
+Term_iriref_get_frag (TermObject *self, void *closure)
 {
-    const LSUP_Term *dtype = LSUP_default_datatype;
+    LSUP_Term *term = self->ob_struct;
+    if (! LSUP_IS_IRI (term))
+        Py_RETURN_NONE;
+
+    return PyUnicode_FromString (LSUP_iriref_frag (term));
+}
 
-    PyObject *datatype = PyUnicode_FromString (dtype->data);
+
+/*
+ * Getter for a literal's data type.
+ *
+ * Returns None when the term has no datatype set. Otherwise returns a new
+ * object of the same Python type as `self`, wrapping an IRI ref term created
+ * from the datatype string. Language-tagged literals always report
+ * DEFAULT_DTYPE.
+ */
+static PyObject *
+Term_lit_get_datatype (TermObject *self, void *closure)
+{
+    if (!self->ob_struct->datatype) Py_RETURN_NONE;
+
+    char *dtype_data =
+        self->ob_struct->type == LSUP_TERM_LT_LITERAL ? DEFAULT_DTYPE :
+        self->ob_struct->datatype->data;
+
+    TermObject *datatype = (TermObject *) Py_TYPE (self)->tp_alloc (
+            Py_TYPE (self), 0);
+    if (!datatype) return PyErr_NoMemory();
+
+    datatype->ob_struct = LSUP_iriref_new (dtype_data, NULL);
 
-    Py_INCREF (datatype);
-    return datatype;
+    // tp_alloc already handed us a new reference; an extra Py_INCREF here
+    // would leak the object on every attribute access.
+    return (PyObject *) datatype;
 }
 
+
 static PyObject *
-Term_get_lang (TermObject *self, void *closure)
+Term_lit_get_lang (TermObject *self, void *closure)
 {
     if (
-            !self->ob_struct->datatype || !self->ob_struct->lang ||
-            strlen (self->ob_struct->lang) == 0)
+            self->ob_struct->type != LSUP_TERM_LT_LITERAL
+            || ! self->ob_struct->lang
+            || strlen (self->ob_struct->lang) == 0)
         Py_RETURN_NONE;
 
-    PyObject *lang = PyUnicode_FromString (self->ob_struct->lang);
-
-    Py_INCREF (lang);
-    return lang;
+    return PyUnicode_FromString (self->ob_struct->lang);
 }
 
 
@@ -203,11 +226,27 @@ static PyGetSetDef Term_getsetters[] = {
     {"_type", (getter) Term_get_type, NULL, "Term type.", NULL},
     {"_data", (getter) Term_get_data, NULL, "Term data.", NULL},
     {
-        "_datatype", (getter) Term_get_datatype,
+        "_nsm", (getter) Term_iriref_get_nsm,
+        NULL, "IRI ref namespace map.", NULL
+    },
+    {
+        "_prefix", (getter) Term_iriref_get_prefix,
+        NULL, "IRI ref prefix.", NULL
+    },
+    {
+        "_path", (getter) Term_iriref_get_path,
+        NULL, "IRI ref path after prefix.", NULL
+    },
+    {
+        "_frag", (getter) Term_iriref_get_frag,
+        NULL, "IRI ref fragment.", NULL
+    },
+    {
+        "_datatype", (getter) Term_lit_get_datatype,
         NULL, "Literal term data type.", NULL
     },
     {
-        "_lang", (getter) Term_get_lang,
+        "_lang", (getter) Term_lit_get_lang,
         NULL, "Literal term language tag.", NULL
     },
     {NULL}
@@ -216,28 +255,34 @@ static PyGetSetDef Term_getsetters[] = {
 
 static PyGetSetDef IRIRef_getsetters[] = {
     {"data", (getter) Term_get_data, NULL, "IRI string.", NULL},
-    {NULL}
-};
-
-
-static PyGetSetDef Literal_getsetters[] = {
-    {"data", (getter) Term_get_data, NULL, "Literal data.", NULL},
     {
-        "datatype", (getter) Term_get_datatype,
-        NULL, "Data type.", NULL
+        "nsm", (getter) Term_iriref_get_nsm,
+        NULL, "Namespace map.", NULL
+    },
+    {
+        "prefix", (getter) Term_iriref_get_prefix,
+        NULL, "IRI ref prefix.", NULL
+    },
+    {
+        "path", (getter) Term_iriref_get_path,
+        NULL, "IRI ref path after prefix.", NULL
+    },
+    {
+        "frag", (getter) Term_iriref_get_frag,
+        NULL, "IRI ref fragment.", NULL
     },
     {NULL}
 };
 
 
-static PyGetSetDef LTLiteral_getsetters[] = {
+static PyGetSetDef Literal_getsetters[] = {
     {"data", (getter) Term_get_data, NULL, "Literal data.", NULL},
     {
-        "datatype", (getter) LTLiteral_get_datatype,
+        "datatype", (getter) Term_lit_get_datatype,
         NULL, "Data type.", NULL
     },
     {
-        "lang", (getter) Term_get_lang,
+        "lang", (getter) Term_lit_get_lang,
         NULL, "Language tag.", NULL
     },
     {NULL}
@@ -297,17 +342,6 @@ PyTypeObject LiteralType = {
 };
 
 
-PyTypeObject LTLiteralType = {
-    PyVarObject_HEAD_INIT(NULL, 0)
-    .tp_name = "term.LTLiteral",
-    .tp_doc = "RDF language-tagged Literal.",
-    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
-    .tp_base = &LiteralType,
-    .tp_init = (initproc) Term_lt_literal_init,
-    .tp_getset = LTLiteral_getsetters,
-};
-
-
 PyTypeObject BNodeType = {
     PyVarObject_HEAD_INIT(NULL, 0)
     .tp_name = "term.BNode",

+ 2 - 2
include/term.h

@@ -82,12 +82,12 @@ typedef struct term_t {
 /** @brief Shorthand to test if a term is a IRI of any kind.
  */
 #define LSUP_IS_IRI(term) \
-    (term->type == LSUP_TERM_IRIREF || term->type == LSUP_TERM_NS_IRIREF)
+    ((term)->type == LSUP_TERM_IRIREF || (term)->type == LSUP_TERM_NS_IRIREF)
 
 /** @brief Shorthand to test if a term is a literal of any kind.
  */
 #define LSUP_IS_LITERAL(term) \
-    term->type == LSUP_TERM_LITERAL || term->type == LSUP_TERM_LT_LITERAL
+    ((term)->type == LSUP_TERM_LITERAL || (term)->type == LSUP_TERM_LT_LITERAL)
 
 
 /** @brief Hash cache for data types.

+ 48 - 1
setup.py

@@ -1,17 +1,25 @@
 from glob import glob
 from os import path
 from setuptools import Extension, setup
+from setuptools.command.install import install
+from subprocess import check_output, CalledProcessError
 
 
 ROOT_DIR = path.dirname(path.realpath(__file__))
 MOD_DIR = path.join(ROOT_DIR, 'cpython')
 SRC_DIR = path.join(ROOT_DIR, 'src')
+CODEC_DIR = path.join(SRC_DIR, 'codec')
 INCL_DIR = path.join(ROOT_DIR, 'include')
 EXT_DIR = path.join(ROOT_DIR, 'ext')
 
+LEXER = 're2c'
+PARSER = 'lemon'
+
+
 sources = (
     glob(path.join(SRC_DIR, '*.c')) +
-    glob(path.join(SRC_DIR, 'codec', '*.c')) +
+    glob(path.join(CODEC_DIR, '*_grammar.c')) +
+    glob(path.join(SRC_DIR, 'codec', '*_parser.c')) +
     glob(path.join(MOD_DIR, '*.c')) +
     [
         path.join(EXT_DIR, 'openldap', 'libraries', 'liblmdb', 'mdb.c'),
@@ -25,6 +33,7 @@ sources = (
 debug = True
 
 compile_args = [
+    '-DLOG_USE_COLOR',
     # '-std=c99',
 ]
 if debug:
@@ -33,6 +42,43 @@ else:
     compile_args.extend(['-g0', '-O3'])
 
 
+class LSUPInstallCmd(install):
+    """
+    Run LSUP-specific hooks in extension build phase.
+
+    Before delegating to the stock ``install`` command, this regenerates the
+    codec C sources: every ``*_grammar.y`` file in ``CODEC_DIR`` is fed to
+    the parser generator and every ``*_lexer.re`` file to the lexer
+    generator.
+
+    TODO Extending the Extension class may be best to narrow the scope to the
+    C module.
+    """
+
+    def run(self):
+        """
+        Generate grammar and lexer sources, then run the regular install.
+
+        :raises SystemError: if the lexer or parser generator executable
+            cannot be located with ``which``.
+        """
+        # Run grammar and parser generators.
+        # `check_output` returns bytes terminated by a newline; decode and
+        # strip the result so that it is usable as an executable path.
+        try:
+            lexer_ex_path = check_output(['which', LEXER]).decode().strip()
+        except CalledProcessError:
+            raise SystemError(f'Lexer program `{LEXER}` is not installed.')
+
+        try:
+            parser_ex_path = check_output(['which', PARSER]).decode().strip()
+        except CalledProcessError:
+            raise SystemError(f'Parser program `{PARSER}` is not installed.')
+
+        print("Generating grammar.")
+        for fpath in glob(path.join(CODEC_DIR, '*_grammar.y')):
+            check_output([
+                parser_ex_path, fpath, '-q', '-m',
+                '-T' + path.join(CODEC_DIR, 'lempar.c'), f'-d{CODEC_DIR}'
+            ])
+
+        print("Generating parser.")
+        for fpath in glob(path.join(CODEC_DIR, '*_lexer.re')):
+            check_output([
+                lexer_ex_path, fpath, '-o',
+                fpath.replace('_lexer.re', '_parser.c'), '-T', '--case-ranges',
+            ])
+
+        install.run(self)
+
+
 setup(
     name="lsup_rdf",
     version="1.0a1",
@@ -42,6 +88,7 @@ setup(
     license='https://notabug.org/scossu/lsup_rdf/src/master/LICENSE',
     package_dir={'lsup_rdf': path.join(MOD_DIR, 'lsup_rdf')},
     packages=['lsup_rdf'],
+    cmdclass={'install': LSUPInstallCmd},
     ext_modules=[
         Extension(
             "_lsup_rdf",

+ 1 - 1
src/graph.c

@@ -227,7 +227,7 @@ LSUP_graph_set_uri (LSUP_Graph *gr, LSUP_Term *uri)
     }
 
     LSUP_term_free (gr->uri);
-    gr->uri = uri;
+    gr->uri = LSUP_iriref_new (uri->data, LSUP_iriref_nsm (uri));
 
     return LSUP_OK;
 }

+ 6 - 5
test/cpython_test.py

@@ -3,7 +3,7 @@ import unittest
 from os import path
 
 from lsup_rdf import env_init, term, triple, graph
-from lsup_rdf.term import IRIRef, Literal, LTLiteral, BNode
+from lsup_rdf.term import IRIRef, Literal, BNode
 
 TEST_DIR = path.realpath(path.dirname(__file__))
 
@@ -43,17 +43,18 @@ class TestTerm(unittest.TestCase):
         self.assertEqual(lit.data, 'Hello')
         self.assertEqual(lit._type, term.TERM_LITERAL)
         self.assertEqual(
-                lit.datatype, 'http://www.w3.org/2001/XMLSchema#string')
-        self.assertFalse(hasattr(lit, 'lang'))
+                lit.datatype.data, 'http://www.w3.org/2001/XMLSchema#string')
+        self.assertTrue(lit.lang is None)
 
     def test_lt_literal(self):
-        lt_lit = LTLiteral('Hola', 'es-ES')
+        lt_lit = Literal('Hola', lang='es-ES')
 
         self.assertTrue(isinstance(lt_lit, term.Term))
         self.assertEqual(lt_lit.data, 'Hola')
         self.assertEqual(lt_lit._type, term.TERM_LT_LITERAL)
         self.assertEqual(
-                lt_lit.datatype, 'http://www.w3.org/2001/XMLSchema#string')
+            lt_lit.datatype.data, 'http://www.w3.org/2001/XMLSchema#string'
+        )
         self.assertEqual(lt_lit.lang, 'es-ES')
 
     def test_bnode(self):