Browse Source

Basic Python tests.

Stefano Cossu 4 years ago
parent
commit
8555a45d3d
8 changed files with 108 additions and 19 deletions
  1. 11 7
      README.md
  2. 9 3
      TODO.md
  3. 2 5
      cpython/py_graph.h
  4. 8 2
      cpython/py_term.h
  5. 2 0
      setup.py
  6. 0 2
      src/namespace.c
  7. 2 0
      test/assets/test.nt
  8. 74 0
      test/cpython_test.py

+ 11 - 7
README.md

@@ -6,12 +6,14 @@ Embedded RDF (and maybe later, generic graph) store and manipulation library.
 
 
 ## Purpose
 ## Purpose
 
 
-The goal of this library is to provide extremely efficient and compact
-handling of RDF data. At least a C API and Python bindings are planned.
+The goal of this library is to provide efficient and compact handling of RDF
+data. At least a complete C API and Python bindings are planned.
 
 
 This library can be thought of as SQLite or BerkeleyDB for graphs. It can be
 This library can be thought of as SQLite or BerkeleyDB for graphs. It can be
 embedded directly in a program and store persistent data without the need of
 embedded directly in a program and store persistent data without the need of
-running a server.
+running a server. In addition, `lsup_rdf` can perform in-memory graph
+operations such as validation, de/serialization, boolean operations, lookup,
+etc.
 
 
 Two graph back ends are available: a memory one based on hash maps and a
 Two graph back ends are available: a memory one based on hash maps and a
 disk-based one based on [LMDB](https://symas.com/lmdb/), an extremely fast and
 disk-based one based on [LMDB](https://symas.com/lmdb/), an extremely fast and
@@ -29,9 +31,9 @@ remain focused on serving Lakesuperior.
 
 
 ## Development Status
 ## Development Status
 
 
-**Pre-alpha.** The API is not yet defined and may change radically. The code
-may not compile, or throw a fit when run. At the moment this project is only
-intended for curious developers and researchers.
+**Alpha.** The API structure is not yet stable and may change radically. The
+code may not compile, or may throw a fit when run. Testing is minimal. At the
+moment this project is only intended for curious developers and researchers.
 
 
 This is also my first stab at writing a C library (coming from Python) and an
 This is also my first stab at writing a C library (coming from Python) and an
 unpaid fun project, so don't be surprised if you find some gross stuff.
 unpaid fun project, so don't be surprised if you find some gross stuff.
@@ -48,9 +50,10 @@ of features as a standalone library:
 - Memory- and disk-backed (persistent) graph storage
 - Memory- and disk-backed (persistent) graph storage
 - Contexts (disk-backed only)
 - Contexts (disk-backed only)
 - Handling of blank nodes
 - Handling of blank nodes
+- Namespace prefixes
 - Validation of literal and URI terms
 - Validation of literal and URI terms
 - Validation of RDF triples
 - Validation of RDF triples
-- Fast graph Lookup using matching patterns
+- Fast graph lookup using matching patterns
 - Graph boolean operations
 - Graph boolean operations
 - Serialization and de-serialization to/from N-Triples and N-Quads
 - Serialization and de-serialization to/from N-Triples and N-Quads
 - Serialization and de-serialization to/from Turtle and TriG
 - Serialization and de-serialization to/from Turtle and TriG
@@ -62,6 +65,7 @@ of features as a standalone library:
 
 
 - Binary serialization and hashing of graphs
 - Binary serialization and hashing of graphs
 - Binary protocol for synchronizing remote replicas
 - Binary protocol for synchronizing remote replicas
+- Backend for massive distributed storage (possibly Ceph)
 - Lua bindings
 - Lua bindings
 
 
 ### Likely Out of Scope
 ### Likely Out of Scope

+ 9 - 3
TODO.md

@@ -17,15 +17,21 @@
     - *D* term, triple, graph modules
     - *D* term, triple, graph modules
     - *D* Codec integration
     - *D* Codec integration
     - *D* Graph remove and lookup ops
     - *D* Graph remove and lookup ops
-    - *W* Namespace module
-    - *P* Query and slicing methods
-    - *P* Tests
+    - *D* Namespace module
+    - *D* Lookup methods
+    - *D* Tests (basic)
+    - *P* Subclass term types
 - *P* Turtle serialization / deserialization
 - *P* Turtle serialization / deserialization
+- *P* Extended tests
+    - *P* C API
+    - *P* Python API
 
 
 
 
 ## Non-critical for MVP
 ## Non-critical for MVP
 
 
 - Term and triple validation
 - Term and triple validation
+- Enhanced graph operations
+    - Extract unique terms and 2-term tuples
 - NQ codec
 - NQ codec
 - TriG codec
 - TriG codec
 
 

+ 2 - 5
cpython/py_graph.h

@@ -410,11 +410,8 @@ Graph_add (PyObject *self, PyObject *triples)
 
 
         log_trace ("Inserting triple #%lu", i);
         log_trace ("Inserting triple #%lu", i);
 
 
-        LSUP_BufferTriple *sspo = LSUP_btriple_from_triple (
-                ((TripleObject *) trp_obj)->ob_struct);
-        LSUP_rc db_rc = LSUP_graph_add_iter (it, sspo);
-
-        LSUP_btriple_free (sspo);
+        LSUP_rc db_rc = LSUP_graph_add_iter (
+                it, ((TripleObject *) trp_obj)->ob_struct);
 
 
         if (db_rc == LSUP_OK) rc = LSUP_OK;
         if (db_rc == LSUP_OK) rc = LSUP_OK;
         if (UNLIKELY (db_rc < 0)) {
         if (UNLIKELY (db_rc < 0)) {

+ 8 - 2
cpython/py_term.h

@@ -28,7 +28,10 @@ Term_init (TermObject *self, PyObject *args, PyObject *kwargs)
             &term_type, &data, &datatype, &lang))
             &term_type, &data, &datatype, &lang))
         return -1;
         return -1;
 
 
-    self->ob_struct = LSUP_term_new ((LSUP_TermType) term_type, data, datatype, lang);
+    char *metadata = datatype ? datatype : lang;
+
+    self->ob_struct = LSUP_term_new (
+            (LSUP_TermType) term_type, data, metadata);
     if (!self->ob_struct) {
     if (!self->ob_struct) {
         PyErr_SetString (PyExc_ValueError, "Could not create term.");
         PyErr_SetString (PyExc_ValueError, "Could not create term.");
         return -1;
         return -1;
@@ -70,7 +73,10 @@ Term_get_datatype (TermObject *self, void *closure)
 {
 {
     if (!self->ob_struct->datatype) Py_RETURN_NONE;
     if (!self->ob_struct->datatype) Py_RETURN_NONE;
 
 
-    PyObject *datatype = PyUnicode_FromString (self->ob_struct->datatype);
+    const LSUP_Term *dtype = LSUP_tcache_get (self->ob_struct->datatype);
+    if (!dtype) Py_RETURN_NONE;
+
+    PyObject *datatype = PyUnicode_FromString (dtype->data);
 
 
     Py_INCREF (datatype);
     Py_INCREF (datatype);
     return datatype;
     return datatype;

+ 2 - 0
setup.py

@@ -17,6 +17,7 @@ sources = (
         path.join(EXT_DIR, 'openldap', 'libraries', 'liblmdb', 'mdb.c'),
         path.join(EXT_DIR, 'openldap', 'libraries', 'liblmdb', 'mdb.c'),
         path.join(EXT_DIR, 'openldap', 'libraries', 'liblmdb', 'midl.c'),
         path.join(EXT_DIR, 'openldap', 'libraries', 'liblmdb', 'midl.c'),
         path.join(EXT_DIR, 'xxHash', 'xxhash.c'),
         path.join(EXT_DIR, 'xxHash', 'xxhash.c'),
+        path.join(EXT_DIR, 'tpl', 'src', 'tpl.c'),
         path.join(EXT_DIR, 'log', 'src', 'log.c'),
         path.join(EXT_DIR, 'log', 'src', 'log.c'),
     ]
     ]
 )
 )
@@ -49,6 +50,7 @@ setup(
                 ROOT_DIR,
                 ROOT_DIR,
                 INCL_DIR,
                 INCL_DIR,
                 path.join(EXT_DIR, 'uthash', 'src'),
                 path.join(EXT_DIR, 'uthash', 'src'),
+                path.join(EXT_DIR, 'tpl', 'src'),
                 path.join(EXT_DIR, 'log', 'src'),
                 path.join(EXT_DIR, 'log', 'src'),
             ],
             ],
             libraries=['uuid'],
             libraries=['uuid'],

+ 0 - 2
src/namespace.c

@@ -71,8 +71,6 @@ LSUP_nsmap_add (NSMap *map, const ns_pfx pfx, const char *nsstr)
 
 
     // Add.
     // Add.
     MALLOC_GUARD (entry, LSUP_MEM_ERR);
     MALLOC_GUARD (entry, LSUP_MEM_ERR);
-    //entry = malloc (sizeof (*entry));
-    //if (UNLIKELY (!entry)) return LSUP_MEM_ERR;
 
 
     entry->ns = strdup (nsstr);
     entry->ns = strdup (nsstr);
     strcpy (entry->pfx, pfx);
     strcpy (entry->pfx, pfx);

+ 2 - 0
test/assets/test.nt

@@ -0,0 +1,2 @@
+<urn:s:1> <urn:p:1> <urn:o:1> .
+<urn:s:2> <urn:p:2> <urn:o:2> .

+ 74 - 0
test/cpython_test.py

@@ -0,0 +1,74 @@
+import unittest
+
+from os import path
+
+from lsup_rdf import env_init, term, triple, graph
+
+TEST_DIR = path.realpath(path.dirname(__file__))
+
+
+class TestTerm(unittest.TestCase):
+    def setUp(self):
+        self.s1 = term.Term(term.TERM_IRIREF, "urn:s:1")
+        self.p1 = term.Term(term.TERM_IRIREF, "urn:p:1")
+        self.o1 = term.Term(term.TERM_IRIREF, "urn:o:1")
+        self.s2 = term.Term(term.TERM_IRIREF, "urn:s:2")
+        self.p2 = term.Term(term.TERM_IRIREF, "urn:p:2")
+        self.o2 = term.Term(term.TERM_IRIREF, "urn:o:2")
+        self.s3 = term.Term(term.TERM_IRIREF, "urn:s:3")
+        self.p3 = term.Term(term.TERM_IRIREF, "urn:p:3")
+        self.o3 = term.Term(term.TERM_IRIREF, "urn:o:3")
+
+        self.trp = [
+            triple.Triple(self.s1, self.p1, self.o1),
+            triple.Triple(self.s2, self.p2, self.o2),
+        ]
+        self.t3 = triple.Triple(self.s3, self.p3, self.o3)
+        self.t4 = triple.Triple(self.s1, self.p1, self.o1)
+
+    def test_term(self):
+        s1 = term.Term(term.TERM_IRIREF, "urn:s:1")
+
+        self.assertTrue(isinstance(s1, term.Term))
+        self.assertEqual(s1.data, "urn:s:1")
+        self.assertEqual(s1.type, term.TERM_IRIREF)
+
+    def test_graph(self):
+        gr = graph.Graph(graph.STORE_MEM)
+        gr.uri = term.Term(term.TERM_IRIREF, 'urn:c:1')
+
+        self.assertEqual(gr.uri, 'urn:c:1')
+
+    def test_graph_ops(self):
+        gr = graph.Graph(graph.STORE_MEM)
+
+        print('Adding triples.')
+        gr.add(self.trp)
+
+        self.assertEqual(len(gr), 2)
+        self.assertTrue(self.trp[0] in gr)
+        self.assertTrue(self.trp[1] in gr)
+        self.assertFalse(self.t3 in gr)
+        self.assertTrue(self.t4 in gr)
+
+        gr.remove(self.s1, None, None)
+
+        self.assertFalse(self.trp[0] in gr)
+        self.assertTrue(self.trp[1] in gr)
+
+        print('Encoded NT:')
+        for line in gr.to_rdf('nt'):
+            print(line)
+
+    def test_deserialize(self):
+        print('From file.')
+        with open(path.join(TEST_DIR, 'assets', 'test.nt'), 'rb') as fh:
+            gr2 = graph.Graph.from_rdf(fh, 'nt')
+
+        self.assertTrue(self.trp[0] in gr2)
+        self.assertTrue(self.trp[1] in gr2)
+
+
+if __name__ == '__main__':
+    env_init()
+    unittest.main()