Browse Source

Basic Python tests.

Stefano Cossu 3 years ago
parent
commit
8555a45d3d
8 changed files with 108 additions and 19 deletions
  1. 11 7
      README.md
  2. 9 3
      TODO.md
  3. 2 5
      cpython/py_graph.h
  4. 8 2
      cpython/py_term.h
  5. 2 0
      setup.py
  6. 0 2
      src/namespace.c
  7. 2 0
      test/assets/test.nt
  8. 74 0
      test/cpython_test.py

+ 11 - 7
README.md

@@ -6,12 +6,14 @@ Embedded RDF (and maybe later, generic graph) store and manipulation library.
 
 ## Purpose
 
-The goal of this library is to provide extremely efficient and compact
-handling of RDF data. At least a C API and Python bindings are planned.
+The goal of this library is to provide efficient and compact handling of RDF
+data. At least a complete C API and Python bindings are planned.
 
 This library can be thought of as SQLite or BerkeleyDB for graphs. It can be
 embedded directly in a program and store persistent data without the need of
-running a server.
+running a server. In addition, `lsup_rdf` can perform in-memory graph
+operations such as validation, de/serialization, boolean operations, lookup,
+etc.
 
 Two graph back ends are available: a memory one based on hash maps and a
 disk-based one based on [LMDB](https://symas.com/lmdb/), an extremely fast and
@@ -29,9 +31,9 @@ remain focused on serving Lakesuperior.
 
 ## Development Status
 
-**Pre-alpha.** The API is not yet defined and may change radically. The code
-may not compile, or throw a fit when run. At the moment this project is only
-intended for curious developers and researchers.
+**Alpha.** The API structure is not yet stable and may change radically. The
+code may not compile, or may throw a fit when run. Testing is minimal. At the
+moment this project is only intended for curious developers and researchers.
 
 This is also my first stab at writing a C library (coming from Python) and an
 unpaid fun project, so don't be surprised if you find some gross stuff.
@@ -48,9 +50,10 @@ of features as a standalone library:
 - Memory- and disk-backed (persistent) graph storage
 - Contexts (disk-backed only)
 - Handling of blank nodes
+- Namespace prefixes
 - Validation of literal and URI terms
 - Validation of RDF triples
-- Fast graph Lookup using matching patterns
+- Fast graph lookup using matching patterns
 - Graph boolean operations
 - Serialization and de-serialization to/from N-Triples and N-Quads
 - Serialization and de-serialization to/from Turtle and TriG
@@ -62,6 +65,7 @@ of features as a standalone library:
 
 - Binary serialization and hashing of graphs
 - Binary protocol for synchronizing remote replicas
+- Backend for massive distributed storage (possibly Ceph)
 - Lua bindings
 
 ### Likely Out of Scope

+ 9 - 3
TODO.md

@@ -17,15 +17,21 @@
     - *D* term, triple, graph modules
     - *D* Codec integration
     - *D* Graph remove and lookup ops
-    - *W* Namespace module
-    - *P* Query and slicing methods
-    - *P* Tests
+    - *D* Namespace module
+    - *D* Lookup methods
+    - *D* Tests (basic)
+    - *P* Subclass term types
 - *P* Turtle serialization / deserialization
+- *P* Extended tests
+    - *P* C API
+    - *P* Python API
 
 
 ## Non-critical for MVP
 
 - Term and triple validation
+- Enhanced graph operations
+    - Extract unique terms and 2-term tuples
 - NQ codec
 - TriG codec
 

+ 2 - 5
cpython/py_graph.h

@@ -410,11 +410,8 @@ Graph_add (PyObject *self, PyObject *triples)
 
         log_trace ("Inserting triple #%lu", i);
 
-        LSUP_BufferTriple *sspo = LSUP_btriple_from_triple (
-                ((TripleObject *) trp_obj)->ob_struct);
-        LSUP_rc db_rc = LSUP_graph_add_iter (it, sspo);
-
-        LSUP_btriple_free (sspo);
+        LSUP_rc db_rc = LSUP_graph_add_iter (
+                it, ((TripleObject *) trp_obj)->ob_struct);
 
         if (db_rc == LSUP_OK) rc = LSUP_OK;
         if (UNLIKELY (db_rc < 0)) {

+ 8 - 2
cpython/py_term.h

@@ -28,7 +28,10 @@ Term_init (TermObject *self, PyObject *args, PyObject *kwargs)
             &term_type, &data, &datatype, &lang))
         return -1;
 
-    self->ob_struct = LSUP_term_new ((LSUP_TermType) term_type, data, datatype, lang);
+    char *metadata = datatype ? datatype : lang;
+
+    self->ob_struct = LSUP_term_new (
+            (LSUP_TermType) term_type, data, metadata);
     if (!self->ob_struct) {
         PyErr_SetString (PyExc_ValueError, "Could not create term.");
         return -1;
@@ -70,7 +73,10 @@ Term_get_datatype (TermObject *self, void *closure)
 {
     if (!self->ob_struct->datatype) Py_RETURN_NONE;
 
-    PyObject *datatype = PyUnicode_FromString (self->ob_struct->datatype);
+    const LSUP_Term *dtype = LSUP_tcache_get (self->ob_struct->datatype);
+    if (!dtype) Py_RETURN_NONE;
+
+    PyObject *datatype = PyUnicode_FromString (dtype->data);
 
     Py_INCREF (datatype);
     return datatype;

+ 2 - 0
setup.py

@@ -17,6 +17,7 @@ sources = (
         path.join(EXT_DIR, 'openldap', 'libraries', 'liblmdb', 'mdb.c'),
         path.join(EXT_DIR, 'openldap', 'libraries', 'liblmdb', 'midl.c'),
         path.join(EXT_DIR, 'xxHash', 'xxhash.c'),
+        path.join(EXT_DIR, 'tpl', 'src', 'tpl.c'),
         path.join(EXT_DIR, 'log', 'src', 'log.c'),
     ]
 )
@@ -49,6 +50,7 @@ setup(
                 ROOT_DIR,
                 INCL_DIR,
                 path.join(EXT_DIR, 'uthash', 'src'),
+                path.join(EXT_DIR, 'tpl', 'src'),
                 path.join(EXT_DIR, 'log', 'src'),
             ],
             libraries=['uuid'],

+ 0 - 2
src/namespace.c

@@ -71,8 +71,6 @@ LSUP_nsmap_add (NSMap *map, const ns_pfx pfx, const char *nsstr)
 
     // Add.
     MALLOC_GUARD (entry, LSUP_MEM_ERR);
-    //entry = malloc (sizeof (*entry));
-    //if (UNLIKELY (!entry)) return LSUP_MEM_ERR;
 
     entry->ns = strdup (nsstr);
     strcpy (entry->pfx, pfx);

+ 2 - 0
test/assets/test.nt

@@ -0,0 +1,2 @@
+<urn:s:1> <urn:p:1> <urn:o:1> .
+<urn:s:2> <urn:p:2> <urn:o:2> .

+ 74 - 0
test/cpython_test.py

@@ -0,0 +1,74 @@
+import unittest
+
+from os import path
+
+from lsup_rdf import env_init, term, triple, graph
+
+# Resolved absolute directory containing this test module.
+TEST_DIR = path.realpath(path.dirname(__file__))
+
+
+class TestTerm(unittest.TestCase):
+    def setUp(self):
+        self.s1 = term.Term(term.TERM_IRIREF, "urn:s:1")
+        self.p1 = term.Term(term.TERM_IRIREF, "urn:p:1")
+        self.o1 = term.Term(term.TERM_IRIREF, "urn:o:1")
+        self.s2 = term.Term(term.TERM_IRIREF, "urn:s:2")
+        self.p2 = term.Term(term.TERM_IRIREF, "urn:p:2")
+        self.o2 = term.Term(term.TERM_IRIREF, "urn:o:2")
+        self.s3 = term.Term(term.TERM_IRIREF, "urn:s:3")
+        self.p3 = term.Term(term.TERM_IRIREF, "urn:p:3")
+        self.o3 = term.Term(term.TERM_IRIREF, "urn:o:3")
+
+        self.trp = [
+            triple.Triple(self.s1, self.p1, self.o1),
+            triple.Triple(self.s2, self.p2, self.o2),
+        ]
+        self.t3 = triple.Triple(self.s3, self.p3, self.o3)
+        self.t4 = triple.Triple(self.s1, self.p1, self.o1)
+
+    def test_term(self):
+        s1 = term.Term(term.TERM_IRIREF, "urn:s:1")
+
+        self.assertTrue(isinstance(s1, term.Term))
+        self.assertEqual(s1.data, "urn:s:1")
+        self.assertEqual(s1.type, term.TERM_IRIREF)
+
+    def test_graph(self):
+        gr = graph.Graph(graph.STORE_MEM)
+        gr.uri = term.Term(term.TERM_IRIREF, 'urn:c:1')
+
+        self.assertEqual(gr.uri, 'urn:c:1')
+
+    def test_graph_ops(self):
+        gr = graph.Graph(graph.STORE_MEM)
+
+        print('Adding triples.')
+        gr.add(self.trp)
+
+        self.assertEqual(len(gr), 2)
+        self.assertTrue(self.trp[0] in gr)
+        self.assertTrue(self.trp[1] in gr)
+        self.assertFalse(self.t3 in gr)
+        self.assertTrue(self.t4 in gr)
+
+        gr.remove(self.s1, None, None)
+
+        self.assertFalse(self.trp[0] in gr)
+        self.assertTrue(self.trp[1] in gr)
+
+        print('Encoded NT:')
+        for line in gr.to_rdf('nt'):
+            print(line)
+
+    def test_deserialize(self):
+        print('From file.')
+        with open(path.join(TEST_DIR, 'assets', 'test.nt'), 'rb') as fh:
+            gr2 = graph.Graph.from_rdf(fh, 'nt')
+
+        self.assertTrue(self.trp[0] in gr2)
+        self.assertTrue(self.trp[1] in gr2)
+
+
+# env_init() is called only when this file is executed directly; a test
+# runner that imports the module will not call it.
+# NOTE(review): confirm that skipping env_init() under a runner is intended.
+if __name__ == '__main__':
+    env_init()
+    unittest.main()