4 vuotta sitten · 8555a45d3d
--- a/README.md
+++ b/README.md
@@ -6,12 +6,14 @@ Embedded RDF (and maybe later, generic graph) store and manipulation library.
 
															 ## Purpose
														
 
															-The goal of this library is to provide extremely efficient and compact
														
 
															-handling of RDF data. At least a C API and Python bindings are planned.
														
 
															+The goal of this library is to provide efficient and compact handling of RDF
														
 
															+data. At least a complete C API and Python bindings are planned.
														
 
															 This library can be thought of as SQLite or BerkeleyDB for graphs. It can be
														
 
															 embedded directly in a program and store persistent data without the need of
														
 
															-running a server.
														
 
															+running a server. In addition, `lsup_rdf` can perform in-memory graph
														
 
															+operations such as validation, de/serialization, boolean operations, lookup,
														
 
															+etc.
														
 
															 Two graph back ends are available: a memory one based on hash maps and a
														
 
															 disk-based one based on [LMDB](https://symas.com/lmdb/), an extremely fast and
														
@@ -29,9 +31,9 @@ remain focused on serving Lakesuperior.
 
															 ## Development Status
														
 
															-**Pre-alpha.** The API is not yet defined and may change radically. The code
														
 
															-may not compile, or throw a fit when run. At the moment this project is only
														
 
															-intended for curious developers and researchers.
														
 
															+**Alpha.** The API structure is not yet stable and may change radically. The
														
 
															+code may not compile, or throw a fit when run. Testing is minimal. At the
														
 
															+moment this project is only intended for curious developers and researchers.
														
 
															 This is also my first stab at writing a C library (coming from Python) and an
														
 
															 unpaid fun project, so don't be surprised if you find some gross stuff.
														
@@ -48,9 +50,10 @@ of features as a standalone library:
 
															 - Memory- and disk-backed (persistent) graph storage
														
 
															 - Contexts (disk-backed only)
														
 
															 - Handling of blank nodes
														
 
															+- Namespace prefixes
														
 
															 - Validation of literal and URI terms
														
 
															 - Validation of RDF triples
														
 
															-- Fast graph Lookup using matching patterns
														
 
															+- Fast graph lookup using matching patterns
														
 
															 - Graph boolean operations
														
 
															 - Serialization and de-serialization to/from N-Triples and N-Quads
														
 
															 - Serialization and de-serialization to/from Turtle and TriG
														
@@ -62,6 +65,7 @@ of features as a standalone library:
 
															 - Binary serialization and hashing of graphs
														
 
															 - Binary protocol for synchronizing remote replicas
														
 
															+- Backend for massive distributed storage (possibly Ceph)
														
 
															 - Lua bindings
														
 
															 ### Likely Out of Scope
														
--- a/TODO.md
+++ b/TODO.md
@@ -17,15 +17,21 @@
 
															     - *D* term, triple, graph modules
														
 
															     - *D* Codec integration
														
 
															     - *D* Graph remove and lookup ops
														
 
															-    - *W* Namespace module
														
 
															-    - *P* Query and slicing methods
														
 
															-    - *P* Tests
														
 
															+    - *D* Namespace module
														
 
															+    - *D* Lookup methods
														
 
															+    - *D* Tests (basic)
														
 
															+    - *P* Subclass term types
														
 
															 - *P* Turtle serialization / deserialization
														
 
															+- *P* Extended tests
														
 
															+    - *P* C API
														
 
															+    - *P* Python API
														
 
															 ## Non-critical for MVP
														
 
															 - Term and triple validation
														
 
															+- Enhanced graph operations
														
 
															+    - Extract unique terms and 2-term tuples
														
 
															 - NQ codec
														
 
															 - TriG codec
														
--- a/cpython/py_graph.h
+++ b/cpython/py_graph.h
@@ -410,11 +410,8 @@ Graph_add (PyObject *self, PyObject *triples)
 
															         log_trace ("Inserting triple #%lu", i);
														
 
															-        LSUP_BufferTriple *sspo = LSUP_btriple_from_triple (
														
 
															-                ((TripleObject *) trp_obj)->ob_struct);
														
 
															-        LSUP_rc db_rc = LSUP_graph_add_iter (it, sspo);
														
 
															-
														
 
															-        LSUP_btriple_free (sspo);
														
 
															+        LSUP_rc db_rc = LSUP_graph_add_iter (
														
 
															+                it, ((TripleObject *) trp_obj)->ob_struct);
														
 
															         if (db_rc == LSUP_OK) rc = LSUP_OK;
														
 
															         if (UNLIKELY (db_rc < 0)) {
														
--- a/cpython/py_term.h
+++ b/cpython/py_term.h
@@ -28,7 +28,10 @@ Term_init (TermObject *self, PyObject *args, PyObject *kwargs)
 
															             &term_type, &data, &datatype, &lang))
														
 
															         return -1;
														
 
															-    self->ob_struct = LSUP_term_new ((LSUP_TermType) term_type, data, datatype, lang);
														
 
															+    char *metadata = datatype ? datatype : lang;
														
 
															+
														
 
															+    self->ob_struct = LSUP_term_new (
														
 
															+            (LSUP_TermType) term_type, data, metadata);
														
 
															     if (!self->ob_struct) {
														
 
															         PyErr_SetString (PyExc_ValueError, "Could not create term.");
														
 
															         return -1;
														
@@ -70,7 +73,10 @@ Term_get_datatype (TermObject *self, void *closure)
 
															 {
														
 
															     if (!self->ob_struct->datatype) Py_RETURN_NONE;
														
 
															-    PyObject *datatype = PyUnicode_FromString (self->ob_struct->datatype);
														
 
															+    const LSUP_Term *dtype = LSUP_tcache_get (self->ob_struct->datatype);
														
 
															+    if (!dtype) Py_RETURN_NONE;
														
 
															+
														
 
															+    PyObject *datatype = PyUnicode_FromString (dtype->data);
														
 
															     Py_INCREF (datatype);
														
 
															     return datatype;
														
--- a/setup.py
+++ b/setup.py
@@ -17,6 +17,7 @@ sources = (
 
															         path.join(EXT_DIR, 'openldap', 'libraries', 'liblmdb', 'mdb.c'),
														
 
															         path.join(EXT_DIR, 'openldap', 'libraries', 'liblmdb', 'midl.c'),
														
 
															         path.join(EXT_DIR, 'xxHash', 'xxhash.c'),
														
 
															+        path.join(EXT_DIR, 'tpl', 'src', 'tpl.c'),
														
 
															         path.join(EXT_DIR, 'log', 'src', 'log.c'),
														
 
															     ]
														
 
															 )
														
@@ -49,6 +50,7 @@ setup(
 
															                 ROOT_DIR,
														
 
															                 INCL_DIR,
														
 
															                 path.join(EXT_DIR, 'uthash', 'src'),
														
 
															+                path.join(EXT_DIR, 'tpl', 'src'),
														
 
															                 path.join(EXT_DIR, 'log', 'src'),
														
 
															             ],
														
 
															             libraries=['uuid'],
														
--- a/src/namespace.c
+++ b/src/namespace.c
@@ -71,8 +71,6 @@ LSUP_nsmap_add (NSMap *map, const ns_pfx pfx, const char *nsstr)
 
															     // Add.
														
 
															     MALLOC_GUARD (entry, LSUP_MEM_ERR);
														
 
															-    //entry = malloc (sizeof (*entry));
														
 
															-    //if (UNLIKELY (!entry)) return LSUP_MEM_ERR;
														
 
															     entry->ns = strdup (nsstr);
														
 
															     strcpy (entry->pfx, pfx);
														
--- a/test/assets/test.nt
+++ b/test/assets/test.nt
@@ -0,0 +1,2 @@
 
															+<urn:s:1> <urn:p:1> <urn:o:1> .
														
 
															+<urn:s:2> <urn:p:2> <urn:o:2> .
														
--- a/test/cpython_test.py
+++ b/test/cpython_test.py
@@ -0,0 +1,74 @@
 
															+import unittest
														
 
															+
														
 
															+from os import path
														
 
															+
														
 
															+from lsup_rdf import env_init, term, triple, graph
														
 
															+
														
 
															+TEST_DIR = path.realpath(path.dirname(__file__))
														
 
															+
														
 
															+
														
 
															+class TestTerm(unittest.TestCase):
														
 
															+    def setUp(self):
														
 
															+        self.s1 = term.Term(term.TERM_IRIREF, "urn:s:1")
														
 
															+        self.p1 = term.Term(term.TERM_IRIREF, "urn:p:1")
														
 
															+        self.o1 = term.Term(term.TERM_IRIREF, "urn:o:1")
														
 
															+        self.s2 = term.Term(term.TERM_IRIREF, "urn:s:2")
														
 
															+        self.p2 = term.Term(term.TERM_IRIREF, "urn:p:2")
														
 
															+        self.o2 = term.Term(term.TERM_IRIREF, "urn:o:2")
														
 
															+        self.s3 = term.Term(term.TERM_IRIREF, "urn:s:3")
														
 
															+        self.p3 = term.Term(term.TERM_IRIREF, "urn:p:3")
														
 
															+        self.o3 = term.Term(term.TERM_IRIREF, "urn:o:3")
														
 
															+
														
 
															+        self.trp = [
														
 
															+            triple.Triple(self.s1, self.p1, self.o1),
														
 
															+            triple.Triple(self.s2, self.p2, self.o2),
														
 
															+        ]
														
 
															+        self.t3 = triple.Triple(self.s3, self.p3, self.o3)
														
 
															+        self.t4 = triple.Triple(self.s1, self.p1, self.o1)
														
 
															+
														
 
															+    def test_term(self):
														
 
															+        s1 = term.Term(term.TERM_IRIREF, "urn:s:1")
														
 
															+
														
 
															+        self.assertTrue(isinstance(s1, term.Term))
														
 
															+        self.assertEqual(s1.data, "urn:s:1")
														
 
															+        self.assertEqual(s1.type, term.TERM_IRIREF)
														
 
															+
														
 
															+    def test_graph(self):
														
 
															+        gr = graph.Graph(graph.STORE_MEM)
														
 
															+        gr.uri = term.Term(term.TERM_IRIREF, 'urn:c:1')
														
 
															+
														
 
															+        self.assertEqual(gr.uri, 'urn:c:1')
														
 
															+
														
 
															+    def test_graph_ops(self):
														
 
															+        gr = graph.Graph(graph.STORE_MEM)
														
 
															+
														
 
															+        print('Adding triples.')
														
 
															+        gr.add(self.trp)
														
 
															+
														
 
															+        self.assertEqual(len(gr), 2)
														
 
															+        self.assertTrue(self.trp[0] in gr)
														
 
															+        self.assertTrue(self.trp[1] in gr)
														
 
															+        self.assertFalse(self.t3 in gr)
														
 
															+        self.assertTrue(self.t4 in gr)
														
 
															+
														
 
															+        gr.remove(self.s1, None, None)
														
 
															+
														
 
															+        self.assertFalse(self.trp[0] in gr)
														
 
															+        self.assertTrue(self.trp[1] in gr)
														
 
															+
														
 
															+        print('Encoded NT:')
														
 
															+        for line in gr.to_rdf('nt'):
														
 
															+            print(line)
														
 
															+
														
 
															+    def test_deserialize(self):
														
 
															+        print('From file.')
														
 
															+        with open(path.join(TEST_DIR, 'assets', 'test.nt'), 'rb') as fh:
														
 
															+            gr2 = graph.Graph.from_rdf(fh, 'nt')
														
 
															+
														
 
															+        self.assertTrue(self.trp[0] in gr2)
														
 
															+        self.assertTrue(self.trp[1] in gr2)
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    env_init()
														
 
															+    unittest.main()
	`@@ -0,0 +1,2 @@`
			`+<urn:s:1> <urn:p:1> <urn:o:1> .`
			`+<urn:s:2> <urn:p:2> <urn:o:2> .`