Ver código fonte

WIP move tests.

Stefano Cossu 5 anos atrás
pai
commit
f8d33f9c93

+ 9 - 6
lakesuperior/model/graph/graph.pxd

@@ -2,10 +2,7 @@ from libc.stdint cimport uint32_t, uint64_t
 
 from cymem.cymem cimport Pool
 
-from lakesuperior.cy_include.collections cimport (
-    HashSet, HashSetConf,
-    #_hash_ft, _key_compare_ft, _mem_alloc_ft, _mem_calloc_ft, _mem_free_ft,
-)
+from lakesuperior.cy_include cimport collections as cc
 from lakesuperior.model.base cimport Buffer
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.structures.keyset cimport Keyset
@@ -22,23 +19,29 @@ ctypedef Buffer *BufferPtr
 cdef:
     int term_cmp_fn(const void* key1, const void* key2)
     int trp_cmp_fn(const void* key1, const void* key2)
+    bint graph_eq_fn(SimpleGraph g1, SimpleGraph g2)
     size_t trp_hash_fn(const void* key, int l, uint32_t seed)
     size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed)
 
 cdef class SimpleGraph:
     cdef:
-        HashSet *_terms # Set of unique serialized terms.
-        HashSet *_triples # Set of unique triples.
+        cc.HashSet *_terms # Set of unique serialized terms.
+        cc.HashSet *_triples # Set of unique triples.
         readonly LmdbTriplestore store
         # Temp data pool. It gets managed with the object lifecycle via cymem.
         Pool _pool
 
+        cc.key_compare_ft term_cmp_fn
+        cc.key_compare_ft trp_cmp_fn
+
         void _data_from_lookup(self, tuple trp_ptn, ctx=*) except *
         void _data_from_keyset(self, Keyset data) except *
         inline void _add_from_spok(self, const TripleKey spok) except *
         inline void _add_triple(
             self, Buffer *ss, Buffer *sp, Buffer *so
         ) except *
+        int _remove_triple(self, BufferTriple* trp_buf) except -1
+        bint _trp_contains(self, BufferTriple* btrp)
         set _to_pyset(self)
 
     cpdef void set(self, tuple trp) except *

+ 76 - 45
lakesuperior/model/graph/graph.pyx

@@ -90,6 +90,25 @@ cdef int trp_cmp_fn(const void* key1, const void* key2):
     return is_not_equal
 
 
+cdef bint graph_eq_fn(SimpleGraph g1, SimpleGraph g2):
+    """
+    Compare 2 graphs for equality.
+
+    Two graphs are equal when their triple sets have the same size and
+    every triple of ``g1`` is also contained in ``g2``.
+
+    Note that this returns the opposite value than the triple and term
+    compare functions: 1 (True) if equal, 0 (False) if not.
+
+    :param SimpleGraph g1: First graph to compare.
+    :param SimpleGraph g2: Second graph to compare.
+
+    :rtype: bint
+    :return: True if both graphs hold the same triples, False otherwise.
+    """
+    cdef:
+        void* el
+        cc.HashSetIter it
+
+    # Without the size check a strict subset of ``g2`` would compare equal,
+    # because only the triples of ``g1`` are walked below.
+    if cc.hashset_size(g1._triples) != cc.hashset_size(g2._triples):
+        return False
+
+    cc.hashset_iter_init(&it, g1._triples)
+    while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
+        # A single triple missing from ``g2`` makes the graphs differ.
+        if not cc.hashset_contains(g2._triples, el):
+            return False
+
+    return True
+
+
 cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
     """
     Hash function for serialized terms (:py:class:`Buffer` objects)
@@ -234,18 +253,21 @@ cdef class SimpleGraph:
         cdef:
             cc.HashSetConf terms_conf, trp_conf
 
+        self.term_cmp_fn = &term_cmp_fn
+        self.trp_cmp_fn = &trp_cmp_fn
+
         cc.hashset_conf_init(&terms_conf)
         terms_conf.load_factor = 0.85
         terms_conf.hash = &term_hash_fn
         terms_conf.hash_seed = term_hash_seed32
-        terms_conf.key_compare = &term_cmp_fn
+        terms_conf.key_compare = self.term_cmp_fn
         terms_conf.key_length = sizeof(Buffer*)
 
         cc.hashset_conf_init(&trp_conf)
         trp_conf.load_factor = 0.75
         trp_conf.hash = &trp_hash_fn
         trp_conf.hash_seed = term_hash_seed32
-        trp_conf.key_compare = &trp_cmp_fn
+        trp_conf.key_compare = self.trp_cmp_fn
         trp_conf.key_length = sizeof(BufferTriple)
 
         cc.hashset_new_conf(&terms_conf, &self._terms)
@@ -413,40 +435,6 @@ cdef class SimpleGraph:
         cdef size_t terms_sz = cc.hashset_size(self._terms)
         logger.info(f'Terms set size: {terms_sz}')
 
-        #cdef cc.HashSetIter ti
-        #cdef Buffer *t
-        #cc.hashset_iter_init(&ti, self._terms)
-        #while calg.set_iter_has_more(&ti):
-        #    t = <Buffer *>calg.set_iter_next(&ti)
-
-        # # # Test area
-        #cdef:
-        #    cc.HashSet* testset
-        #    cc.HashSetConf testconf
-        #    int i = 24
-        #    size_t sz
-
-        #cc.hashset_conf_init(&testconf)
-        #testconf.hash = &hash_ptr_passthrough # spookyhash_64?
-        #testconf.hash_seed = term_hash_seed32
-        #testconf.key_length = sizeof(int*)
-        #testconf.key_compare = &trp_cmp_fn
-        #testconf.key_length = sizeof(BufferTriple*)
-
-        #cc.hashset_new_conf(&testconf, &testset)
-
-        #sz = cc.hashset_size(testset)
-        #print(f'Test set size (start): {sz}')
-
-        #cc.hashset_add(testset, &i)
-        #sz = cc.hashset_size(testset)
-        #print(f'Test set size (1st insert): {sz}')
-
-        #cc.hashset_add(testset, &i)
-        #sz = cc.hashset_size(testset)
-        #print(f'Test set size (2nd insert): {sz}')
-        # # # END test area
-
         trp.s = ss
         trp.p = sp
         trp.o = so
@@ -472,11 +460,37 @@ cdef class SimpleGraph:
                     <size_t>test_trp.s, <size_t>test_trp.p, <size_t>test_trp.o))
 
 
+    cdef int _remove_triple(self, BufferTriple* trp_buf) except -1:
+        """
+        Delete a single triple from this graph's triple set.
+
+        :param BufferTriple* trp_buf: Pointer to the triple to remove.
+
+        :rtype: int
+        :return: The status code handed back by ``cc.hashset_remove``.
+        """
+        cdef int status = cc.hashset_remove(self._triples, trp_buf, NULL)
+
+        return status
 
-        #cdef BufferTriple *tt
-        #calg.set_iterate(self._triples, &ti)
-        #while calg.set_iter_has_more(&ti):
-        #    tt = <BufferTriple *>calg.set_iter_next(&ti)
+
+    cdef bint _trp_contains(self, BufferTriple* btrp):
+        """
+        Tell whether a triple is held in this graph.
+
+        Each term of ``btrp`` is first looked up in the term set; unless
+        all three lookups report ``CC_DUP_KEY`` the answer is False. The
+        term pointers of ``btrp`` are then overwritten with the pointers
+        written back by those lookups, and the triple set is scanned with
+        the triple compare function.
+
+        :param BufferTriple* btrp: Triple to look for. Its ``s``, ``p`` and
+            ``o`` members are reassigned once all three terms are found.
+
+        :rtype: bint
+        :return: True as soon as the compare function returns 0 for a
+            stored triple, False otherwise.
+        """
+        cdef:
+            cc.HashSetIter trp_it
+            void* stored
+            void* ss = <void*>btrp.s
+            void* sp = <void*>btrp.p
+            void* so = <void*>btrp.o
+
+        # NOTE(review): ``hashset_add_or_get`` presumably inserts a term
+        # that is missing, which would make this lookup modify the term
+        # set -- confirm against the ``cc`` declarations.
+        if cc.hashset_add_or_get(self._terms, &ss) != cc.CC_DUP_KEY:
+            return False
+        if cc.hashset_add_or_get(self._terms, &sp) != cc.CC_DUP_KEY:
+            return False
+        if cc.hashset_add_or_get(self._terms, &so) != cc.CC_DUP_KEY:
+            return False
+
+        # Swap in the term pointers handed back by the lookups above.
+        btrp.s = <Buffer*>ss
+        btrp.p = <Buffer*>sp
+        btrp.o = <Buffer*>so
+
+        cc.hashset_iter_init(&trp_it, self._triples)
+        while cc.hashset_iter_next(&trp_it, &stored) != cc.CC_ITER_END:
+            # NOTE(review): the compare function receives the addresses of
+            # the local pointer variables, not the pointers themselves --
+            # verify this matches what ``trp_cmp_fn`` expects.
+            if self.trp_cmp_fn(&stored, &btrp) == 0:
+                return True
+
+        return False
 
 
     cdef set _to_pyset(self):
@@ -520,7 +534,7 @@ cdef class SimpleGraph:
         cdef size_t cur = 0
 
         trp_ct = len(trp)
-        trp_buf = <SPOBuffer>self._pool.alloc(3 * trp_ct, sizeof(Buffer))
+        trp_buf = <Buffer *>self._pool.alloc(3 * trp_ct, sizeof(Buffer))
 
         for s, p, o in trp:
             term.serialize_from_rdflib(s, trp_buf + cur, self._pool)
@@ -547,7 +561,19 @@ cdef class SimpleGraph:
         :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
             wildcards are not accepted.
         """
-        self.data.remove(trp)
+        cdef:
+            Buffer ss, sp, so
+            BufferTriple trp_buf
+
+        term.serialize_from_rdflib(trp[0], &ss, self._pool)
+        term.serialize_from_rdflib(trp[1], &sp, self._pool)
+        term.serialize_from_rdflib(trp[2], &so, self._pool)
+
+        trp_buf.s = &ss
+        trp_buf.p = &sp
+        trp_buf.o = &so
+
+        self._remove_triple(&trp_buf)
 
 
     def __len__(self):
@@ -558,7 +584,7 @@ cdef class SimpleGraph:
     @use_data
     def __eq__(self, other):
         """ Equality operator between ``SimpleGraph`` instances. """
-        return self.data == other
+        return graph_eq_fn(self, other)
 
 
     def __repr__(self):
@@ -625,14 +651,19 @@ cdef class SimpleGraph:
         :rtype: boolean
         """
         cdef:
+            Buffer ss, sp, so
             BufferTriple btrp
 
+        btrp.s = &ss
+        btrp.p = &sp
+        btrp.o = &so
+
         s, p, o = trp
         term.serialize_from_rdflib(s, btrp.s)
         term.serialize_from_rdflib(p, btrp.p)
         term.serialize_from_rdflib(o, btrp.o)
 
-        return bool(cc.hashset_contains(self._triples, &btrp))
+        return self._trp_contains(&btrp)
 
 
     def __iter__(self):
@@ -778,7 +809,7 @@ cdef class SimpleGraph:
 
         # Iterate over serialized triples.
         cc.hashset_iter_init(&ti, self._triples)
-        while cc.hashset_iter_next(&ti, &void_p) == cc.CC_OK:
+        while cc.hashset_iter_next(&ti, &void_p) != cc.CC_ITER_END:
             if void_p == NULL:
                 trp_p = <BufferTriple *>void_p
                 res.add((

+ 9 - 1
setup.py

@@ -26,8 +26,16 @@ try:
 except ImportError:
     USE_CYTHON = False
 else:
-    if Cython.__version__ == CYTHON_VERSION:
+    cy_installed_version = Cython.__version__
+    if cy_installed_version == CYTHON_VERSION:
         USE_CYTHON = True
+    else:
+        raise ImportError(
+            f'The installed Cython version ({cy_installed_version}) '
+            f'does not match the required version ({CYTHON_VERSION}). '
+            'Please insstall the exact required Cython version in order to '
+            'generate the C sources.'
+        )
 
 
 # ``pytest_runner`` is referenced in ``setup_requires``.

+ 66 - 0
tests/0_data_structures/test_graph.py

@@ -0,0 +1,66 @@
+import pytest
+
+from rdflib import Graph, Namespace, URIRef
+
+from lakesuperior.model.graph.graph import SimpleGraph, Imr
+
+@pytest.fixture(scope='class')
+def trp():
+    return (
+        (URIRef('urn:s:0'), URIRef('urn:p:0'), URIRef('urn:o:0')),
+        (URIRef('urn:s:0'), URIRef('urn:p:0'), URIRef('urn:o:0')),
+        (URIRef('urn:s:0'), URIRef('urn:p:1'), URIRef('urn:o:0')),
+        (URIRef('urn:s:0'), URIRef('urn:p:1'), URIRef('urn:o:1')),
+        (URIRef('urn:s:1'), URIRef('urn:p:1'), URIRef('urn:o:1')),
+        (URIRef('urn:s:1'), URIRef('urn:p:2'), URIRef('urn:o:2')),
+    )
+
+@pytest.mark.usefixtures('trp')
+class TestGraphOps:
+    """
+    Test various graph operations.
+    """
+    def test_len(self, trp):
+        """
+        Test the length of a graph with and without duplicates.
+        """
+        gr = SimpleGraph()
+        assert len(gr) == 0
+
+        gr.add((trp[0],))
+        assert len(gr) == 1
+
+        gr.add((trp[1],)) # Same values
+        assert len(gr) == 1
+
+        gr.add((trp[2],))
+        assert len(gr) == 2
+
+        gr.add(trp)
+        assert len(gr) == 5
+
+
+    def test_dup(self, trp):
+        """
+        Test operations with duplicate triples.
+        """
+        gr = SimpleGraph()
+        #import pdb; pdb.set_trace()
+
+        gr.add((trp[0],))
+        assert trp[1] in gr
+        assert trp[2] not in gr
+
+
+    def test_remove(self, trp):
+        """
+        Test adding and removing triples.
+        """
+        gr = SimpleGraph()
+
+        gr.add(trp)
+        gr.remove(trp[1])
+        assert len(gr) == 4
+        assert trp[0] not in gr
+        assert trp[1] not in gr
+

+ 0 - 0
tests/0_store/test_lmdb_store.py → tests/1_store/test_lmdb_store.py


+ 0 - 0
tests/1_api/test_admin_api.py → tests/2_api/test_admin_api.py


+ 0 - 0
tests/1_api/test_resource_api.py → tests/2_api/test_resource_api.py


+ 0 - 0
tests/2_endpoints/test_admin.py → tests/3_endpoints/test_admin.py


+ 0 - 0
tests/2_endpoints/test_ldp.py → tests/3_endpoints/test_ldp.py


+ 0 - 0
tests/2_endpoints/test_query.py → tests/3_endpoints/test_query.py


+ 0 - 0
tests/3_ancillary/test_toolbox.py → tests/4_ancillary/test_toolbox.py