ソースを参照

[WIP] Add Imr tests and start fixing IMR class.

Stefano Cossu 6 年 前
コミット
40678e0c49

+ 2 - 1
.gitmodules

@@ -11,4 +11,5 @@
     url = https://github.com/centaurean/spookyhash.git
 [submodule "ext/collections-c"]
     path = ext/collections-c
-    url = https://github.com/srdja/Collections-C
+    url = https://github.com/scossu/lsup-collections-c.git
+    branch = fix_iter_segfault

+ 1 - 1
ext/collections-c

@@ -1 +1 @@
-Subproject commit a188188f61b5ab3d5614446de3a3674a620e73b9
+Subproject commit 402d5fa7d29000a578dbaba425179d45115e7f10

+ 6 - 4
lakesuperior/model/graph/graph.pxd

@@ -48,11 +48,12 @@ cdef class SimpleGraph:
         void ip_subtraction(self, SimpleGraph other) except *
         void ip_intersection(self, SimpleGraph other) except *
         void ip_xor(self, SimpleGraph other) except *
+        SimpleGraph empty_copy(self)
 
-    cpdef SimpleGraph union(self, SimpleGraph other)
-    cpdef SimpleGraph subtraction(self, SimpleGraph other)
-    cpdef SimpleGraph intersection(self, SimpleGraph other)
-    cpdef SimpleGraph xor(self, SimpleGraph other)
+    cpdef union_(self, SimpleGraph other)
+    cpdef subtraction(self, SimpleGraph other)
+    cpdef intersection(self, SimpleGraph other)
+    cpdef xor(self, SimpleGraph other)
     cpdef void set(self, tuple trp) except *
     cpdef void remove_triples(self, pattern) except *
     cpdef object as_rdflib(self)
@@ -62,5 +63,6 @@ cdef class SimpleGraph:
 cdef class Imr(SimpleGraph):
     cdef:
         readonly str uri
+        Imr empty_copy(self)
 
     cpdef as_rdflib(self)

+ 35 - 41
lakesuperior/model/graph/graph.pyx

@@ -362,7 +362,16 @@ cdef class SimpleGraph:
 
     # # # BASIC SET OPERATIONS # # #
 
-    cpdef SimpleGraph union(self, SimpleGraph other):
+    cdef SimpleGraph empty_copy(self):
+        """
+        Create an empty copy carrying over some key properties.
+
+        The new object is built by calling this instance's own class with
+        this graph's ``store`` attribute passed as the ``store`` keyword
+        argument; no other argument is forwarded to the constructor.
+
+        Override in subclasses to accommodate for different init properties.
+
+        :rtype: SimpleGraph
+        :return: The newly constructed instance.
+        """
+        return self.__class__(store=getattr(self, 'store'))
+
+
+    cpdef union_(self, SimpleGraph other):
         """
         Perform set union resulting in a new SimpleGraph instance.
 
@@ -376,9 +385,9 @@ cdef class SimpleGraph:
         cdef:
             void *cur
             cc.HashSetIter it
-            SimpleGraph new_gr = SimpleGraph()
             BufferTriple *trp
 
+        new_gr = self.empty_copy()
         new_gr.store = self.store
 
         for gr in (self, other):
@@ -410,7 +419,7 @@ cdef class SimpleGraph:
             self._add_triple(bt)
 
 
-    cpdef SimpleGraph intersection(self, SimpleGraph other):
+    cpdef intersection(self, SimpleGraph other):
         """
         Graph intersection.
 
@@ -422,7 +431,8 @@ cdef class SimpleGraph:
         cdef:
             void *cur
             cc.HashSetIter it
-            SimpleGraph new_gr = SimpleGraph()
+
+        new_gr = self.empty_copy()
 
         cc.hashset_iter_init(&it, self._triples)
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
@@ -458,7 +468,7 @@ cdef class SimpleGraph:
                 self._remove_triple(bt)
 
 
-    cpdef SimpleGraph subtraction(self, SimpleGraph other):
+    cpdef subtraction(self, SimpleGraph other):
         """
         Graph set-theoretical subtraction.
 
@@ -473,7 +483,8 @@ cdef class SimpleGraph:
         cdef:
             void *cur
             cc.HashSetIter it
-            SimpleGraph new_gr = SimpleGraph()
+
+        new_gr = self.empty_copy()
 
         cc.hashset_iter_init(&it, self._triples)
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
@@ -508,7 +519,7 @@ cdef class SimpleGraph:
                 self._remove_triple(bt)
 
 
-    cpdef SimpleGraph xor(self, SimpleGraph other):
+    cpdef xor(self, SimpleGraph other):
         """
         Graph Exclusive disjunction (XOR).
 
@@ -520,9 +531,10 @@ cdef class SimpleGraph:
         cdef:
             void *cur
             cc.HashSetIter it
-            SimpleGraph new_gr = SimpleGraph()
             BufferTriple* bt
 
+        new_gr = self.empty_copy()
+
         # Add triples in this and not in other.
         cc.hashset_iter_init(&it, self._triples)
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
@@ -805,7 +817,7 @@ cdef class SimpleGraph:
 
     def __add__(self, other):
         """ Alias for set-theoretical union. """
-        return self.union(other)
+        return self.union_(other)
 
 
     def __iadd__(self, other):
@@ -837,7 +849,7 @@ cdef class SimpleGraph:
 
     def __or__(self, other):
         """ Set-theoretical union. """
-        return self.union(other)
+        return self.union_(other)
 
 
     def __ior__(self, other):
@@ -1068,13 +1080,13 @@ cdef class Imr(SimpleGraph):
 
         :param rdflib.URIRef uri: The graph URI.
             This will serve as the subject for some queries.
-        :param set data: Initial data as a set of 3-tuples of RDFLib terms.
-        :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
-            E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
-            Any and all elements may be ``None``.
-        :param lmdbStore store: the store to look data up.
+        :param args: Positional arguments inherited from
+            ``SimpleGraph.__init__``.
+        :param kwargs: Keyword arguments inherited from
+            ``SimpleGraph.__init__``.
         """
         self.uri = str(uri)
+        #super().__init(*args, **kwargs)
 
 
     @property
@@ -1094,7 +1106,7 @@ cdef class Imr(SimpleGraph):
 
         :rtype: SimpleGraph
         """
-        return SimpleGraph(self.data)
+        raise NotImplementedError() # TODO
 
 
     def __repr__(self):
@@ -1107,31 +1119,6 @@ cdef class Imr(SimpleGraph):
         return (f'<{self.__class__.__name__} @{hex(id(self))} uri={self.uri}, '
             f'length={len(self.data)}>')
 
-    def __sub__(self, other):
-        """
-        Set difference. This creates a new Imr with the same subject URI.
-        """
-        return self.__class__(uri=self.uri, data=self.data - other)
-
-    def __and__(self, other):
-        """
-        Set intersection. This creates a new Imr with the same subject URI.
-        """
-        return self.__class__(uri=self.uri, data=self.data & other)
-
-    def __or__(self, other):
-        """
-        Set union. This creates a new Imr with the same subject URI.
-        """
-        return self.__class__(uri=self.uri, data=self.data | other)
-
-    def __xor__(self, other):
-        """
-        Set exclusive OR (XOR). This creates a new Imr with the same subject
-        URI.
-        """
-        return self.__class__(uri=self.uri, data=self.data ^ other)
-
 
     def __getitem__(self, item):
         """
@@ -1150,6 +1137,13 @@ cdef class Imr(SimpleGraph):
             raise TypeError(f'Wrong slice format: {item}.')
 
 
+    cdef Imr empty_copy(self):
+        """
+        Create an empty instance carrying over some key properties.
+
+        The new object is built by calling this instance's own class with
+        this graph's ``uri`` and ``store`` attributes passed as the keyword
+        arguments of the same names; no other argument is forwarded to the
+        constructor.
+
+        :rtype: Imr
+        :return: The newly constructed instance.
+        """
+        return self.__class__(uri=self.uri, store=getattr(self, 'store'))
+
+
     def value(self, p, strict=False):
         """
         Get an individual value.

+ 277 - 0
tests/0_data_structures/test_graph.py

@@ -274,3 +274,280 @@ class TestGraphOps:
         assert trp[5] in gr1
 
 
+
+@pytest.mark.usefixtures('trp')
+class TestImrOps:
+    """
+    Test basic and set-theoretical operations on ``Imr`` graphs.
+
+    Every test receives the triples to work with from the ``trp`` fixture.
+    Tests producing a new graph also verify that the result carries the URI
+    of the left-hand operand; in-place tests verify that the URI of the
+    modified graph is unchanged.
+    """
+    def test_len(self, trp):
+        """
+        Test the length of a graph with and without duplicates.
+        """
+        imr = Imr(uri='http://example.edu/imr01')
+        assert len(imr) == 0
+
+        imr.add((trp[0],))
+        assert len(imr) == 1
+
+        imr.add((trp[1],)) # Same values
+        assert len(imr) == 1
+
+        imr.add((trp[2],))
+        assert len(imr) == 2
+
+        imr.add(trp)
+        assert len(imr) == 6
+
+
+    def test_dup(self, trp):
+        """
+        Test operations with duplicate triples.
+        """
+        imr = Imr(uri='http://example.edu/imr01')
+
+        imr.add((trp[0],))
+        assert trp[1] in imr
+        assert trp[2] not in imr
+
+
+    def test_remove(self, trp):
+        """
+        Test adding and removing triples.
+        """
+        imr = Imr(uri='http://example.edu/imr01')
+
+        imr.add(trp)
+        imr.remove(trp[0])
+        assert len(imr) == 5
+        assert trp[0] not in imr
+        assert trp[1] not in imr
+
+        # This is the duplicate triple.
+        imr.remove(trp[1])
+        assert len(imr) == 5
+
+        # This is the triple in reverse order.
+        imr.remove(trp[2])
+        assert len(imr) == 4
+
+        imr.remove(trp[4])
+        assert len(imr) == 3
+
+
+    def test_union(self, trp):
+        """
+        Test graph union.
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:3])
+        gr2.add(trp[2:6])
+
+        gr3 = gr1 | gr2
+
+        assert len(gr3) == 5
+        assert trp[0] in gr3
+        assert trp[4] in gr3
+
+        assert gr3.uri == 'http://example.edu/imr01'
+
+
+    def test_ip_union(self, trp):
+        """
+        Test graph in-place union.
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:3])
+        gr2.add(trp[2:6])
+
+        gr1 |= gr2
+
+        assert len(gr1) == 5
+        assert trp[0] in gr1
+        assert trp[4] in gr1
+
+        assert gr1.uri == 'http://example.edu/imr01'
+
+
+    def test_addition(self, trp):
+        """
+        Test graph addition.
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:3])
+        gr2.add(trp[2:6])
+
+        gr3 = gr1 + gr2
+
+        assert len(gr3) == 5
+        assert trp[0] in gr3
+        assert trp[4] in gr3
+
+        assert gr3.uri == 'http://example.edu/imr01'
+
+
+    def test_ip_addition(self, trp):
+        """
+        Test graph in-place addition.
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:3])
+        gr2.add(trp[2:6])
+
+        gr1 += gr2
+
+        assert len(gr1) == 5
+        assert trp[0] in gr1
+        assert trp[4] in gr1
+
+        assert gr1.uri == 'http://example.edu/imr01'
+
+
+    def test_subtraction(self, trp):
+        """
+        Test graph subtraction.
+
+        Both operand orders are exercised; the result is expected to carry
+        the URI of the left-hand operand in each case.
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:4])
+        gr2.add(trp[2:6])
+
+        gr3 = gr1 - gr2
+
+        assert len(gr3) == 1
+        assert trp[0] in gr3
+        assert trp[1] in gr3
+        assert trp[2] not in gr3
+        assert trp[3] not in gr3
+        assert trp[4] not in gr3
+
+        assert gr3.uri == 'http://example.edu/imr01'
+
+        gr3 = gr2 - gr1
+
+        assert len(gr3) == 2
+        assert trp[0] not in gr3
+        assert trp[1] not in gr3
+        assert trp[2] not in gr3
+        assert trp[3] not in gr3
+        assert trp[4] in gr3
+        assert trp[5] in gr3
+
+        # The left-hand operand is now gr2, hence its URI is expected.
+        assert gr3.uri == 'http://example.edu/imr02'
+
+
+    def test_ip_subtraction(self, trp):
+        """
+        Test graph in-place subtraction.
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:4])
+        gr2.add(trp[2:6])
+
+        gr1 -= gr2
+
+        assert len(gr1) == 1
+        assert trp[0] in gr1
+        assert trp[1] in gr1
+        assert trp[2] not in gr1
+        assert trp[3] not in gr1
+        assert trp[4] not in gr1
+
+        assert gr1.uri == 'http://example.edu/imr01'
+
+
+    def test_intersect(self, trp):
+        """
+        Test graph intersection.
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:4])
+        gr2.add(trp[2:6])
+
+        gr3 = gr1 & gr2
+
+        assert len(gr3) == 2
+        assert trp[2] in gr3
+        assert trp[3] in gr3
+        assert trp[0] not in gr3
+        assert trp[5] not in gr3
+
+        assert gr3.uri == 'http://example.edu/imr01'
+
+
+    def test_ip_intersect(self, trp):
+        """
+        Test graph in-place intersection.
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:4])
+        gr2.add(trp[2:6])
+
+        gr1 &= gr2
+
+        assert len(gr1) == 2
+        assert trp[2] in gr1
+        assert trp[3] in gr1
+        assert trp[0] not in gr1
+        assert trp[5] not in gr1
+
+        assert gr1.uri == 'http://example.edu/imr01'
+
+
+    def test_xor(self, trp):
+        """
+        Test graph exclusive disjunction (XOR).
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:4])
+        gr2.add(trp[2:6])
+
+        gr3 = gr1 ^ gr2
+
+        assert len(gr3) == 3
+        assert trp[2] not in gr3
+        assert trp[3] not in gr3
+        assert trp[0] in gr3
+        assert trp[5] in gr3
+
+        assert gr3.uri == 'http://example.edu/imr01'
+
+
+    def test_ip_xor(self, trp):
+        """
+        Test graph in-place exclusive disjunction (XOR).
+        """
+        gr1 = Imr(uri='http://example.edu/imr01')
+        gr2 = Imr(uri='http://example.edu/imr02')
+
+        gr1.add(trp[0:4])
+        gr2.add(trp[2:6])
+
+        gr1 ^= gr2
+
+        assert len(gr1) == 3
+        assert trp[2] not in gr1
+        assert trp[3] not in gr1
+        assert trp[0] in gr1
+        assert trp[5] in gr1
+
+        assert gr1.uri == 'http://example.edu/imr01'
+
+