Quellcode durchsuchen

Allocate data on boolean operations (fix segfault).

Stefano Cossu vor 5 Jahren
Ursprung
Commit
be3ad0d2e3

+ 2 - 1
lakesuperior/model/graph/graph.pxd

@@ -31,7 +31,8 @@ cdef class SimpleGraph:
         cc.key_compare_ft term_cmp_fn
         cc.key_compare_ft term_cmp_fn
         cc.key_compare_ft trp_cmp_fn
         cc.key_compare_ft trp_cmp_fn
 
 
-        inline void add_triple(self, BufferTriple *trp) except *
+        inline BufferTriple* store_triple(self, const BufferTriple* strp)
+        inline void add_triple(self, BufferTriple *trp, bint add=*) except *
         int remove_triple(self, BufferTriple* trp_buf) except -1
         int remove_triple(self, BufferTriple* trp_buf) except -1
         bint trp_contains(self, BufferTriple* btrp)
         bint trp_contains(self, BufferTriple* btrp)
 
 

+ 64 - 9
lakesuperior/model/graph/graph.pyx

@@ -337,12 +337,12 @@ cdef class SimpleGraph:
             cc.HashSetIter it
             cc.HashSetIter it
             void *cur
             void *cur
 
 
-        terms = {}
+        terms = set()
 
 
         cc.hashset_iter_init(&it, self._terms)
         cc.hashset_iter_init(&it, self._terms)
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
             s_term = <Buffer*>cur
             s_term = <Buffer*>cur
-            terms.append((f'0x{<size_t>cur:02x}', term.deserialize_to_rdflib(s_term)))
+            terms.add((f'0x{<size_t>cur:02x}', term.deserialize_to_rdflib(s_term)))
 
 
         return terms
         return terms
 
 
@@ -577,7 +577,7 @@ cdef class SimpleGraph:
             cc.hashset_iter_init(&it, gr._triples)
             cc.hashset_iter_init(&it, gr._triples)
             while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
             while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
                 bt = <BufferTriple*>cur
                 bt = <BufferTriple*>cur
-                new_gr.add_triple(bt)
+                new_gr.add_triple(bt, True)
 
 
         return new_gr
         return new_gr
 
 
@@ -599,7 +599,7 @@ cdef class SimpleGraph:
         cc.hashset_iter_init(&it, other._triples)
         cc.hashset_iter_init(&it, other._triples)
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
             bt = <BufferTriple*>cur
             bt = <BufferTriple*>cur
-            self.add_triple(bt)
+            self.add_triple(bt, True)
 
 
 
 
     cpdef intersection(self, SimpleGraph other):
     cpdef intersection(self, SimpleGraph other):
@@ -624,7 +624,7 @@ cdef class SimpleGraph:
             #    <size_t>bt.s, <size_t>bt.p, <size_t>bt.o))
             #    <size_t>bt.s, <size_t>bt.p, <size_t>bt.o))
             if other.trp_contains(bt):
             if other.trp_contains(bt):
                 #print('Adding.')
                 #print('Adding.')
-                new_gr.add_triple(bt)
+                new_gr.add_triple(bt, True)
 
 
         return new_gr
         return new_gr
 
 
@@ -676,7 +676,7 @@ cdef class SimpleGraph:
             #    <size_t>bt.s, <size_t>bt.p, <size_t>bt.o))
             #    <size_t>bt.s, <size_t>bt.p, <size_t>bt.o))
             if not other.trp_contains(bt):
             if not other.trp_contains(bt):
                 #print('Adding.')
                 #print('Adding.')
-                new_gr.add_triple(bt)
+                new_gr.add_triple(bt, True)
 
 
         return new_gr
         return new_gr
 
 
@@ -723,14 +723,14 @@ cdef class SimpleGraph:
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
             bt = <BufferTriple*>cur
             bt = <BufferTriple*>cur
             if not other.trp_contains(bt):
             if not other.trp_contains(bt):
-                new_gr.add_triple(bt)
+                new_gr.add_triple(bt, True)
 
 
         # Other way around.
         # Other way around.
         cc.hashset_iter_init(&it, other._triples)
         cc.hashset_iter_init(&it, other._triples)
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
         while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
             bt = <BufferTriple*>cur
             bt = <BufferTriple*>cur
             if not self.trp_contains(bt):
             if not self.trp_contains(bt):
-                new_gr.add_triple(bt)
+                new_gr.add_triple(bt, True)
 
 
         return new_gr
         return new_gr
 
 
@@ -770,13 +770,68 @@ cdef class SimpleGraph:
         self |= tmp
         self |= tmp
 
 
 
 
-    cdef inline void add_triple(self, BufferTriple* trp) except *:
+    cdef inline BufferTriple* store_triple(self, const BufferTriple* strp):
+        """
+        Store triple data in the graph.
+
+        Normally, raw data underlying the triple and terms are only referenced
+        by pointers. If the destination data are garbage collected before the
+        graph is, segfaults are bound to happen.
+
+        This method copies the data to the graph's memory pool, so they are
+        managed with the lifecycle of the graph.
+
+        Note that this method stores items regardless of whether thwy are
+        duplicate or not, so there may be some duplication.
+        """
+        cdef:
+            BufferTriple* dtrp = <BufferTriple*>self.pool.alloc(
+                1, sizeof(BufferTriple)
+            )
+            Buffer* spo = <Buffer*>self.pool.alloc(3, sizeof(Buffer))
+
+        if not dtrp:
+            raise MemoryError()
+        if not spo:
+            raise MemoryError()
+
+        dtrp.s = spo
+        dtrp.p = spo + 1
+        dtrp.o = spo + 2
+
+        spo[0].addr = self.pool.alloc(strp.s.sz, 1)
+        spo[0].sz = strp.s.sz
+        spo[1].addr = self.pool.alloc(strp.p.sz, 1)
+        spo[1].sz = strp.p.sz
+        spo[2].addr = self.pool.alloc(strp.o.sz, 1)
+        spo[2].sz = strp.o.sz
+
+        if not spo[0].addr or not spo[1].addr or not spo[2].addr:
+            raise MemoryError()
+
+        memcpy(dtrp.s.addr, strp.s.addr, strp.s.sz)
+        memcpy(dtrp.p.addr, strp.p.addr, strp.p.sz)
+        memcpy(dtrp.o.addr, strp.o.addr, strp.o.sz)
+
+        return dtrp
+
+
+    cdef inline void add_triple(
+        self, BufferTriple* trp, bint add=False
+    ) except *:
         """
         """
         Add a triple from 3 (TPL) serialized terms.
         Add a triple from 3 (TPL) serialized terms.
 
 
         Each of the terms is added to the term set if not existing. The triple
         Each of the terms is added to the term set if not existing. The triple
         also is only added if not existing.
         also is only added if not existing.
+
+        :param BufferTriple* trp: The triple to add.
+        :param bint add: if ``True``, the triple and term data will be
+            allocated and copied into the graph memory pool.
         """
         """
+        if add:
+            trp = self.store_triple(trp)
+
         logger.info('Inserting terms.')
         logger.info('Inserting terms.')
         cc.hashset_add(self._terms, trp.s)
         cc.hashset_add(self._terms, trp.s)
         cc.hashset_add(self._terms, trp.p)
         cc.hashset_add(self._terms, trp.p)

+ 4 - 2
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -294,11 +294,13 @@ class RsrcCentricLayout:
         imr = Imr(uri=nsc['fcres'][uid])
         imr = Imr(uri=nsc['fcres'][uid])
 
 
         for ctx in contexts:
         for ctx in contexts:
-            imr |= self.store.graph_lookup((None, None, None), ctx)
+            gr = self.store.graph_lookup((None, None, None), ctx)
+            imr |= gr
 
 
         # Include inbound relationships.
         # Include inbound relationships.
         if incl_inbound and len(imr):
         if incl_inbound and len(imr):
-            imr |= SimpleGraph(data=self.get_inbound_rel(nsc['fcres'][uid]))
+            gr = SimpleGraph(data=self.get_inbound_rel(nsc['fcres'][uid]))
+            imr |= gr
 
 
         if strict:
         if strict:
             self._check_rsrc_status(imr)
             self._check_rsrc_status(imr)