Browse Source

WIP Add multiple triples in one go.
[ci skip]

Stefano Cossu 5 years ago
parent
commit
3fcfe11261

+ 99 - 0
docs/structures.rst

@@ -0,0 +1,99 @@
+Data Structure Internals
+========================
+
+**(Draft)**
+
+Lakesuperior has its own methods for handling in-memory graphs. These methods
+rely on C data structures and are therefore much faster than Python/RDFLib
+objects.
+
+The graph data model modules are in :py:mod:`lakesuperior.model.graph`.
+
+The Graph Data Model
+--------------------
+
+Triples are stored in a C hash set. Each triple is represented by a pointer to
+a ``BufferTriple`` structure stored in a temporary memory pool. This pool is
+tied to the life cycle of the ``SimpleGraph`` object it belongs to.
+
+A triple structure contains three pointers to ``Buffer`` structures, each of
+which contains a serialized version of an RDF term. These structures are
+stored in the ``SimpleGraph`` memory pool as well.
+
+Each ``SimpleGraph`` object has a ``_terms`` property and a ``_triples``
+property. These are C hash sets holding addresses of unique terms and
+triples inserted in the graph. If the same term is entered more than once,
+in any position in any triple, the first one entered is used and is pointed to
+by the triple. This makes the graph data structure very compact.
+
+In summary, the pointers can be represented this way::
+
+   <serialized term data in mem pool (x3)>
+         ^      ^      ^
+         |      |      |
+   <Term structures in mem pool (x3)>
+         ^      ^      ^
+         |      |      |
+   <Term struct addresses in _terms set (x3)>
+         ^      ^      ^
+         |      |      |
+   <Triple structure in mem pool>
+         ^
+         |
+   <address of triple in _triples set>
+
+Let's say we insert the following triples in a ``SimpleGraph``::
+
+   <urn:s:0> <urn:p:0> <urn:o:0>
+   <urn:s:0> <urn:p:1> <urn:o:1>
+   <urn:s:0> <urn:p:1> <urn:o:2>
+   <urn:s:0> <urn:p:0> <urn:o:0>
+
+The memory pool contains the following byte arrays of raw data, listed below
+with their relative addresses (simplified to 8-bit addresses and fixed-length
+byte strings for readability)::
+
+   0x00     <urn:s:0>
+   0x09     <urn:p:0>
+   0x12     <urn:o:0>
+
+   0x1b     <urn:s:0>
+   0x24     <urn:p:1>
+   0x2d     <urn:o:1>
+
+   0x36     <urn:s:0>
+   0x3f     <urn:p:1>
+   0x48     <urn:o:2>
+
+   0x51     <urn:s:0>
+   0x5a     <urn:p:0>
+   0x63     <urn:o:0>
+
+However, the ``_terms`` set contains only ``Buffer`` structures pointing to
+unique addresses::
+
+   0x00
+   0x09
+   0x12
+   0x24
+   0x2d
+   0x48
+
+The remaining (duplicate) terms are simply left unused in the pool. They will
+be deallocated en masse when the ``SimpleGraph`` object is garbage collected.
+
+The ``_triples`` set would then contain 3 unique entries pointing to the unique
+term addresses::
+
+   0x00  0x09  0x12
+   0x00  0x24  0x2d
+   0x00  0x24  0x48
+
+(The addresses stored would actually be those of the structures pointing to
+the raw data; this is just a simplified, illustrative example.)
+
+The advantage of this approach is that the memory pool is contiguous and
+append-only (until it gets purged), so it is cheap to just add to it. The
+sets, which must maintain uniqueness and are the target of most operations
+(lookup, adding, removing, slicing, copying, etc.), contain much less data
+and are therefore faster.

+ 11 - 12
lakesuperior/cy_include/collections.pxd

@@ -155,11 +155,11 @@ cdef extern from "common.h":
 
 cdef extern from "hashtable.h":
 
-#    ctypedef struct TableEntry:
-#        void*       key
-#        void*       value
-#        size_t      hash
-#        TableEntry* next
+    ctypedef struct TableEntry:
+        void*       key
+        void*       value
+        size_t      hash
+        TableEntry* next
 
     ctypedef struct HashTable:
         pass
@@ -177,12 +177,11 @@ cdef extern from "hashtable.h":
         mem_free_ft   mem_free
 
     ctypedef struct HashTableIter:
-        pass
-#        HashTable* table
-#        size_t bucket_index
-#        TableEntry* prev_entry
-#        TableEntry* next_entry
-#
+        HashTable* table
+        size_t bucket_index
+        TableEntry* prev_entry
+        TableEntry* next_entry
+
 #    size_t get_table_index(HashTable *table, void *key)
 #
 #    void hashtable_conf_init(HashTableConf* conf)
@@ -215,7 +214,7 @@ cdef extern from "hashtable.h":
 #
 #    size_t hashtable_hash(void* key, int len, uint32_t seed)
 #
-#    size_t hashtable_hash_ptr(void* key, int len, uint32_t seed)
+    size_t hashtable_hash_ptr(void* key, int len, uint32_t seed)
 #
 #    ctypedef void (*_hashtable_foreach_key_op_ft)(void*)
 #

+ 5 - 2
lakesuperior/model/graph/graph.pxd

@@ -21,7 +21,7 @@ ctypedef Buffer *BufferPtr
 
 cdef:
     int term_cmp_fn(const void* key1, const void* key2)
-    int triple_cmp_fn(const void* key1, const void* key2)
+    int trp_cmp_fn(const void* key1, const void* key2)
     size_t trp_hash_fn(const void* key, int l, uint32_t seed)
     size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed)
 
@@ -39,13 +39,16 @@ cdef class SimpleGraph:
         inline void _add_triple(
             self, Buffer *ss, Buffer *sp, Buffer *so
         ) except *
-        set _data_as_set(self)
+        set _to_pyset(self)
 
     cpdef void set(self, tuple trp) except *
     cpdef void remove_triples(self, pattern) except *
     cpdef object as_rdflib(self)
     cpdef set terms(self, str type)
 
+    cpdef SimpleGraph union(self, SimpleGraph other)
+    cpdef void ip_union(self, SimpleGraph other)
+
 cdef class Imr(SimpleGraph):
     cdef:
         readonly str uri

+ 190 - 45
lakesuperior/model/graph/graph.pyx

@@ -54,20 +54,17 @@ cdef int term_cmp_fn(const void* key1, const void* key2):
     b2 = <Buffer *>key2
 
     if b1.sz != b2.sz:
+        logger.info(f'Sizes differ: {b1.sz} != {b2.sz}. Return 1.')
         return 1
 
-    #print('Term A:')
-    #print((<unsigned char *>b1.addr)[:b1.sz])
-    #print('Term b:')
-    #print((<unsigned char *>b2.addr)[:b2.sz])
     cdef int cmp = memcmp(b1.addr, b2.addr, b1.sz)
     logger.info(f'term memcmp: {cmp}')
     return cmp
 
 
-cdef int triple_cmp_fn(const void* key1, const void* key2):
+cdef int trp_cmp_fn(const void* key1, const void* key2):
     """
-    Compare function for two triples in a CAlg set.
+    Compare function for two triples in a set.
 
     Here, pointers to terms are compared for s, p, o. The pointers should be
     guaranteed to point to unique values (i.e. no two pointers have the same
@@ -78,13 +75,27 @@ cdef int triple_cmp_fn(const void* key1, const void* key2):
     """
     t1 = <BufferTriple *>key1
     t2 = <BufferTriple *>key2
+    print('Comparing: <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
+        <unsigned long>t1.s, <unsigned long>t1.p, <unsigned long>t1.o))
+    print('With:      <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
+        <unsigned long>t2.s, <unsigned long>t2.p, <unsigned long>t2.o))
 
-    return (
+    cdef int is_not_equal = (
         t1.s.addr != t2.s.addr or
         t1.p.addr != t2.p.addr or
         t1.o.addr != t2.o.addr
     )
 
+    logger.info(f'Triples are NOT equal and will be added: {is_not_equal}')
+    return is_not_equal
+
+
+cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
+    """
+    Hash function for serialized terms (:py:class:`Buffer` objects)
+    """
+    return <size_t>spookyhash_64((<Buffer*>key).addr, (<Buffer*>key).sz, seed)
+
 
 cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
     """
@@ -109,7 +120,8 @@ cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
 
 
 cdef inline bint lookup_none_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     """
     Dummy callback for queries with all parameters unbound.
 
@@ -118,7 +130,9 @@ cdef inline bint lookup_none_cmp_fn(
     return True
 
 
-cdef inline bint lookup_s_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_s_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     """
     Lookup callback compare function for a given s in a triple.
 
@@ -130,27 +144,37 @@ cdef inline bint lookup_s_cmp_fn(const BufferTriple *trp, const Buffer *t1, cons
     return term_cmp_fn(t1, trp[0].s)
 
 
-cdef inline bint lookup_p_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_p_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return term_cmp_fn(t1, trp[0].p)
 
 
-cdef inline bint lookup_o_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_o_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return term_cmp_fn(t1, trp[0].o)
 
 
-cdef inline bint lookup_sp_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_sp_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return (
             term_cmp_fn(t1, trp[0].s)
             and term_cmp_fn(t2, trp[0].p))
 
 
-cdef inline bint lookup_so_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_so_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return (
             term_cmp_fn(t1, trp[0].s)
             and term_cmp_fn(t2, trp[0].o))
 
 
-cdef inline bint lookup_po_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_po_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return (
             term_cmp_fn(t1, trp[0].p)
             and term_cmp_fn(t2, trp[0].o))
@@ -186,7 +210,7 @@ cdef class SimpleGraph:
     """
 
     def __cinit__(
-            self, Keyset keyset=None, store=None, set data=set(), **kwargs):
+            self, Keyset keyset=None, store=None, set data=set(), *args, **kwargs):
         """
         Initialize the graph with pre-existing data or by looking up a store.
 
@@ -208,22 +232,21 @@ cdef class SimpleGraph:
         :param lmdbStore store: the store to look data up.
         """
         cdef:
-            cc.HashSetConf terms_conf
-            cc.HashSetConf trp_conf
+            cc.HashSetConf terms_conf, trp_conf
 
         cc.hashset_conf_init(&terms_conf)
         terms_conf.load_factor = 0.85
-        terms_conf.hash = &hash_ptr_passthrough # spookyhash_64?
+        terms_conf.hash = &term_hash_fn
         terms_conf.hash_seed = term_hash_seed32
         terms_conf.key_compare = &term_cmp_fn
-        terms_conf.key_length = sizeof(void*)
+        terms_conf.key_length = sizeof(Buffer*)
 
         cc.hashset_conf_init(&trp_conf)
         trp_conf.load_factor = 0.75
-        trp_conf.hash = &hash_ptr_passthrough # spookyhash_64?
+        trp_conf.hash = &trp_hash_fn
         trp_conf.hash_seed = term_hash_seed32
-        trp_conf.key_compare = &triple_cmp_fn
-        trp_conf.key_length = sizeof(void*)
+        trp_conf.key_compare = &trp_cmp_fn
+        trp_conf.key_length = sizeof(BufferTriple)
 
         cc.hashset_new_conf(&terms_conf, &self._terms)
         cc.hashset_new_conf(&trp_conf, &self._triples)
@@ -240,6 +263,9 @@ cdef class SimpleGraph:
             for s, p, o in data:
                 self._add_from_rdflib(s, p, o)
 
+        print(len(self))
+        print('SimpleGraph cinit complete.')
+
 
     def __dealloc__(self):
         """
@@ -256,7 +282,58 @@ cdef class SimpleGraph:
 
         :rtype: set
         """
-        return self._data_as_set()
+        return self._to_pyset()
+
+
+    # # # BASIC SET OPERATIONS # # #
+
+    cpdef SimpleGraph union(self, SimpleGraph other):
+        """
+        Perform set union resulting in a new SimpleGraph instance.
+
+        TODO Allow union of multiple graphs at a time.
+
+        :param SimpleGraph other: The other graph to merge.
+
+        :rtype: SimpleGraph
+        :return: A new SimpleGraph instance.
+        """
+        cdef:
+            void *cur
+            cc.HashSetIter it
+            SimpleGraph new_gr = SimpleGraph()
+            BufferTriple *trp
+
+        new_gr.store = self.store
+
+        for gr in (self, other):
+            cc.hashset_iter_init(&it, gr._triples)
+            while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
+                bt = <BufferTriple*>cur
+                new_gr._add_triple(bt.s, bt.p, bt.o)
+
+        return new_gr
+
+
+    cpdef void ip_union(self, SimpleGraph other):
+        """
+        Perform an in-place set union that adds triples to this instance
+
+        TODO Allow union of multiple graphs at a time.
+
+        :param SimpleGraph other: The other graph to merge.
+
+        :rtype: void
+        """
+        cdef:
+            void *cur
+            cc.HashSetIter it
+            BufferTriple *trp
+
+        cc.hashset_iter_init(&it, other._triples)
+        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
+            bt = <BufferTriple*>cur
+            self._add_triple(bt.s, bt.p, bt.o)
 
 
     cdef void _data_from_lookup(self, tuple trp_ptn, ctx=None) except *:
@@ -313,6 +390,10 @@ cdef class SimpleGraph:
         """
         trp = <BufferTriple *>self._pool.alloc(1, sizeof(BufferTriple))
 
+        logger.info('ss: {}'.format((<unsigned char *>ss.addr)[:ss.sz]))
+        logger.info('sp: {}'.format((<unsigned char *>sp.addr)[:sp.sz]))
+        logger.info('so: {}'.format((<unsigned char *>so.addr)[:so.sz]))
+
         logger.info('Inserting terms.')
         logger.info(f'ss addr: {<unsigned long>ss.addr}')
         logger.info(f'ss sz: {ss.sz}')
@@ -321,13 +402,17 @@ cdef class SimpleGraph:
         print('Insert ss: @0x{:02x}'.format(<unsigned long>ss))
         cc.hashset_add_or_get(self._terms, <void **>&ss)
         print('Now ss is: @0x{:02x}'.format(<unsigned long>ss))
-        logger.info('Insert sp')
+
+        print('Insert sp: @0x{:02x}'.format(<unsigned long>sp))
         cc.hashset_add_or_get(self._terms, <void **>&sp)
-        logger.info('Insert so')
+        print('Now sp is: @0x{:02x}'.format(<unsigned long>sp))
+
+        print('Insert so: @0x{:02x}'.format(<unsigned long>so))
         cc.hashset_add_or_get(self._terms, <void **>&so)
+        print('Now so is: @0x{:02x}'.format(<unsigned long>so))
         logger.info('inserted terms.')
         cdef size_t terms_sz = cc.hashset_size(self._terms)
-        logger.info('Terms set size: {terms_sz}')
+        logger.info(f'Terms set size: {terms_sz}')
 
         #cdef cc.HashSetIter ti
         #cdef Buffer *t
@@ -335,21 +420,67 @@ cdef class SimpleGraph:
         #while calg.set_iter_has_more(&ti):
         #    t = <Buffer *>calg.set_iter_next(&ti)
 
+        # # # Test area
+        #cdef:
+        #    cc.HashSet* testset
+        #    cc.HashSetConf testconf
+        #    int i = 24
+        #    size_t sz
+
+        #cc.hashset_conf_init(&testconf)
+        #testconf.hash = &hash_ptr_passthrough # spookyhash_64?
+        #testconf.hash_seed = term_hash_seed32
+        #testconf.key_length = sizeof(int*)
+        #testconf.key_compare = &trp_cmp_fn
+        #testconf.key_length = sizeof(BufferTriple*)
+
+        #cc.hashset_new_conf(&testconf, &testset)
+
+        #sz = cc.hashset_size(testset)
+        #print(f'Test set size (start): {sz}')
+
+        #cc.hashset_add(testset, &i)
+        #sz = cc.hashset_size(testset)
+        #print(f'Test set size (1st insert): {sz}')
+
+        #cc.hashset_add(testset, &i)
+        #sz = cc.hashset_size(testset)
+        #print(f'Test set size (2nd insert): {sz}')
+        # # # END test area
+
         trp.s = ss
         trp.p = sp
         trp.o = so
+        cdef size_t trp_sz = cc.hashset_size(self._triples)
+        logger.info(f'Triples set size before adding: {trp_sz}')
 
         r = cc.hashset_add(self._triples, trp)
         print('Insert triple result:')
         print(r)
 
+        trp_sz = cc.hashset_size(self._triples)
+        logger.info(f'Triples set size after adding: {trp_sz}')
+
+        cdef:
+            cc.HashSetIter ti
+            BufferTriple *test_trp
+            void *cur
+
+        cc.hashset_iter_init(&ti, self._triples)
+        while cc.hashset_iter_next(&ti, &cur) != cc.CC_ITER_END:
+            test_trp = <BufferTriple *>cur
+            print('Triple in set: 0x{:02x} 0x{:02x} 0x{:02x}'.format(
+                    <size_t>test_trp.s, <size_t>test_trp.p, <size_t>test_trp.o))
+
+
+
         #cdef BufferTriple *tt
         #calg.set_iterate(self._triples, &ti)
         #while calg.set_iter_has_more(&ti):
         #    tt = <BufferTriple *>calg.set_iter_next(&ti)
 
 
-    cdef set _data_as_set(self):
+    cdef set _to_pyset(self):
         """
         Convert triple data to a Python set.
 
@@ -381,35 +512,48 @@ cdef class SimpleGraph:
 
     # Basic set operations.
 
-    def add(self, triple):
-        """ Add one triple to the graph. """
-        ss = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
-        sp = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
-        so = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
+    def add(self, trp):
+        """
+        Add triples to the graph.
+
+        :param iterable trp: Set, list or tuple of 3-tuple triples.
+        """
+        cdef size_t cur = 0
 
-        s, p, o = triple
+        trp_ct = len(trp)
+        trp_buf = <SPOBuffer>self._pool.alloc(3 * trp_ct, sizeof(Buffer))
 
-        term.serialize_from_rdflib(s, ss, self._pool)
-        term.serialize_from_rdflib(p, sp, self._pool)
-        term.serialize_from_rdflib(o, so, self._pool)
+        for s, p, o in trp:
+            term.serialize_from_rdflib(s, trp_buf + cur, self._pool)
+            term.serialize_from_rdflib(p, trp_buf + cur + 1, self._pool)
+            term.serialize_from_rdflib(o, trp_buf + cur + 2, self._pool)
 
-        self._add_triple(ss, sp, so)
+            self._add_triple(
+                trp_buf + cur,
+                trp_buf + cur + 1,
+                trp_buf + cur + 2
+            )
+            cur += 3
+
+
+    def len_terms(self):
+        """ Number of unique terms in the graph. """
+        return cc.hashset_size(self._terms)
 
 
-    def remove(self, item):
+    def remove(self, trp):
         """
         Remove one item from the graph.
 
         :param tuple trp: A 3-tuple of RDFlib terms. Only exact terms, i.e.
             wildcards are not accepted.
         """
-        self.data.remove(item)
+        self.data.remove(trp)
 
 
     def __len__(self):
         """ Number of triples in the graph. """
-        #return calg.set_num_entries(self._triples)
-        return len(self.data)
+        return cc.hashset_size(self._triples)
 
 
     @use_data
@@ -522,7 +666,7 @@ cdef class SimpleGraph:
         if None in trp:
             raise ValueError(f'Invalid triple: {trp}')
         self.remove_triples((trp[0], trp[1], None))
-        self.add(trp)
+        self.add((trp,))
 
 
     cpdef void remove_triples(self, pattern) except *:
@@ -601,9 +745,9 @@ cdef class SimpleGraph:
         # Decide comparison logic outside the loop.
         if s is not None and p is not None and o is not None:
             # Return immediately if 3-term match is requested.
-            term.serialize_from_rdflib(s, trp.s)
-            term.serialize_from_rdflib(p, trp.p)
-            term.serialize_from_rdflib(o, trp.o)
+            term.serialize_from_rdflib(s, trp.s, self._pool)
+            term.serialize_from_rdflib(p, trp.p, self._pool)
+            term.serialize_from_rdflib(o, trp.o, self._pool)
 
             if cc.hashset_contains(self._triples, &trp):
                 res.add((s, p, o))
@@ -687,6 +831,7 @@ cdef class Imr(SimpleGraph):
             Any and all elements may be ``None``.
         :param lmdbStore store: the store to look data up.
         """
+        print(len(self))
         self.uri = str(uri)
 
 

+ 3 - 6
lakesuperior/model/graph/term.pyx

@@ -22,7 +22,7 @@ cdef int serialize(const Term *term, Buffer *sterm, Pool pool=None) except -1:
     """
     Serialize a Term into a binary buffer.
 
-    The returned result is dynamically allocated and must be manually freed.
+    The returned result is dynamically allocated in the provided memory pool.
     """
     cdef:
         unsigned char *addr
@@ -57,7 +57,7 @@ cdef int deserialize(const Buffer *data, Term *term) except -1:
 
 cdef int from_rdflib(term_obj, Term *term) except -1:
     """
-    Return a Term struct obtained from a Python/RDFLiib term.
+    Return a Term struct obtained from a Python/RDFLib term.
     """
     _data = str(term_obj).encode()
     term[0].data = _data
@@ -111,10 +111,7 @@ cdef int serialize_from_rdflib(
         else:
             raise ValueError(f'Unsupported term type: {type(term_obj)}')
 
-    tpl.tpl_jot(tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, &_term)
-
-    data[0].addr = addr
-    data[0].sz = sz
+    serialize(&_term, data, pool)
 
 
 cdef object to_rdflib(const Term *term):

+ 3 - 3
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -381,7 +381,7 @@ class RsrcCentricLayout:
         #Get version graphs proper.
         for vtrp in imr:
             # Add the hasVersion triple to the result graph.
-            vmeta.add(vtrp)
+            vmeta.add((vtrp,))
             vmeta_gr = SimpleGraph(
                 lookup=((
                     None, nsc['foaf'].primaryTopic, vtrp[2]), HIST_GR_URI),
@@ -394,7 +394,7 @@ class RsrcCentricLayout:
                             (trp[1] != nsc['rdf'].type
                             or trp[2] not in self.ignore_vmeta_types)
                             and (trp[1] not in self.ignore_vmeta_preds)):
-                        vmeta.add((vtrp[2], trp[1], trp[2]))
+                        vmeta.add(((vtrp[2], trp[1], trp[2]),))
 
         return vmeta
 
@@ -443,7 +443,7 @@ class RsrcCentricLayout:
                     lookup=((s, cont_p, None), c), store=self.store)[s : cont_p]
             #new_dset = set(ds.graph(c)[s : cont_p])
             for ss in new_dset:
-                dset.add(ss)
+                dset.add((ss,))
                 cc = URIRef(ss.replace(nsc['fcres'], nsc['fcstruct']))
                 sub_dset = self.store.triples((ss, cont_p, None), cc)
                 #if set(ds.graph(cc)[ss : cont_p]):