Переглянути джерело

WIP Add multiple triples in one go.
[ci skip]

Stefano Cossu 5 роки тому
батько
коміт
3fcfe11261

+ 99 - 0
docs/structures.rst

@@ -0,0 +1,99 @@
+Data Structure Internals
+========================
+
+**(Draft)**
+
+Lakesuperior has its own methods for handling in-memory graphs. These methods
+rely on C data structures and are therefore much faster than Python/RDFLib
+objects.
+
+The graph data model modules are in :py:mod:`lakesuperior.model.graph`.
+
+The Graph Data Model
+--------------------
+
+Triples are stored in a C hash set. Each triple is represented by a pointer to
+a ``BufferTriple`` structure stored in a temporary memory pool. This pool is
+tied to the life cycle of the ``SimpleGraph`` object it belongs to.
+
+A triple structure contains three pointers to ``Buffer`` structures, which
+contain a serialized version of an RDF term. These structures are stored in the
+``SimpleGraph`` memory pool as well.
+
+Each ``SimpleGraph`` object has a ``_terms`` property and a ``_triples``
+property. These are C hash sets holding addresses of unique terms and
+triples inserted in the graph. If the same term is entered more than once,
+in any position in any triple, the first one entered is used and is pointed to
+by the triple. This makes the graph data structure very compact.
+
+In summary, the pointers can be represented this way::
+
+   <serialized term data in mem pool (x3)>
+         ^      ^      ^
+         |      |      |
+   <Term structures in mem pool (x3)>
+         ^      ^      ^
+         |      |      |
+   <Term struct addresses in _terms set (x3)>
+         ^      ^      ^
+         |      |      |
+   <Triple structure in mem pool>
+         ^
+         |
+   <address of triple in _triples set>
+
+Let's say we insert the following triples in a ``SimpleGraph``::
+
+   <urn:s:0> <urn:p:0> <urn:o:0>
+   <urn:s:0> <urn:p:1> <urn:o:1>
+   <urn:s:0> <urn:p:1> <urn:o:2>
+   <urn:s:0> <urn:p:0> <urn:o:0>
+
+The memory pool contains the following byte arrays of raw data, listed here
+with their relative addresses (simplified to 8-bit addresses and fixed-length
+byte strings for readability)::
+
+   0x00     <urn:s:0>
+   0x09     <urn:p:0>
+   0x12     <urn:o:0>
+
+   0x1b     <urn:s:0>
+   0x24     <urn:p:1>
+   0x2d     <urn:o:1>
+
+   0x36     <urn:s:0>
+   0x3f     <urn:p:1>
+   0x48     <urn:o:2>
+
+   0x51     <urn:s:0>
+   0x5a     <urn:p:0>
+   0x63     <urn:o:0>
+
+However, the ``_terms`` set contains only ``Buffer`` structures pointing to
+unique addresses::
+
+   0x00
+   0x09
+   0x12
+   0x24
+   0x2d
+   0x48
+
+The other terms are simply left unused. They will be deallocated en masse when
+the ``SimpleGraph`` object is garbage collected.
+
+The ``_triples`` set would then contain 3 unique entries pointing to the unique
+term addresses::
+
+   0x00  0x09  0x12
+   0x00  0x24  0x2d
+   0x00  0x24  0x48
+
+(The actual addresses would belong to the structures pointing to the raw
+data, but this is just an illustrative example.)
+
+The advantage of this approach is that the memory pool is contiguous and
+append-only (until it gets purged), so it's cheap to just add to it, while the
+sets, which must maintain uniqueness and are the target of most operations
+(lookup, adding, removing, slicing, copying, etc.), contain much less data and
+are therefore faster.

+ 11 - 12
lakesuperior/cy_include/collections.pxd

@@ -155,11 +155,11 @@ cdef extern from "common.h":
 
 
 cdef extern from "hashtable.h":
 cdef extern from "hashtable.h":
 
 
-#    ctypedef struct TableEntry:
-#        void*       key
-#        void*       value
-#        size_t      hash
-#        TableEntry* next
+    ctypedef struct TableEntry:
+        void*       key
+        void*       value
+        size_t      hash
+        TableEntry* next
 
 
     ctypedef struct HashTable:
     ctypedef struct HashTable:
         pass
         pass
@@ -177,12 +177,11 @@ cdef extern from "hashtable.h":
         mem_free_ft   mem_free
         mem_free_ft   mem_free
 
 
     ctypedef struct HashTableIter:
     ctypedef struct HashTableIter:
-        pass
-#        HashTable* table
-#        size_t bucket_index
-#        TableEntry* prev_entry
-#        TableEntry* next_entry
-#
+        HashTable* table
+        size_t bucket_index
+        TableEntry* prev_entry
+        TableEntry* next_entry
+
 #    size_t get_table_index(HashTable *table, void *key)
 #    size_t get_table_index(HashTable *table, void *key)
 #
 #
 #    void hashtable_conf_init(HashTableConf* conf)
 #    void hashtable_conf_init(HashTableConf* conf)
@@ -215,7 +214,7 @@ cdef extern from "hashtable.h":
 #
 #
 #    size_t hashtable_hash(void* key, int len, uint32_t seed)
 #    size_t hashtable_hash(void* key, int len, uint32_t seed)
 #
 #
-#    size_t hashtable_hash_ptr(void* key, int len, uint32_t seed)
+    size_t hashtable_hash_ptr(void* key, int len, uint32_t seed)
 #
 #
 #    ctypedef void (*_hashtable_foreach_key_op_ft)(void*)
 #    ctypedef void (*_hashtable_foreach_key_op_ft)(void*)
 #
 #

+ 5 - 2
lakesuperior/model/graph/graph.pxd

@@ -21,7 +21,7 @@ ctypedef Buffer *BufferPtr
 
 
 cdef:
 cdef:
     int term_cmp_fn(const void* key1, const void* key2)
     int term_cmp_fn(const void* key1, const void* key2)
-    int triple_cmp_fn(const void* key1, const void* key2)
+    int trp_cmp_fn(const void* key1, const void* key2)
     size_t trp_hash_fn(const void* key, int l, uint32_t seed)
     size_t trp_hash_fn(const void* key, int l, uint32_t seed)
     size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed)
     size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed)
 
 
@@ -39,13 +39,16 @@ cdef class SimpleGraph:
         inline void _add_triple(
         inline void _add_triple(
             self, Buffer *ss, Buffer *sp, Buffer *so
             self, Buffer *ss, Buffer *sp, Buffer *so
         ) except *
         ) except *
-        set _data_as_set(self)
+        set _to_pyset(self)
 
 
     cpdef void set(self, tuple trp) except *
     cpdef void set(self, tuple trp) except *
     cpdef void remove_triples(self, pattern) except *
     cpdef void remove_triples(self, pattern) except *
     cpdef object as_rdflib(self)
     cpdef object as_rdflib(self)
     cpdef set terms(self, str type)
     cpdef set terms(self, str type)
 
 
+    cpdef SimpleGraph union(self, SimpleGraph other)
+    cpdef void ip_union(self, SimpleGraph other)
+
 cdef class Imr(SimpleGraph):
 cdef class Imr(SimpleGraph):
     cdef:
     cdef:
         readonly str uri
         readonly str uri

+ 190 - 45
lakesuperior/model/graph/graph.pyx

@@ -54,20 +54,17 @@ cdef int term_cmp_fn(const void* key1, const void* key2):
     b2 = <Buffer *>key2
     b2 = <Buffer *>key2
 
 
     if b1.sz != b2.sz:
     if b1.sz != b2.sz:
+        logger.info(f'Sizes differ: {b1.sz} != {b2.sz}. Return 1.')
         return 1
         return 1
 
 
-    #print('Term A:')
-    #print((<unsigned char *>b1.addr)[:b1.sz])
-    #print('Term b:')
-    #print((<unsigned char *>b2.addr)[:b2.sz])
     cdef int cmp = memcmp(b1.addr, b2.addr, b1.sz)
     cdef int cmp = memcmp(b1.addr, b2.addr, b1.sz)
     logger.info(f'term memcmp: {cmp}')
     logger.info(f'term memcmp: {cmp}')
     return cmp
     return cmp
 
 
 
 
-cdef int triple_cmp_fn(const void* key1, const void* key2):
+cdef int trp_cmp_fn(const void* key1, const void* key2):
     """
     """
-    Compare function for two triples in a CAlg set.
+    Compare function for two triples in a set.
 
 
     Here, pointers to terms are compared for s, p, o. The pointers should be
     Here, pointers to terms are compared for s, p, o. The pointers should be
     guaranteed to point to unique values (i.e. no two pointers have the same
     guaranteed to point to unique values (i.e. no two pointers have the same
@@ -78,13 +75,27 @@ cdef int triple_cmp_fn(const void* key1, const void* key2):
     """
     """
     t1 = <BufferTriple *>key1
     t1 = <BufferTriple *>key1
     t2 = <BufferTriple *>key2
     t2 = <BufferTriple *>key2
+    print('Comparing: <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
+        <unsigned long>t1.s, <unsigned long>t1.p, <unsigned long>t1.o))
+    print('With:      <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
+        <unsigned long>t2.s, <unsigned long>t2.p, <unsigned long>t2.o))
 
 
-    return (
+    cdef int is_not_equal = (
         t1.s.addr != t2.s.addr or
         t1.s.addr != t2.s.addr or
         t1.p.addr != t2.p.addr or
         t1.p.addr != t2.p.addr or
         t1.o.addr != t2.o.addr
         t1.o.addr != t2.o.addr
     )
     )
 
 
+    logger.info(f'Triples are NOT equal and will be added: {is_not_equal}')
+    return is_not_equal
+
+
+cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
+    """
+    Hash function for serialized terms (:py:class:`Buffer` objects)
+    """
+    return <size_t>spookyhash_64((<Buffer*>key).addr, (<Buffer*>key).sz, seed)
+
 
 
 cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
 cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
     """
     """
@@ -109,7 +120,8 @@ cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
 
 
 
 
 cdef inline bint lookup_none_cmp_fn(
 cdef inline bint lookup_none_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     """
     """
     Dummy callback for queries with all parameters unbound.
     Dummy callback for queries with all parameters unbound.
 
 
@@ -118,7 +130,9 @@ cdef inline bint lookup_none_cmp_fn(
     return True
     return True
 
 
 
 
-cdef inline bint lookup_s_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_s_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     """
     """
     Lookup callback compare function for a given s in a triple.
     Lookup callback compare function for a given s in a triple.
 
 
@@ -130,27 +144,37 @@ cdef inline bint lookup_s_cmp_fn(const BufferTriple *trp, const Buffer *t1, cons
     return term_cmp_fn(t1, trp[0].s)
     return term_cmp_fn(t1, trp[0].s)
 
 
 
 
-cdef inline bint lookup_p_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_p_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return term_cmp_fn(t1, trp[0].p)
     return term_cmp_fn(t1, trp[0].p)
 
 
 
 
-cdef inline bint lookup_o_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_o_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return term_cmp_fn(t1, trp[0].o)
     return term_cmp_fn(t1, trp[0].o)
 
 
 
 
-cdef inline bint lookup_sp_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_sp_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return (
     return (
             term_cmp_fn(t1, trp[0].s)
             term_cmp_fn(t1, trp[0].s)
             and term_cmp_fn(t2, trp[0].p))
             and term_cmp_fn(t2, trp[0].p))
 
 
 
 
-cdef inline bint lookup_so_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_so_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return (
     return (
             term_cmp_fn(t1, trp[0].s)
             term_cmp_fn(t1, trp[0].s)
             and term_cmp_fn(t2, trp[0].o))
             and term_cmp_fn(t2, trp[0].o))
 
 
 
 
-cdef inline bint lookup_po_cmp_fn(const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+cdef inline bint lookup_po_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
     return (
     return (
             term_cmp_fn(t1, trp[0].p)
             term_cmp_fn(t1, trp[0].p)
             and term_cmp_fn(t2, trp[0].o))
             and term_cmp_fn(t2, trp[0].o))
@@ -186,7 +210,7 @@ cdef class SimpleGraph:
     """
     """
 
 
     def __cinit__(
     def __cinit__(
-            self, Keyset keyset=None, store=None, set data=set(), **kwargs):
+            self, Keyset keyset=None, store=None, set data=set(), *args, **kwargs):
         """
         """
         Initialize the graph with pre-existing data or by looking up a store.
         Initialize the graph with pre-existing data or by looking up a store.
 
 
@@ -208,22 +232,21 @@ cdef class SimpleGraph:
         :param lmdbStore store: the store to look data up.
         :param lmdbStore store: the store to look data up.
         """
         """
         cdef:
         cdef:
-            cc.HashSetConf terms_conf
-            cc.HashSetConf trp_conf
+            cc.HashSetConf terms_conf, trp_conf
 
 
         cc.hashset_conf_init(&terms_conf)
         cc.hashset_conf_init(&terms_conf)
         terms_conf.load_factor = 0.85
         terms_conf.load_factor = 0.85
-        terms_conf.hash = &hash_ptr_passthrough # spookyhash_64?
+        terms_conf.hash = &term_hash_fn
         terms_conf.hash_seed = term_hash_seed32
         terms_conf.hash_seed = term_hash_seed32
         terms_conf.key_compare = &term_cmp_fn
         terms_conf.key_compare = &term_cmp_fn
-        terms_conf.key_length = sizeof(void*)
+        terms_conf.key_length = sizeof(Buffer*)
 
 
         cc.hashset_conf_init(&trp_conf)
         cc.hashset_conf_init(&trp_conf)
         trp_conf.load_factor = 0.75
         trp_conf.load_factor = 0.75
-        trp_conf.hash = &hash_ptr_passthrough # spookyhash_64?
+        trp_conf.hash = &trp_hash_fn
         trp_conf.hash_seed = term_hash_seed32
         trp_conf.hash_seed = term_hash_seed32
-        trp_conf.key_compare = &triple_cmp_fn
-        trp_conf.key_length = sizeof(void*)
+        trp_conf.key_compare = &trp_cmp_fn
+        trp_conf.key_length = sizeof(BufferTriple)
 
 
         cc.hashset_new_conf(&terms_conf, &self._terms)
         cc.hashset_new_conf(&terms_conf, &self._terms)
         cc.hashset_new_conf(&trp_conf, &self._triples)
         cc.hashset_new_conf(&trp_conf, &self._triples)
@@ -240,6 +263,9 @@ cdef class SimpleGraph:
             for s, p, o in data:
             for s, p, o in data:
                 self._add_from_rdflib(s, p, o)
                 self._add_from_rdflib(s, p, o)
 
 
+        print(len(self))
+        print('SimpleGraph cinit complete.')
+
 
 
     def __dealloc__(self):
     def __dealloc__(self):
         """
         """
@@ -256,7 +282,58 @@ cdef class SimpleGraph:
 
 
         :rtype: set
         :rtype: set
         """
         """
-        return self._data_as_set()
+        return self._to_pyset()
+
+
+    # # # BASIC SET OPERATIONS # # #
+
+    cpdef SimpleGraph union(self, SimpleGraph other):
+        """
+        Perform set union resulting in a new SimpleGraph instance.
+
+        TODO Allow union of multiple graphs at a time.
+
+        :param SimpleGraph other: The other graph to merge.
+
+        :rtype: SimpleGraph
+        :return: A new SimpleGraph instance.
+        """
+        cdef:
+            void *cur
+            cc.HashSetIter it
+            SimpleGraph new_gr = SimpleGraph()
+            BufferTriple *trp
+
+        new_gr.store = self.store
+
+        for gr in (self, other):
+            cc.hashset_iter_init(&it, gr._triples)
+            while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
+                bt = <BufferTriple*>cur
+                new_gr._add_triple(bt.s, bt.p, bt.o)
+
+        return new_gr
+
+
+    cpdef void ip_union(self, SimpleGraph other):
+        """
+        Perform an in-place set union that adds triples to this instance
+
+        TODO Allow union of multiple graphs at a time.
+
+        :param SimpleGraph other: The other graph to merge.
+
+        :rtype: void
+        """
+        cdef:
+            void *cur
+            cc.HashSetIter it
+            BufferTriple *trp
+
+        cc.hashset_iter_init(&it, other._triples)
+        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
+            bt = <BufferTriple*>cur
+            self._add_triple(bt.s, bt.p, bt.o)
 
 
 
 
     cdef void _data_from_lookup(self, tuple trp_ptn, ctx=None) except *:
     cdef void _data_from_lookup(self, tuple trp_ptn, ctx=None) except *:
@@ -313,6 +390,10 @@ cdef class SimpleGraph:
         """
         """
         trp = <BufferTriple *>self._pool.alloc(1, sizeof(BufferTriple))
         trp = <BufferTriple *>self._pool.alloc(1, sizeof(BufferTriple))
 
 
+        logger.info('ss: {}'.format((<unsigned char *>ss.addr)[:ss.sz]))
+        logger.info('sp: {}'.format((<unsigned char *>sp.addr)[:sp.sz]))
+        logger.info('so: {}'.format((<unsigned char *>so.addr)[:so.sz]))
+
         logger.info('Inserting terms.')
         logger.info('Inserting terms.')
         logger.info(f'ss addr: {<unsigned long>ss.addr}')
         logger.info(f'ss addr: {<unsigned long>ss.addr}')
         logger.info(f'ss sz: {ss.sz}')
         logger.info(f'ss sz: {ss.sz}')
@@ -321,13 +402,17 @@ cdef class SimpleGraph:
         print('Insert ss: @0x{:02x}'.format(<unsigned long>ss))
         print('Insert ss: @0x{:02x}'.format(<unsigned long>ss))
         cc.hashset_add_or_get(self._terms, <void **>&ss)
         cc.hashset_add_or_get(self._terms, <void **>&ss)
         print('Now ss is: @0x{:02x}'.format(<unsigned long>ss))
         print('Now ss is: @0x{:02x}'.format(<unsigned long>ss))
-        logger.info('Insert sp')
+
+        print('Insert sp: @0x{:02x}'.format(<unsigned long>sp))
         cc.hashset_add_or_get(self._terms, <void **>&sp)
         cc.hashset_add_or_get(self._terms, <void **>&sp)
-        logger.info('Insert so')
+        print('Now sp is: @0x{:02x}'.format(<unsigned long>sp))
+
+        print('Insert so: @0x{:02x}'.format(<unsigned long>so))
         cc.hashset_add_or_get(self._terms, <void **>&so)
         cc.hashset_add_or_get(self._terms, <void **>&so)
+        print('Now so is: @0x{:02x}'.format(<unsigned long>so))
         logger.info('inserted terms.')
         logger.info('inserted terms.')
         cdef size_t terms_sz = cc.hashset_size(self._terms)
         cdef size_t terms_sz = cc.hashset_size(self._terms)
-        logger.info('Terms set size: {terms_sz}')
+        logger.info(f'Terms set size: {terms_sz}')
 
 
         #cdef cc.HashSetIter ti
         #cdef cc.HashSetIter ti
         #cdef Buffer *t
         #cdef Buffer *t
@@ -335,21 +420,67 @@ cdef class SimpleGraph:
         #while calg.set_iter_has_more(&ti):
         #while calg.set_iter_has_more(&ti):
         #    t = <Buffer *>calg.set_iter_next(&ti)
         #    t = <Buffer *>calg.set_iter_next(&ti)
 
 
+        # # # Test area
+        #cdef:
+        #    cc.HashSet* testset
+        #    cc.HashSetConf testconf
+        #    int i = 24
+        #    size_t sz
+
+        #cc.hashset_conf_init(&testconf)
+        #testconf.hash = &hash_ptr_passthrough # spookyhash_64?
+        #testconf.hash_seed = term_hash_seed32
+        #testconf.key_length = sizeof(int*)
+        #testconf.key_compare = &trp_cmp_fn
+        #testconf.key_length = sizeof(BufferTriple*)
+
+        #cc.hashset_new_conf(&testconf, &testset)
+
+        #sz = cc.hashset_size(testset)
+        #print(f'Test set size (start): {sz}')
+
+        #cc.hashset_add(testset, &i)
+        #sz = cc.hashset_size(testset)
+        #print(f'Test set size (1st insert): {sz}')
+
+        #cc.hashset_add(testset, &i)
+        #sz = cc.hashset_size(testset)
+        #print(f'Test set size (2nd insert): {sz}')
+        # # # END test area
+
         trp.s = ss
         trp.s = ss
         trp.p = sp
         trp.p = sp
         trp.o = so
         trp.o = so
+        cdef size_t trp_sz = cc.hashset_size(self._triples)
+        logger.info(f'Triples set size before adding: {trp_sz}')
 
 
         r = cc.hashset_add(self._triples, trp)
         r = cc.hashset_add(self._triples, trp)
         print('Insert triple result:')
         print('Insert triple result:')
         print(r)
         print(r)
 
 
+        trp_sz = cc.hashset_size(self._triples)
+        logger.info(f'Triples set size after adding: {trp_sz}')
+
+        cdef:
+            cc.HashSetIter ti
+            BufferTriple *test_trp
+            void *cur
+
+        cc.hashset_iter_init(&ti, self._triples)
+        while cc.hashset_iter_next(&ti, &cur) != cc.CC_ITER_END:
+            test_trp = <BufferTriple *>cur
+            print('Triple in set: 0x{:02x} 0x{:02x} 0x{:02x}'.format(
+                    <size_t>test_trp.s, <size_t>test_trp.p, <size_t>test_trp.o))
+
+
+
         #cdef BufferTriple *tt
         #cdef BufferTriple *tt
         #calg.set_iterate(self._triples, &ti)
         #calg.set_iterate(self._triples, &ti)
         #while calg.set_iter_has_more(&ti):
         #while calg.set_iter_has_more(&ti):
         #    tt = <BufferTriple *>calg.set_iter_next(&ti)
         #    tt = <BufferTriple *>calg.set_iter_next(&ti)
 
 
 
 
-    cdef set _data_as_set(self):
+    cdef set _to_pyset(self):
         """
         """
         Convert triple data to a Python set.
         Convert triple data to a Python set.
 
 
@@ -381,35 +512,48 @@ cdef class SimpleGraph:
 
 
     # Basic set operations.
     # Basic set operations.
 
 
-    def add(self, triple):
-        """ Add one triple to the graph. """
-        ss = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
-        sp = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
-        so = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
+    def add(self, trp):
+        """
+        Add triples to the graph.
+
+        :param iterable trp: Set, list or tuple of 3-tuple triples.
+        """
+        cdef size_t cur = 0
 
 
-        s, p, o = triple
+        trp_ct = len(trp)
+        trp_buf = <SPOBuffer>self._pool.alloc(3 * trp_ct, sizeof(Buffer))
 
 
-        term.serialize_from_rdflib(s, ss, self._pool)
-        term.serialize_from_rdflib(p, sp, self._pool)
-        term.serialize_from_rdflib(o, so, self._pool)
+        for s, p, o in trp:
+            term.serialize_from_rdflib(s, trp_buf + cur, self._pool)
+            term.serialize_from_rdflib(p, trp_buf + cur + 1, self._pool)
+            term.serialize_from_rdflib(o, trp_buf + cur + 2, self._pool)
 
 
-        self._add_triple(ss, sp, so)
+            self._add_triple(
+                trp_buf + cur,
+                trp_buf + cur + 1,
+                trp_buf + cur + 2
+            )
+            cur += 3
+
+
+    def len_terms(self):
+        """ Number of unique terms in the graph. """
+        return cc.hashset_size(self._terms)
 
 
 
 
-    def remove(self, item):
+    def remove(self, trp):
         """
         """
         Remove one item from the graph.
         Remove one item from the graph.
 
 
         :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
         :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
             wildcards are not accepted.
             wildcards are not accepted.
         """
         """
-        self.data.remove(item)
+        self.data.remove(trp)
 
 
 
 
     def __len__(self):
     def __len__(self):
         """ Number of triples in the graph. """
         """ Number of triples in the graph. """
-        #return calg.set_num_entries(self._triples)
-        return len(self.data)
+        return cc.hashset_size(self._triples)
 
 
 
 
     @use_data
     @use_data
@@ -522,7 +666,7 @@ cdef class SimpleGraph:
         if None in trp:
         if None in trp:
             raise ValueError(f'Invalid triple: {trp}')
             raise ValueError(f'Invalid triple: {trp}')
         self.remove_triples((trp[0], trp[1], None))
         self.remove_triples((trp[0], trp[1], None))
-        self.add(trp)
+        self.add((trp,))
 
 
 
 
     cpdef void remove_triples(self, pattern) except *:
     cpdef void remove_triples(self, pattern) except *:
@@ -601,9 +745,9 @@ cdef class SimpleGraph:
         # Decide comparison logic outside the loop.
         # Decide comparison logic outside the loop.
         if s is not None and p is not None and o is not None:
         if s is not None and p is not None and o is not None:
             # Return immediately if 3-term match is requested.
             # Return immediately if 3-term match is requested.
-            term.serialize_from_rdflib(s, trp.s)
-            term.serialize_from_rdflib(p, trp.p)
-            term.serialize_from_rdflib(o, trp.o)
+            term.serialize_from_rdflib(s, trp.s, self._pool)
+            term.serialize_from_rdflib(p, trp.p, self._pool)
+            term.serialize_from_rdflib(o, trp.o, self._pool)
 
 
             if cc.hashset_contains(self._triples, &trp):
             if cc.hashset_contains(self._triples, &trp):
                 res.add((s, p, o))
                 res.add((s, p, o))
@@ -687,6 +831,7 @@ cdef class Imr(SimpleGraph):
             Any and all elements may be ``None``.
             Any and all elements may be ``None``.
         :param lmdbStore store: the store to look data up.
         :param lmdbStore store: the store to look data up.
         """
         """
+        print(len(self))
         self.uri = str(uri)
         self.uri = str(uri)
 
 
 
 

+ 3 - 6
lakesuperior/model/graph/term.pyx

@@ -22,7 +22,7 @@ cdef int serialize(const Term *term, Buffer *sterm, Pool pool=None) except -1:
     """
     """
     Serialize a Term into a binary buffer.
     Serialize a Term into a binary buffer.
 
 
-    The returned result is dynamically allocated and must be manually freed.
+    The returned result is dynamically allocated in the provided memory pool.
     """
     """
     cdef:
     cdef:
         unsigned char *addr
         unsigned char *addr
@@ -57,7 +57,7 @@ cdef int deserialize(const Buffer *data, Term *term) except -1:
 
 
 cdef int from_rdflib(term_obj, Term *term) except -1:
 cdef int from_rdflib(term_obj, Term *term) except -1:
     """
     """
-    Return a Term struct obtained from a Python/RDFLiib term.
+    Return a Term struct obtained from a Python/RDFLib term.
     """
     """
     _data = str(term_obj).encode()
     _data = str(term_obj).encode()
     term[0].data = _data
     term[0].data = _data
@@ -111,10 +111,7 @@ cdef int serialize_from_rdflib(
         else:
         else:
             raise ValueError(f'Unsupported term type: {type(term_obj)}')
             raise ValueError(f'Unsupported term type: {type(term_obj)}')
 
 
-    tpl.tpl_jot(tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, &_term)
-
-    data[0].addr = addr
-    data[0].sz = sz
+    serialize(&_term, data, pool)
 
 
 
 
 cdef object to_rdflib(const Term *term):
 cdef object to_rdflib(const Term *term):

+ 3 - 3
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -381,7 +381,7 @@ class RsrcCentricLayout:
         #Get version graphs proper.
         #Get version graphs proper.
         for vtrp in imr:
         for vtrp in imr:
             # Add the hasVersion triple to the result graph.
             # Add the hasVersion triple to the result graph.
-            vmeta.add(vtrp)
+            vmeta.add((vtrp,))
             vmeta_gr = SimpleGraph(
             vmeta_gr = SimpleGraph(
                 lookup=((
                 lookup=((
                     None, nsc['foaf'].primaryTopic, vtrp[2]), HIST_GR_URI),
                     None, nsc['foaf'].primaryTopic, vtrp[2]), HIST_GR_URI),
@@ -394,7 +394,7 @@ class RsrcCentricLayout:
                             (trp[1] != nsc['rdf'].type
                             (trp[1] != nsc['rdf'].type
                             or trp[2] not in self.ignore_vmeta_types)
                             or trp[2] not in self.ignore_vmeta_types)
                             and (trp[1] not in self.ignore_vmeta_preds)):
                             and (trp[1] not in self.ignore_vmeta_preds)):
-                        vmeta.add((vtrp[2], trp[1], trp[2]))
+                        vmeta.add(((vtrp[2], trp[1], trp[2]),))
 
 
         return vmeta
         return vmeta
 
 
@@ -443,7 +443,7 @@ class RsrcCentricLayout:
                     lookup=((s, cont_p, None), c), store=self.store)[s : cont_p]
                     lookup=((s, cont_p, None), c), store=self.store)[s : cont_p]
             #new_dset = set(ds.graph(c)[s : cont_p])
             #new_dset = set(ds.graph(c)[s : cont_p])
             for ss in new_dset:
             for ss in new_dset:
-                dset.add(ss)
+                dset.add((ss,))
                 cc = URIRef(ss.replace(nsc['fcres'], nsc['fcstruct']))
                 cc = URIRef(ss.replace(nsc['fcres'], nsc['fcstruct']))
                 sub_dset = self.store.triples((ss, cont_p, None), cc)
                 sub_dset = self.store.triples((ss, cont_p, None), cc)
                 #if set(ds.graph(cc)[ss : cont_p]):
                 #if set(ds.graph(cc)[ss : cont_p]):