浏览代码

Modify upstream collections to allow add or get.

Stefano Cossu 5 年之前
父节点
当前提交
dc387ed4db
共有 4 个文件被更改，包括 44 次插入和 79 次删除
  1. 1 1
      ext/collections-c
  2. 40 39
      lakesuperior/cy_include/collections.pxd
  3. 0 1
      lakesuperior/model/graph/graph.pxd
  4. 3 38
      lakesuperior/model/graph/graph.pyx

+ 1 - 1
ext/collections-c

@@ -1 +1 @@
-Subproject commit a188188f61b5ab3d5614446de3a3674a620e73b9
+Subproject commit be792220b26ee81e6d18695e1d22b5e6b4daf3c0

+ 40 - 39
lakesuperior/cy_include/collections.pxd

@@ -175,32 +175,30 @@ cdef extern from "hashtable.h":
 
     ctypedef table_entry_s TableEntry
 
-cdef extern from "hashtable.c":
-
-    struct hashtable_s:
-        size_t          capacity
-        size_t          size
-        size_t          threshold
-        uint32_t        hash_seed
-        int             key_len
-        float           load_factor
-        TableEntry**      buckets
-
-        #size_t (*hash)        (const void *key, int l, uint32_t seed)
-        #bint    (*key_cmp) (const void *key1, const void *key2)
-        #void *(*mem_alloc)  (size_t size)
-        #void *(*mem_calloc) (size_t blocks, size_t size)
-        #void  (*mem_free)   (void *block)
-        hash_ft           hash
-        key_compare_ft    key_cmp
-        mem_alloc_ft      mem_alloc
-        mem_calloc_ft     mem_calloc
-        mem_free_ft       mem_free
-
-
-cdef extern from "hashtable.h":
-
-    ctypedef hashtable_s HashTable
+#cdef extern from "hashtable.c":
+#
+#    struct hashtable_s:
+#        size_t          capacity
+#        size_t          size
+#        size_t          threshold
+#        uint32_t        hash_seed
+#        int             key_len
+#        float           load_factor
+#        TableEntry**      buckets
+#
+#        #size_t (*hash)        (const void *key, int l, uint32_t seed)
+#        #bint    (*key_cmp) (const void *key1, const void *key2)
+#        #void *(*mem_alloc)  (size_t size)
+#        #void *(*mem_calloc) (size_t blocks, size_t size)
+#        #void  (*mem_free)   (void *block)
+#        hash_ft           hash
+#        key_compare_ft    key_cmp
+#        mem_alloc_ft      mem_alloc
+#        mem_calloc_ft     mem_calloc
+#        mem_free_ft       mem_free
+
+    ctypedef struct HashTable:
+        pass
 
     struct hashtable_conf_s:
         float               load_factor
@@ -278,23 +276,24 @@ cdef extern from "hashtable.h":
     cc_stat hashtable_iter_remove(HashTableIter* iter, void** out)
 
 
-cdef extern from "hashset.c":
-
-    struct hashset_s:
-        HashTable*      table
-        int*            dummy
-
-        mem_alloc_ft  mem_alloc
-        mem_calloc_ft mem_calloc
-        mem_free_ft   mem_free
-        #void *(*mem_alloc)  (size_t size)
-        #void *(*mem_calloc) (size_t blocks, size_t size)
-        #void  (*mem_free)   (void *block)
+#cdef extern from "hashset.c":
+#
+#    struct hashset_s:
+#        HashTable*      table
+#        int*            dummy
+#
+#        mem_alloc_ft  mem_alloc
+#        mem_calloc_ft mem_calloc
+#        mem_free_ft   mem_free
+#        #void *(*mem_alloc)  (size_t size)
+#        #void *(*mem_calloc) (size_t blocks, size_t size)
+#        #void  (*mem_free)   (void *block)
 
 
 cdef extern from "hashset.h":
 
-    ctypedef hashset_s HashSet
+    ctypedef struct HashSet:
+        pass
 
     ctypedef HashTableConf HashSetConf
 
@@ -311,6 +310,8 @@ cdef extern from "hashset.h":
 
     cc_stat hashset_add(HashSet* set, void* element)
 
+    cc_stat hashset_add_or_get(HashSet* set, void** element)
+
     cc_stat hashset_remove(HashSet* set, void* element, void** out)
 
     void hashset_remove_all(HashSet* set)

+ 0 - 1
lakesuperior/model/graph/graph.pxd

@@ -39,7 +39,6 @@ cdef class SimpleGraph:
         inline void _add_triple(
             self, Buffer *ss, Buffer *sp, Buffer *so
         ) except *
-        int _add_or_get_term(self, Buffer **data) except -1
         set _data_as_set(self)
 
     cpdef void set(self, tuple trp) except *

+ 3 - 38
lakesuperior/model/graph/graph.pyx

@@ -227,8 +227,6 @@ cdef class SimpleGraph:
 
         hashset_new_conf(&terms_conf, &self._terms)
         hashset_new_conf(&trp_conf, &self._triples)
-        print(f'Terms member: {self._terms.dummy[0]}')
-        print(f'Triples member: {self._triples.dummy[0]}')
 
         self.store = store or env.app_globals.rdf_store
         self._pool = Pool()
@@ -327,13 +325,13 @@ cdef class SimpleGraph:
         #logger.info((<unsigned char *>ss.addr)[:ss.sz])
         logger.info('Insert ss @:')
         print(<unsigned long>ss)
-        self._add_or_get_term(&ss)
+        cc.hashset_add_or_get(self._terms, <void **>&ss)
         logger.info('Now ss is @:')
         print(<unsigned long>ss)
         logger.info('Insert sp')
-        self._add_or_get_term(&sp)
+        cc.hashset_add_or_get(self._terms, <void **>&sp)
         logger.info('Insert so')
-        self._add_or_get_term(&so)
+        cc.hashset_add_or_get(self._terms, <void **>&so)
         logger.info('inserted terms.')
         cdef size_t terms_sz = hashset_size(self._terms)
         logger.info('Terms set size: {terms_sz}')
@@ -358,39 +356,6 @@ cdef class SimpleGraph:
         #    tt = <BufferTriple *>calg.set_iter_next(&ti)
 
 
-    cdef int _add_or_get_term(self, Buffer **data) except -1:
-        """
-        Insert a term in the terms set, or get one that already exists.
-
-        If the new term is inserted, its address is stored in the memory pool
-        and persists with the :py:class:`SimpleGraph` instance carrying it.
-        Otherwise, the overwritten term is garbage collected as soon as the
-        calling function exits.
-
-        The return value gives an indication of whether the term was added or
-        not.
-        """
-        cdef TableEntry *entry
-
-        table = self._terms.table
-
-        entry = table.buckets[get_table_index(table, data[0].addr)]
-
-        while entry:
-            if table.key_cmp(data[0].addr, entry.key) == 0:
-                # If the term is found, assign the address of entry.key
-                # to the data parameter.
-                data[0] = <Buffer *>entry.key
-                return 1
-            entry = entry.next
-
-        # If the term is not found, add it.
-        # TODO This is inefficient because it searches for the term again.
-        # TODO It would be best to break down the hashset_add function and
-        # TODO remove the check.
-        return hashset_add(self._terms, data[0])
-
-
     cdef set _data_as_set(self):
         """
         Convert triple data to a Python set.