Jelajahi Sumber

AAARRRRGGGHH....

Stefano Cossu 6 tahun lalu
induk
melakukan
2b40911d5b

+ 12 - 8
lakesuperior/cy_include/calg.pxd

@@ -3,32 +3,36 @@ cdef extern from 'set.h':
     #ctypedef _SetEntry SetEntry
     ctypedef void *SetValue
 
+    ctypedef unsigned int (*SetHashFunc)(SetValue value)
+    ctypedef bint (*SetEqualFunc)(SetValue value1, SetValue value2)
+    ctypedef void (*SetFreeFunc)(SetValue value)
+
     ctypedef struct SetEntry:
         SetValue data
         SetEntry *next
 
-    ctypedef struct Set:
+    ctypedef struct _Set:
         SetEntry **table
         unsigned int entries
         unsigned int table_size
         unsigned int prime_index
-        #SetHashFunc hash_func
-        #SetEqualFunc equal_func
-        #SetFreeFunc free_func
+        SetHashFunc hash_func
+        SetEqualFunc equal_func
+        SetFreeFunc free_func
+
+    ctypedef _Set Set
 
     ctypedef struct SetIterator:
         pass
 
-    ctypedef unsigned int (*SetHashFunc)(SetValue value)
-    ctypedef bint (*SetEqualFunc)(SetValue value1, SetValue value2)
-    ctypedef void (*SetFreeFunc)(SetValue value)
-
     Set *set_new(SetHashFunc hash_func, SetEqualFunc equal_func)
     void set_free(Set *set)
     # TODO This should return an int, ideally. See
     # https://github.com/fragglet/c-algorithms/issues/20
     bint set_insert(Set *set, SetValue data)
+    bint set_insert_or_assign(Set *set, SetValue *data)
     bint set_query(Set *set, SetValue data)
+    bint set_enlarge(Set *set)
     unsigned int set_num_entries(Set *set)
     SetValue *set_to_array(Set *set)
     Set *set_union(Set *set1, Set *set2)

+ 3 - 1
lakesuperior/model/graph/graph.pxd

@@ -12,6 +12,7 @@ ctypedef bint (*lookup_fn_t)(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2)
 
 ctypedef Buffer SPOBuffer[3]
+ctypedef Buffer *BufferPtr
 
 cdef:
     unsigned int term_hash_fn(const calg.SetValue data)
@@ -32,8 +33,9 @@ cdef class SimpleGraph:
         void _data_from_keyset(self, Keyset data) except *
         inline void _add_from_spok(self, const TripleKey spok) except *
         inline void _add_triple(
-            self, const Buffer *ss, const Buffer *sp, const Buffer *so
+            self, Buffer *ss, Buffer *sp, Buffer *so
         ) except *
+        int _add_or_get_term(self, Buffer **data) except -1
         set _data_as_set(self)
 
     cpdef void set(self, tuple trp) except *

+ 61 - 5
lakesuperior/model/graph/graph.pyx

@@ -171,6 +171,7 @@ cdef inline bint lookup_po_cmp_fn(
 
 
 
+
 cdef class SimpleGraph:
     """
     Fast and simple implementation of a graph.
@@ -302,7 +303,7 @@ cdef class SimpleGraph:
 
 
     cdef inline void _add_triple(
-        self, const Buffer *ss, const Buffer *sp, const Buffer *so
+        self, BufferPtr ss, BufferPtr sp, BufferPtr so
     ) except *:
         """
         Add a triple from 3 (TPL) serialized terms.
@@ -317,12 +318,15 @@ cdef class SimpleGraph:
         logger.info(f'ss sz: {ss.sz}')
         #logger.info('ss:')
         #logger.info((<unsigned char *>ss.addr)[:ss.sz])
-        logger.info('Insert ss')
-        calg.set_insert(self._terms, ss)
+        logger.info('Insert ss @:')
+        print(<unsigned long>ss)
+        self._add_or_get_term(&ss)
+        logger.info('Now ss is @:')
+        print(<unsigned long>ss)
         logger.info('Insert sp')
-        calg.set_insert(self._terms, sp)
+        self._add_or_get_term(&sp)
         logger.info('Insert so')
-        calg.set_insert(self._terms, so)
+        self._add_or_get_term(&so)
         logger.info('inserted terms.')
         cdef size_t terms_sz = calg.set_num_entries(self._terms)
         logger.info('Terms set size: {terms_sz}')
@@ -347,6 +351,58 @@ cdef class SimpleGraph:
             tt = <BufferTriple *>calg.set_iter_next(&ti)
 
 
+    cdef int _add_or_get_term(self, Buffer **data) except -1:
+        """
+        Insert a term in the terms set, or get one that already exists.
+
+        This is a slightly modified replica of ``calg.set_insert`` which
+        takes a ``Buffer **`` pointer. If no equal term is in the set, the
+        pointed-to term is inserted; otherwise ``data[0]`` is replaced with
+        the stored term. The caller can then keep using ``data[0]`` in the
+        same way without having to know whether the term was added or not.
+
+        New set entries are allocated from ``self._pool``; only the term
+        pointer is stored, so the term buffer itself must outlive the set.
+
+        :param data: Address of a pointer to the term to look up or insert.
+        :return: 1 if the term was added, 0 if an equal one was present.
+        :raise MemoryError: If the hash table cannot be enlarged.
+        """
+        cdef:
+            calg.SetEntry *newentry
+            calg.SetEntry *rover
+            unsigned int index
+
+        # Enlarge the table once it is at least 1/3 full. Written as a
+        # comparison so the result does not depend on whether ``/`` compiles
+        # to integer or true division.
+        if self._terms.entries * 3 >= self._terms.table_size:
+            if not calg.set_enlarge(self._terms):
+                raise MemoryError()
+
+        # Hash the term itself (``data[0]``), not the address of the caller's
+        # pointer variable.
+        index = self._terms.hash_func(data[0]) % self._terms.table_size
+
+        # Walk the bucket chain looking for an equal term.
+        rover = self._terms.table[index]
+        while rover != NULL:
+            if self._terms.equal_func(data[0], rover.data) != 0:
+                # Already present: hand the stored term back to the caller.
+                data[0] = <Buffer *>rover.data
+                return 0
+            rover = rover.next
+
+        # Not found: link a new entry holding the term at the chain head.
+        newentry = <calg.SetEntry *>self._pool.alloc(1, sizeof(calg.SetEntry))
+        newentry.data = data[0]
+        newentry.next = self._terms.table[index]
+        self._terms.table[index] = newentry
+        self._terms.entries += 1
+
+        return 1
+
+
     cdef set _data_as_set(self):
         """
         Convert triple data to a Python set.