6 years ago · 285505de7c
--- a/lakesuperior/model/base.pyx
+++ b/lakesuperior/model/base.pyx
@@ -1,3 +1,7 @@
 
															+__doc__ = """
														
 
															+Basic model typedefs, constants and common methods.
														
 
															+"""
														
 
															+
														
 
															 cdef bytes buffer_dump(const Buffer* buf):
														
 
															     """
														
 
															     Return a buffer's content as a string.
														
--- a/lakesuperior/model/callbacks.pyx
+++ b/lakesuperior/model/callbacks.pyx
@@ -1,5 +1,10 @@
 
															 from lakesuperior.model.base cimport Key, TripleKey
														
 
															+__doc__ = """
														
 
															+Callback methods for various loop functions.
														
 
															+"""
														
 
															+
														
 
															+
														
 
															 cdef inline bint lookup_sk_cmp_fn(
														
 
															     const TripleKey* spok, const Key k1, const Key k2
														
 
															 ):
														
--- a/lakesuperior/model/structures/hash.pyx
+++ b/lakesuperior/model/structures/hash.pyx
@@ -4,6 +4,14 @@ from libc.string cimport memcpy
 
															 from lakesuperior.model.base cimport Buffer
														
 
															 from lakesuperior.cy_include cimport spookyhash as sph
														
 
															+__doc__ = """
														
 
															+C hashing functions used with Cython models.
														
 
															+
														
 
															+The hashing algorithm is `SpookyHash
														
 
															+<http://burtleburtle.net/bob/hash/spooky.html>`_ which produces up to 128-bit
														
 
															+(16-byte) digests.
														
 
															+"""
														
 
															+
														
 
															 memcpy(&term_hash_seed32, TERM_HASH_SEED, HLEN_32)
														
 
															 memcpy(&term_hash_seed64_1, TERM_HASH_SEED, HLEN_64)
														
@@ -32,10 +40,6 @@ cdef inline int hash128(const Buffer *message, Hash128 *hash) except -1:
 
															     """
														
 
															     Get the hash value of a byte string with a defined size.
														
 
															-    The hashing algorithm is `SpookyHash
														
 
															-    <http://burtleburtle.net/bob/hash/spooky.html>`_ which produces 128-bit
														
 
															-    (16-byte) digests.
														
 
															-
														
 
															     Note that this returns a char array while the smaller functions return
														
 
															     numeric types (uint, ulong).
														
--- a/lakesuperior/model/structures/keyset.pyx
+++ b/lakesuperior/model/structures/keyset.pyx
@@ -14,23 +14,35 @@ logger = logging.getLogger(__name__)
 
															 cdef class Keyset:
														
 
															     """
														
 
															-    Pre-allocated set of ``TripleKey``s.
														
 
															+    Memory-contiguous array of ``TripleKey`` elements.
														
 
															+
														
 
															+    The keys are ``size_t`` values that are linked to terms in the triplestore.
														
 
															+    Therefore, a triplestore lookup is necessary to view or use the terms, but
														
 
															+    several types of manipulation and filtering can be done very efficiently
														
 
															+    without looking at the term values.
														
 
															     The set is not checked for duplicates all the time: e.g., when creating
														
 
															-    from a single set of triples coming from the store, the duplicate check
														
 
															-    is turned off for efficiency. When merging with other sets, duplicate
														
 
															-    checking should be turned on.
														
 
															+    from a single set of triples coming from the store, the duplicate check is
														
 
															+    turned off for efficiency and because the source is guaranteed to provide
														
 
															+    unique values. When merging with other sets, duplicate checking should be
														
 
															+    turned on.
														
 
															-    Since this class is based on a contiguous block of memory, it is best to
														
 
															-    do very little manipulation. Several operations involve copying the whole
														
 
															+    Since this class is based on a contiguous block of memory, it is best not
														
 
															+    to do targeted manipulation. Several operations involve copying the whole
														
 
															     data block, so e.g. bulk removal and intersection are much more efficient
														
 
															     than individual record operations.
														
 
															+
														
 
															     """
														
 
															     def __cinit__(self, size_t capacity=0, float expand_ratio=.75):
														
 
															         """
														
 
															         Initialize and allocate memory for the data set.
														
 
															         :param size_t capacity: Number of elements to be accounted for.
														
 
															+
														
 
															+        :param float expand_ratio: by how much, relative to the current
														
 
															+            size, the memory block is expanded when full. A value of 0
														
 
															+            disables automatic expansion, and inserting beyond capacity will
														
 
															+            raise an error.
														
 
															         """
														
 
															         self.capacity = capacity
														
 
															         self.expand_ratio = expand_ratio
														
@@ -56,7 +68,11 @@ cdef class Keyset:
 
															     cdef void seek(self, size_t idx=0):
														
 
															         """
														
 
															-        Place the cursor at a certain index, 0 by default.
														
 
															+        Place the cursor at a given index, 0 by default.
														
 
															+
														
 
															+        :param size_t idx: Position to place the cursor. The position can be
														
 
															+            at most the next unused slot; any value higher than that will
														
 
															+            position the cursor at the next unused slot.
														
 
															         """
														
 
															         self.cur = min(idx, self.free_i)
														
@@ -99,8 +115,7 @@ cdef class Keyset:
 
															     cdef inline int add(
														
 
															-            self, const TripleKey* val, bint check_dup=False,
														
 
															-            bint check_cap=True
														
 
															+        self, const TripleKey* val, bint check_dup=False, bint check_cap=True
														
 
															     ) except -1:
														
 
															         """
														
 
															         Add a triple key to the array.
														
@@ -111,9 +126,12 @@ cdef class Keyset:
 
															         if check_cap and self.free_i >= self.capacity:
														
 
															             if self.expand_ratio > 0:
														
 
															-                # In some edge casees, a very small ratio may round down to a
														
 
															-                # zero increase, so the baseline increase is 1 element.
														
 
															-                self.resize(1 + <size_t>(self.capacity * (1 + self.expand_ratio)))
														
 
															+                # In some cases, a very small initial value and ratio may
														
 
															+                # round down to a zero increase, so the baseline increase is
														
 
															+                # 1 element.
														
 
															+                self.resize(
														
 
															+                    1 + <size_t>(self.capacity * (1 + self.expand_ratio))
														
 
															+                )
														
 
															             else:
														
 
															                 raise MemoryError('No space left in key set.')