Browse Source

Update docstrings in the rest of the model package.

Stefano Cossu 5 years ago
parent
commit
285505de7c

+ 4 - 0
lakesuperior/model/base.pyx

@@ -1,3 +1,7 @@
+__doc__ = """
+Basic model typedefs, constants and common methods.
+"""
+
 cdef bytes buffer_dump(const Buffer* buf):
     """
     Return a buffer's content as a string.

+ 5 - 0
lakesuperior/model/callbacks.pyx

@@ -1,5 +1,10 @@
 from lakesuperior.model.base cimport Key, TripleKey
 
+__doc__ = """
+Callback methods for various loop functions.
+"""
+
+
 cdef inline bint lookup_sk_cmp_fn(
     const TripleKey* spok, const Key k1, const Key k2
 ):

+ 8 - 4
lakesuperior/model/structures/hash.pyx

@@ -4,6 +4,14 @@ from libc.string cimport memcpy
 from lakesuperior.model.base cimport Buffer
 from lakesuperior.cy_include cimport spookyhash as sph
 
+__doc__ = """
+C hashing functions used with Cython models.
+
+The hashing algorithm is `SpookyHash
+<http://burtleburtle.net/bob/hash/spooky.html>`_ which produces up to 128-bit
+(16-byte) digests.
+"""
+
 
 memcpy(&term_hash_seed32, TERM_HASH_SEED, HLEN_32)
 memcpy(&term_hash_seed64_1, TERM_HASH_SEED, HLEN_64)
@@ -32,10 +40,6 @@ cdef inline int hash128(const Buffer *message, Hash128 *hash) except -1:
     """
     Get the hash value of a byte string with a defined size.
 
-    The hashing algorithm is `SpookyHash
-    <http://burtleburtle.net/bob/hash/spooky.html>`_ which produces 128-bit
-    (16-byte) digests.
-
     Note that this returns a char array while the smaller functions return
     numeric types (uint, ulong).
 

+ 30 - 12
lakesuperior/model/structures/keyset.pyx

@@ -14,23 +14,35 @@ logger = logging.getLogger(__name__)
 
 cdef class Keyset:
     """
-    Pre-allocated set of ``TripleKey``s.
+    Memory-contiguous array of ``TripleKey``s.
+
+    The keys are ``size_t`` values that are linked to terms in the triplestore.
+    Therefore, a triplestore lookup is necessary to view or use the terms, but
+    several types of manipulation and filtering can be done very efficiently
+    without looking at the term values.
 
     The set is not checked for duplicates all the time: e.g., when creating
-    from a single set of triples coming from the store, the duplicate check
-    is turned off for efficiency. When merging with other sets, duplicate
-    checking should be turned on.
+    from a single set of triples coming from the store, the duplicate check is
+    turned off for efficiency and because the source is guaranteed to provide
+    unique values. When merging with other sets, duplicate checking should be
+    turned on.
 
-    Since this class is based on a contiguous block of memory, it is best to
-    do very little manipulation. Several operations involve copying the whole
+    Since this class is based on a contiguous block of memory, it is best not
+    to do targeted manipulation. Several operations involve copying the whole
     data block, so e.g. bulk removal and intersection are much more efficient
     than individual record operations.
+
     """
     def __cinit__(self, size_t capacity=0, float expand_ratio=.75):
         """
         Initialize and allocate memory for the data set.
 
         :param size_t capacity: Number of elements to be accounted for.
+
+        :param float expand_ratio: by how much, relative to the current
+            size, the memory block is expanded when full. A value of 0
+            disables automatic expansion, and inserting beyond capacity will
+            raise an error.
         """
         self.capacity = capacity
         self.expand_ratio = expand_ratio
@@ -56,7 +68,11 @@ cdef class Keyset:
 
     cdef void seek(self, size_t idx=0):
         """
-        Place the cursor at a certain index, 0 by default.
+        Place the cursor at a given index, 0 by default.
+
+        :param size_t idx: Position to place the cursor. The position can be
+            at most the next unused slot; any value higher than that will
+            position the cursor at the next unused slot.
         """
         self.cur = min(idx, self.free_i)
 
@@ -99,8 +115,7 @@ cdef class Keyset:
 
 
     cdef inline int add(
-            self, const TripleKey* val, bint check_dup=False,
-            bint check_cap=True
+        self, const TripleKey* val, bint check_dup=False, bint check_cap=True
     ) except -1:
         """
         Add a triple key to the array.
@@ -111,9 +126,12 @@ cdef class Keyset:
 
         if check_cap and self.free_i >= self.capacity:
             if self.expand_ratio > 0:
-                # In some edge casees, a very small ratio may round down to a
-                # zero increase, so the baseline increase is 1 element.
-                self.resize(1 + <size_t>(self.capacity * (1 + self.expand_ratio)))
+                # In some cases, a very small initial value and ratio may
+                # round down to a zero increase, so the baseline increase is
+                # 1 element.
+                self.resize(
+                    1 + <size_t>(self.capacity * (1 + self.expand_ratio))
+                )
             else:
                 raise MemoryError('No space left in key set.')