Browse Source

Update docstrings in the rest of the model package.

Stefano Cossu 6 years ago
parent
commit
285505de7c

+ 4 - 0
lakesuperior/model/base.pyx

@@ -1,3 +1,7 @@
+__doc__ = """
+Basic model typedefs, constants and common methods.
+"""
+
 cdef bytes buffer_dump(const Buffer* buf):
 cdef bytes buffer_dump(const Buffer* buf):
     """
     """
     Return a buffer's content as a string.
     Return a buffer's content as a string.

+ 5 - 0
lakesuperior/model/callbacks.pyx

@@ -1,5 +1,10 @@
 from lakesuperior.model.base cimport Key, TripleKey
 from lakesuperior.model.base cimport Key, TripleKey
 
 
+__doc__ = """
+Callback methods for various loop functions.
+"""
+
+
 cdef inline bint lookup_sk_cmp_fn(
 cdef inline bint lookup_sk_cmp_fn(
     const TripleKey* spok, const Key k1, const Key k2
     const TripleKey* spok, const Key k1, const Key k2
 ):
 ):

+ 8 - 4
lakesuperior/model/structures/hash.pyx

@@ -4,6 +4,14 @@ from libc.string cimport memcpy
 from lakesuperior.model.base cimport Buffer
 from lakesuperior.model.base cimport Buffer
 from lakesuperior.cy_include cimport spookyhash as sph
 from lakesuperior.cy_include cimport spookyhash as sph
 
 
+__doc__ = """
+C hashing functions used with Cython models.
+
+The hashing algorithm is `SpookyHash
+<http://burtleburtle.net/bob/hash/spooky.html>`_ which produces up to 128-bit
+(16-byte) digests.
+"""
+
 
 
 memcpy(&term_hash_seed32, TERM_HASH_SEED, HLEN_32)
 memcpy(&term_hash_seed32, TERM_HASH_SEED, HLEN_32)
 memcpy(&term_hash_seed64_1, TERM_HASH_SEED, HLEN_64)
 memcpy(&term_hash_seed64_1, TERM_HASH_SEED, HLEN_64)
@@ -32,10 +40,6 @@ cdef inline int hash128(const Buffer *message, Hash128 *hash) except -1:
     """
     """
     Get the hash value of a byte string with a defined size.
     Get the hash value of a byte string with a defined size.
 
 
-    The hashing algorithm is `SpookyHash
-    <http://burtleburtle.net/bob/hash/spooky.html>`_ which produces 128-bit
-    (16-byte) digests.
-
     Note that this returns a char array while the smaller functions return
     Note that this returns a char array while the smaller functions return
     numeric types (uint, ulong).
     numeric types (uint, ulong).
 
 

+ 30 - 12
lakesuperior/model/structures/keyset.pyx

@@ -14,23 +14,35 @@ logger = logging.getLogger(__name__)
 
 
 cdef class Keyset:
 cdef class Keyset:
     """
     """
-    Pre-allocated set of ``TripleKey``s.
+    Memory-contiguous array of ``TripleKey`` elements.
+
+    The keys are ``size_t`` values that are linked to terms in the triplestore.
+    Therefore, a triplestore lookup is necessary to view or use the terms, but
+    several types of manipulation and filtering can be done very efficiently
+    without looking at the term values.
 
 
     The set is not checked for duplicates all the time: e.g., when creating
     The set is not checked for duplicates all the time: e.g., when creating
-    from a single set of triples coming from the store, the duplicate check
-    is turned off for efficiency. When merging with other sets, duplicate
-    checking should be turned on.
+    from a single set of triples coming from the store, the duplicate check is
+    turned off for efficiency and because the source is guaranteed to provide
+    unique values. When merging with other sets, duplicate checking should be
+    turned on.
 
 
-    Since this class is based on a contiguous block of memory, it is best to
-    do very little manipulation. Several operations involve copying the whole
+    Since this class is based on a contiguous block of memory, it is best not
+    to do targeted manipulation. Several operations involve copying the whole
     data block, so e.g. bulk removal and intersection are much more efficient
     data block, so e.g. bulk removal and intersection are much more efficient
     than individual record operations.
     than individual record operations.
+
     """
     """
     def __cinit__(self, size_t capacity=0, float expand_ratio=.75):
     def __cinit__(self, size_t capacity=0, float expand_ratio=.75):
         """
         """
         Initialize and allocate memory for the data set.
         Initialize and allocate memory for the data set.
 
 
         :param size_t capacity: Number of elements to be accounted for.
         :param size_t capacity: Number of elements to be accounted for.
+
+        :param float expand_ratio: By how much, relative to the current
+            size, the memory block is expanded when full. A value of 0
+            disables automatic expansion, and inserting beyond capacity will
+            raise an error.
         """
         """
         self.capacity = capacity
         self.capacity = capacity
         self.expand_ratio = expand_ratio
         self.expand_ratio = expand_ratio
@@ -56,7 +68,11 @@ cdef class Keyset:
 
 
     cdef void seek(self, size_t idx=0):
     cdef void seek(self, size_t idx=0):
         """
         """
-        Place the cursor at a certain index, 0 by default.
+        Place the cursor at a given index, 0 by default.
+
+        :param size_t idx: Position to place the cursor at. The maximum
+            position is the next unused slot; any higher value will position
+            the cursor at the next unused slot.
         """
         """
         self.cur = min(idx, self.free_i)
         self.cur = min(idx, self.free_i)
 
 
@@ -99,8 +115,7 @@ cdef class Keyset:
 
 
 
 
     cdef inline int add(
     cdef inline int add(
-            self, const TripleKey* val, bint check_dup=False,
-            bint check_cap=True
+        self, const TripleKey* val, bint check_dup=False, bint check_cap=True
     ) except -1:
     ) except -1:
         """
         """
         Add a triple key to the array.
         Add a triple key to the array.
@@ -111,9 +126,12 @@ cdef class Keyset:
 
 
         if check_cap and self.free_i >= self.capacity:
         if check_cap and self.free_i >= self.capacity:
             if self.expand_ratio > 0:
             if self.expand_ratio > 0:
-                # In some edge casees, a very small ratio may round down to a
-                # zero increase, so the baseline increase is 1 element.
-                self.resize(1 + <size_t>(self.capacity * (1 + self.expand_ratio)))
+                # In some cases, a very small initial value and ratio may
+                # round down to a zero increase, so the baseline increase is
+                # 1 element.
+                self.resize(
+                    1 + <size_t>(self.capacity * (1 + self.expand_ratio))
+                )
             else:
             else:
                 raise MemoryError('No space left in key set.')
                 raise MemoryError('No space left in key set.')