hash.pyx 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. from libc.stdint cimport uint32_t, uint64_t
  2. from libc.string cimport memcpy
  3. from lakesuperior.model.base cimport Buffer
  4. from lakesuperior.cy_include cimport spookyhash as sph
  5. __doc__ = """
  6. C hashing functions used with Cython models.
  7. The hashing algorithm is `SpookyHash
  8. <http://burtleburtle.net/bob/hash/spooky.html>`_ which produces up to 128-bit
  9. (16-byte) digests.
  10. """
  11. memcpy(&term_hash_seed32, TERM_HASH_SEED, HLEN_32)
  12. memcpy(&term_hash_seed64_1, TERM_HASH_SEED, HLEN_64)
  13. memcpy(&term_hash_seed64_2, TERM_HASH_SEED + HLEN_64, HLEN_64)
  14. cdef inline int hash32(const Buffer *message, Hash32 *hash) except -1:
  15. """
  16. Get a 32-bit (unsigned int) hash value of a byte string.
  17. """
  18. cdef uint32_t seed = term_hash_seed64_1
  19. hash[0] = sph.spookyhash_32(message[0].addr, message[0].sz, seed)
  20. cdef inline int hash64(const Buffer *message, Hash64 *hash) except -1:
  21. """
  22. Get a 64-bit (unsigned long) hash value of a byte string.
  23. """
  24. cdef uint64_t seed = term_hash_seed32
  25. hash[0] = sph.spookyhash_64(message[0].addr, message[0].sz, seed)
  26. cdef inline int hash128(const Buffer *message, Hash128 *hash) except -1:
  27. """
  28. Get the hash value of a byte string with a defined size.
  29. Note that this returns a char array while the smaller functions return
  30. numeric types (uint, ulong).
  31. The initial seeds are determined in the application configuration.
  32. :rtype: Hash128
  33. """
  34. cdef:
  35. DoubleHash64 seed = [term_hash_seed64_1, term_hash_seed64_2]
  36. Hash128 digest
  37. sph.spookyhash_128(message[0].addr, message[0].sz, seed, seed + 1)
  38. # This casts the 2 contiguous uint64_t's into a char[16] pointer.
  39. hash[0] = <Hash128>seed