# hash.pyx

  1. from libc.stdint cimport uint64_t
  2. from libc.string cimport memcpy
# Raw seed material shared by the hash functions in this module.
term_hash_seed = b'\xff\xf2Q\xf2j\x0bG\xc1\x8a}\xca\x92\x98^y\x12'
"""
Seed for computing the term hash.
This is a 16-byte string that will be split up into two ``uint64``
numbers to make up the ``spookyhash_128`` seeds.
"""

# Fill the two seed variables from the seed string: the first SEED_LEN bytes
# go into ``term_hash_seed1`` and the next SEED_LEN bytes into
# ``term_hash_seed2``.
# NOTE(review): ``term_hash_seed1``, ``term_hash_seed2`` and ``SEED_LEN`` are
# not declared in this section; presumably they come from a companion .pxd
# with SEED_LEN == 8, so that both halves of the 16-byte seed are consumed --
# confirm.
# NOTE(review): ``term_hash_seed + SEED_LEN`` is pointer arithmetic only if
# ``term_hash_seed`` is declared with a C pointer type elsewhere -- verify.
memcpy(&term_hash_seed1, term_hash_seed, SEED_LEN)
memcpy(&term_hash_seed2, term_hash_seed + SEED_LEN, SEED_LEN)
# We only need a couple of functions from spookyhash. No need for a pxd file.
cdef extern from 'spookyhash_api.h':
    # 128-bit hash of ``input``. ``hash_1`` and ``hash_2`` are used in this
    # module both to pass the two seed values in and to read the two 64-bit
    # halves of the result back.
    void spookyhash_128(
            const void *input, size_t input_size, uint64_t *hash_1,
            uint64_t *hash_2)
    # 64-bit hash of ``input`` computed with ``seed``; returned by value.
    uint64_t spookyhash_64(const void *input, size_t input_size, uint64_t seed)
  17. cdef inline Hash128 hash128(
  18. const unsigned char *message, size_t message_size):
  19. """
  20. Get the hash value of a byte string with a defined size.
  21. The hashing algorithm is `SpookyHash
  22. <http://burtleburtle.net/bob/hash/spooky.html>`_ which produces 128-bit
  23. (16-byte) digests.
  24. The initial seeds are determined in the application configuration.
  25. :rtype: Hash128
  26. """
  27. cdef:
  28. DoubleHash64 seed = [term_hash_seed1, term_hash_seed2]
  29. Hash128 digest
  30. spookyhash_128(message, message_size, seed, seed + 1)
  31. # This casts the 2 contiguous uint64_t's into a char pointer.
  32. return <Hash128>seed
  33. cdef inline Hash64 hash64(
  34. const unsigned char *message, size_t message_size):
  35. """
  36. Get a 64-bit (unsigned long) hash value of a byte string.
  37. This function also uses SpookyHash. Note that this returns a UInt64 while
  38. the 128-bit function returns a char array.
  39. """
  40. cdef uint64_t seed = term_hash_seed1
  41. return spookyhash_64(message, message_size, seed)