|
@@ -1,473 +0,0 @@
|
|
|
-#include <stdio.h>
|
|
|
-#include <stdlib.h>
|
|
|
-#include <string.h>
|
|
|
-#include <limits.h>
|
|
|
-#include <assert.h>
|
|
|
-
|
|
|
-#include "htable.h"
|
|
|
-
|
|
|
-
|
|
|
// Bucket state flags. Parenthesized so they expand safely inside any
// expression (the bare `1 << 0` form misparses under operators with
// higher precedence, e.g. `x & BUCKET_EMPTY`).
#define BUCKET_EMPTY (1 << 0)
#define BUCKET_DELETED (1 << 1)

// Assertions are compiled in only for DEBUG builds.
#if defined(DEBUG)
#define ASSERT assert
#else
#define ASSERT(x)
#endif

// Cap on how much the table may grow in a single resize step (in buckets).
#define MAX_GROWTH_STEP (1024U * 1024)

// Cheap integer approximations of load-factor thresholds:
// x * 870 / 1024 ~= 85%, x * 409 / 1024 ~= 40%.
#define APPROX_85_PERCENT(x) (((x) * 870) >> 10)
#define APPROX_40_PERCENT(x) (((x) * 409) >> 10)

// Minimum table size, in buckets. MUST be parenthesized: the original
// `1 << 3` made `newsize < MIN_HT_SIZE` parse as `(newsize < 1) << 3`
// (relational binds tighter than shift), which is always false for any
// non-zero newsize.
#define MIN_HT_SIZE (1 << 3)
|
|
|
-
|
|
|
/** @brief Bucket structure.
 *
 * This struct does only contain bucket metadata which are of predictable size.
 * The key/value payload itself lives in the table's separate `data` array,
 * at the same index as the bucket.
 */
typedef struct {
    ht_hash_t hash;     // Full hash of the key stored at this slot.
    uint16_t psl;       // Probe sequence length: distance from the home bucket.
} bucket_t;
|
|
|
-
|
|
|
typedef struct htable_t {
    htsize_t size;      // Number of buckets allocated.
    htsize_t nitems;    // Number of records currently stored.
    uint64_t divinfo;   // Precomputed fast-division constants for `size`.

    bucket_t * buckets; // Per-bucket metadata (hash + PSL), `size` entries.
    void * data;        // Flat array of (key, value) records, one per bucket.
    uint64_t seed;      // Hash seed, mixed into every key hash.

    key_hash_fn_t key_hash_fn;  // User-supplied key hashing function.
    key_eq_fn_t key_eq_fn;      // User-supplied key equality predicate.

    ksize_t ksize;      // Key size in bytes (fixed per table).
    vsize_t vsize;      // Value size in bytes (fixed per table).
} HTable;
|
|
|
-
|
|
|
// All-zero sentinel used to mark and detect empty/deleted buckets by
// comparing against the first bytes of a record's key region.
// NOTE(review): the array is only sizeof(ksize_t) bytes long, yet
// is_empty_bucket() compares ht->ksize bytes against it — this reads past
// the array for key sizes larger than sizeof(ksize_t); confirm key sizes
// are bounded accordingly.
static const unsigned char del_marker[sizeof(ksize_t)] = {0};
|
|
|
-
|
|
|
/** @brief Access bucket key address by bucket index.
 *
 * data is the address of the raw data array (normally ht->data), i the
 * bucket index. Arguments and the full expansion are parenthesized so the
 * macro composes safely with expressions (e.g. HT_KEY(d, i + 1), which the
 * unparenthesized original mis-expanded). Relies on a variable named `ht`
 * being in scope to supply the record sizes.
 */
#define HT_KEY(data, i) ((data) + ((ht->ksize + ht->vsize) * (i)))

/** @brief Access bucket value address by bucket index.
 *
 * data is the address of the raw data array, i the bucket index. The value
 * is stored immediately after the key within each record.
 */
#define HT_VAL(data, i) ((data) + ((ht->ksize + ht->vsize) * (i)) + ht->ksize)
|
|
|
-
|
|
|
-/* * * GENERIC UTILITIES * * */
|
|
|
-
|
|
|
-static inline bool is_empty_bucket(const HTable *ht, htsize_t i)
|
|
|
-{ return memcmp(HT_KEY(ht, i), del_marker, ht->ksize) == 0; }
|
|
|
-
|
|
|
/*
 * fls: find the last (most significant) set bit, 1-based.
 * Returns 0 when no bit is set.
 */
static inline int fls(int x)
{
    if (x == 0)
        return 0;

    return (int)(sizeof(int) * CHAR_BIT) - __builtin_clz(x);
}
|
|
|
-
|
|
|
-
|
|
|
/*
 * Fast 32bit division and remainder.
 *
 * Reference:
 *
 * Torbjörn Granlund and Peter L. Montgomery, "Division by Invariant
 * Integers Using Multiplication", ACM SIGPLAN Notices, Issue 6, Vol 29,
 * http://gmplib.org/~tege/divcnst-pldi94.pdf, 61-72, June 1994.
 *
 * The following example computes q = a / b and r = a % b:
 *
 * uint64_t divinfo = fast_div32_init(b);
 * q = fast_div32(a, b, divinfo);
 * r = fast_rem32(a, b, divinfo);
 */

/*
 * Precompute the multiply/shift constants for dividing by `div`.
 *
 * Layout of the returned divinfo word: magic multiplier in the high
 * 32 bits, shift s1 in bits 8..15, shift s2 in bits 0..7.
 */
static inline uint64_t
fast_div32_init(uint32_t div)
{
    uint64_t mt;
    uintint8_t s1, s2;
    int l;

    l = fls(div - 1);
    mt = (uint64_t)(0x100000000ULL * ((1ULL << l) - div));
    s1 = (l > 1) ? 1U : (uint8_t)l;
    s2 = (l == 0) ? 0 : (uint8_t)(l - 1);
    return (uint64_t)(mt / div + 1) << 32 | (uint32_t)s1 << 8 | s2;
}
|
|
|
-
|
|
|
/*
 * Divide v by div using the constants precomputed by fast_div32_init().
 * The divisor argument is unused at runtime (it is already encoded in
 * divinfo); it is kept for interface symmetry with fast_rem32().
 */
static inline uint32_t
fast_div32(uint32_t v, uint32_t div, uint64_t divinfo)
{
    (void)div; // encoded in divinfo

    const uint32_t magic = (uint32_t)(divinfo >> 32);
    const unsigned sh1 = (unsigned)((divinfo >> 8) & 0xff);
    const unsigned sh2 = (unsigned)(divinfo & 0xff);
    const uint32_t hi = (uint32_t)(((uint64_t)v * magic) >> 32);

    return (hi + ((v - hi) >> sh1)) >> sh2;
}
|
|
|
-
|
|
|
-
|
|
|
/*
 * Remainder of v / div, derived from the fast division above.
 */
static inline uint32_t
fast_rem32(uint32_t v, uint32_t div, uint64_t divinfo)
{
    const uint32_t q = fast_div32(v, div, divinfo);

    return v - q * div;
}
|
|
|
-
|
|
|
-
|
|
|
-static int __attribute__((__unused__))
|
|
|
-//static int
|
|
|
-validate_psl_p(const HTable *ht, unsigned i)
|
|
|
-{
|
|
|
- unsigned base_i = fast_rem32(ht->buckets[i].hash, ht->size, ht->divinfo);
|
|
|
- unsigned diff = (base_i > i) ? ht->size - base_i + i : i - base_i;
|
|
|
- return is_empty_bucket(ht, i) || diff == ht->buckets[i].psl;
|
|
|
-}
|
|
|
-
|
|
|
-/* * * PUBLIC API * * */
|
|
|
-
|
|
|
-LSUP_rc LSUP_htable_new(
|
|
|
- htsize_t size, ksize_t ksize, vsize_t vsize,
|
|
|
- key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, LSUP_HTable **ht_p)
|
|
|
-{
|
|
|
- HTable *ht = calloc(1, sizeof(HTable));
|
|
|
- if (!ht) return ENOMEM;
|
|
|
- *ht_p = ht;
|
|
|
-
|
|
|
- ht->ksize = ksize;
|
|
|
- ht->vsize = vsize;
|
|
|
- ht->key_hash_fn = key_hash_fn;
|
|
|
- ht->key_eq_fn = key_eq_fn;
|
|
|
- ht->size = 0;
|
|
|
-
|
|
|
- return LSUP_htable_resize(ht, size);
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-LSUP_rc
|
|
|
-LSUP_htable_copy(const HTable *src, HTable **dest_p)
|
|
|
-{
|
|
|
- HTable *dest;
|
|
|
- CRITICAL(dest = calloc(1, sizeof(HTable)));
|
|
|
-
|
|
|
- dest->size = src->size;
|
|
|
- dest->nitems = src->nitems;
|
|
|
- dest->divinfo = src->divinfo;
|
|
|
-
|
|
|
- CRITICAL(dest->buckets = malloc(sizeof(bucket_t) * dest->size));
|
|
|
- memcpy(dest->buckets, src->buckets, sizeof(bucket_t) * dest->size);
|
|
|
-
|
|
|
- CRITICAL(dest->data = malloc(dest->size));
|
|
|
- memcpy(dest->data, src->data, dest->size);
|
|
|
-
|
|
|
- dest->seed = src->seed ^ (random() | (random() << sizeof(ht_hash_t)));
|
|
|
-
|
|
|
- dest->key_hash_fn = src->key_hash_fn;
|
|
|
- dest->key_eq_fn = src->key_eq_fn;
|
|
|
-
|
|
|
- dest->ksize = src->ksize;
|
|
|
- dest->vsize = src->vsize;
|
|
|
-
|
|
|
- *dest_p = dest;
|
|
|
-
|
|
|
- return LSUP_OK;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-/**
|
|
|
- * Resize a table.
|
|
|
- */
|
|
|
-LSUP_rc LSUP_htable_resize(HTable *ht, htsize_t newsize)
|
|
|
-{
|
|
|
- void *old_data = ht->data;
|
|
|
- const htsize_t oldsize = ht->size;
|
|
|
-
|
|
|
- // Clip size to min & max limits.
|
|
|
- if (newsize < MIN_HT_SIZE) newsize = MIN_HT_SIZE;
|
|
|
- if (newsize > HTSIZE_MAX) newsize = HTSIZE_MAX;
|
|
|
-
|
|
|
- TRACE("Resizing htable to %lu.", (size_t)newsize);
|
|
|
-
|
|
|
- CRITICAL(ht->buckets = calloc(newsize, sizeof(bucket_t)));
|
|
|
- CRITICAL(ht->data = calloc(
|
|
|
- (ht->ksize + ht->vsize) * newsize, sizeof(bucket_t)));
|
|
|
-
|
|
|
- ht->size = newsize;
|
|
|
- ht->nitems = 0;
|
|
|
-
|
|
|
- ht->divinfo = fast_div32_init(newsize);
|
|
|
- ht->seed ^= random() | (random() << sizeof(ht_hash_t));
|
|
|
-
|
|
|
- for (unsigned i = 0; i < oldsize; i++) {
|
|
|
- /* Skip the empty buckets. */
|
|
|
- if (!is_empty_bucket(old_data, i))
|
|
|
- LSUP_htable_insert(
|
|
|
- ht, HT_KEY(old_data, i),
|
|
|
- HT_VAL(old_data, i));
|
|
|
- }
|
|
|
- free(old_data);
|
|
|
-
|
|
|
- return LSUP_OK;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-htsize_t LSUP_htable_capacity(LSUP_HTable *ht)
|
|
|
-{ return ht->size; }
|
|
|
-
|
|
|
-
|
|
|
-htsize_t LSUP_htable_size(LSUP_HTable *ht)
|
|
|
-{ return ht->nitems; }
|
|
|
-
|
|
|
-
|
|
|
-/*
|
|
|
- * Insert without resizing (assuming resizing is already done).
|
|
|
- */
|
|
|
-LSUP_rc LSUP_htable_insert(HTable *ht, const void *key, const void *val)
|
|
|
-{
|
|
|
- bucket_t entry_s;
|
|
|
- bucket_t *bucket, *entry = &entry_s;
|
|
|
-
|
|
|
- ASSERT(key != NULL);
|
|
|
-
|
|
|
- /*
|
|
|
- * Setup the bucket entry.
|
|
|
- */
|
|
|
- entry->hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
|
|
|
- entry->psl = 0;
|
|
|
-
|
|
|
- /*
|
|
|
- * From the paper: "when inserting, if a record probes a location
|
|
|
- * that is already occupied, the record that has traveled longer
|
|
|
- * in its probe sequence keeps the location, and the other one
|
|
|
- * continues on its probe sequence" (page 12).
|
|
|
- *
|
|
|
- * Basically: if the probe sequence length (PSL) of the element
|
|
|
- * being inserted is greater than PSL of the element in the bucket,
|
|
|
- * then swap them and continue.
|
|
|
- */
|
|
|
- htsize_t i = fast_rem32(entry->hash, ht->size, ht->divinfo);
|
|
|
-
|
|
|
- // Locate the index to insert the KV into.
|
|
|
- for(;;) {
|
|
|
- bucket = ht->buckets + i;
|
|
|
-
|
|
|
- if(is_empty_bucket(ht, i)) break;
|
|
|
-
|
|
|
- ASSERT(validate_psl_p(ht, i));
|
|
|
-
|
|
|
- // There is a key in the bucket.
|
|
|
- if (ht->key_eq_fn(HT_KEY(ht, i), key, ht->ksize)) {
|
|
|
- // Duplicate key: do nothing.
|
|
|
- TRACE(STR, "Duplicate key.");
|
|
|
- return LSUP_NOACTION;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
- * We found a "rich" bucket. Capture its location.
|
|
|
- */
|
|
|
- if (entry->psl > bucket->psl) {
|
|
|
- //TRACE("Entry PSL: %d; Bucket PSL: %d", entry->psl, bucket->psl);
|
|
|
- TRACE(STR, "SWAP");
|
|
|
- /*
|
|
|
- * Place our key-value pair by swapping the "rich"
|
|
|
- * bucket with our entry. Copy the structures.
|
|
|
- */
|
|
|
- bucket_t *tmp = entry;
|
|
|
- entry = bucket;
|
|
|
- bucket = tmp;
|
|
|
- }
|
|
|
-
|
|
|
- entry->psl++;
|
|
|
-
|
|
|
- /* Continue to the next bucket. */
|
|
|
- ASSERT(validate_psl_p(ht, i));
|
|
|
- i = fast_rem32(i + 1, ht->size, ht->divinfo);
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
- * Found a free bucket: insert the entry.
|
|
|
- */
|
|
|
- TRACE("Inserting into bucket #%d", i);
|
|
|
- memcpy(bucket, entry, sizeof(bucket_t)); // copy
|
|
|
- memcpy(HT_KEY(ht->data, i), key, ht->ksize);
|
|
|
- memcpy(HT_VAL(ht->data, i), val, ht->vsize);
|
|
|
- ht->nitems++;
|
|
|
-
|
|
|
- ASSERT(validate_psl_p(ht, i));
|
|
|
-
|
|
|
- return LSUP_OK;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-/*
|
|
|
- * rhashmap_put: insert a value given the key.
|
|
|
- *
|
|
|
- * => If the key is already present, return its associated value.
|
|
|
- * => Otherwise, on successful insert, return the given value.
|
|
|
- */
|
|
|
-LSUP_rc LSUP_htable_put(HTable *ht, const void *key, const void *val)
|
|
|
-{
|
|
|
- const size_t threshold = APPROX_85_PERCENT(ht->size);
|
|
|
-
|
|
|
- /*
|
|
|
- * If the load factor is more than the threshold, then resize.
|
|
|
- */
|
|
|
- if (UNLIKELY(ht->nitems > threshold)) {
|
|
|
- /*
|
|
|
- * Grow the hash table by doubling its size, but with
|
|
|
- * a limit of MAX_GROWTH_STEP.
|
|
|
- */
|
|
|
- const size_t grow_limit = ht->size + MAX_GROWTH_STEP;
|
|
|
- const size_t newsize = min(ht->size << 1, grow_limit);
|
|
|
- LSUP_htable_resize(ht, newsize);
|
|
|
- }
|
|
|
-
|
|
|
- return LSUP_htable_insert(ht, key, val);
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-int LSUP_htable_get(const HTable *ht, const void *key, void **val_p)
|
|
|
-{
|
|
|
- const uint64_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
|
|
|
- htsize_t n = 0, i = fast_rem32(hash, ht->size, ht->divinfo);
|
|
|
-
|
|
|
- if (key == NULL) return LSUP_VALUE_ERR;
|
|
|
-
|
|
|
- /*
|
|
|
- * Lookup is a linear probe.
|
|
|
- */
|
|
|
- for(;;) {
|
|
|
- ASSERT(validate_psl_p(ht, i));
|
|
|
-
|
|
|
- if (ht->key_eq_fn(HT_KEY(ht->data, i), key, ht->ksize)) {
|
|
|
- // Key found within max probe length.
|
|
|
- if (val_p) *val_p = HT_VAL(ht->data, i);
|
|
|
-
|
|
|
- return LSUP_OK;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
- * Stop probing if we hit an empty bucket; also, if we hit a
|
|
|
- * bucket with PSL lower than the distance from the base location,
|
|
|
- * then it means that we found the "rich" bucket which should
|
|
|
- * have been captured, if the key was inserted -- see the central
|
|
|
- * point of the algorithm in the insertion function.
|
|
|
- */
|
|
|
- if (is_empty_bucket(ht, i) || n > ht->buckets[i].psl) {
|
|
|
- if (val_p) *val_p = NULL;
|
|
|
-
|
|
|
- return LSUP_NORESULT;
|
|
|
- }
|
|
|
-
|
|
|
- n++;
|
|
|
-
|
|
|
- /* Continue to the next bucket. */
|
|
|
- i = fast_rem32(i + 1, ht->size, ht->divinfo);
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-int LSUP_htable_remove(HTable *ht, const void *key)
|
|
|
-{
|
|
|
- const size_t threshold = APPROX_40_PERCENT(ht->size);
|
|
|
- const uint32_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
|
|
|
- unsigned n = 0, i = fast_rem32(hash, ht->size, ht->divinfo);
|
|
|
- bucket_t *bucket;
|
|
|
-
|
|
|
- ASSERT(key != NULL);
|
|
|
-
|
|
|
- for(;;) {
|
|
|
- /*
|
|
|
- * The same probing logic as in the lookup function.
|
|
|
- */
|
|
|
- bucket_t *bucket = ht->buckets + i;
|
|
|
- if (is_empty_bucket(ht, i) || n > bucket->psl)
|
|
|
- return LSUP_NOACTION;
|
|
|
-
|
|
|
- ASSERT(validate_psl_p(ht, i));
|
|
|
-
|
|
|
- if (!ht->key_eq_fn(HT_KEY(ht, i), key, ht->ksize)) {
|
|
|
- /* Continue to the next bucket. */
|
|
|
- i = fast_rem32(i + 1, ht->size, ht->divinfo);
|
|
|
- n++;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- ht->nitems--;
|
|
|
-
|
|
|
- /*
|
|
|
- * The probe sequence must be preserved in the deletion case.
|
|
|
- * Use the backwards-shifting method to maintain low variance.
|
|
|
- */
|
|
|
-
|
|
|
- for(;;) {
|
|
|
- bucket_t *nbucket;
|
|
|
-
|
|
|
- memcpy(HT_KEY(ht, i), del_marker, ht->ksize);
|
|
|
-
|
|
|
- i = fast_rem32(i + 1, ht->size, ht->divinfo);
|
|
|
- nbucket = ht->buckets + i;
|
|
|
- ASSERT(validate_psl_p(ht, i));
|
|
|
-
|
|
|
- /*
|
|
|
- * Stop if we reach an empty bucket or hit a key which
|
|
|
- * is in its base (original) location.
|
|
|
- */
|
|
|
- if (is_empty_bucket(ht, i) || nbucket->psl == 0) break;
|
|
|
-
|
|
|
- nbucket->psl--;
|
|
|
- *bucket = *nbucket;
|
|
|
- bucket = nbucket;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
- * If the load factor is less than threshold, then shrink by
|
|
|
- * halving the size, but not less than 1.
|
|
|
- */
|
|
|
- if (ht->nitems < threshold) {
|
|
|
- size_t newsize = max(ht->size >> 1, 1);
|
|
|
- (void)LSUP_htable_resize(ht, newsize);
|
|
|
- }
|
|
|
-
|
|
|
- return LSUP_OK;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-LSUP_rc LSUP_htable_iter(
|
|
|
- LSUP_HTable *ht, htsize_t *cur, void **keyp, void **valp)
|
|
|
-{
|
|
|
- while (*cur < ht->size) {
|
|
|
- (*cur)++;
|
|
|
-
|
|
|
- if (is_empty_bucket(ht, *cur)) {
|
|
|
- TRACE("Empty bucket: %d. Skipping.", (*cur) - 1);
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // Copy key, and if relevant, value.
|
|
|
- if(!keyp) return LSUP_VALUE_ERR;
|
|
|
- *keyp = HT_KEY(ht, *cur);
|
|
|
-
|
|
|
- if (valp != NULL && ht->vsize > 0) *valp = HT_VAL(ht, *cur);
|
|
|
-
|
|
|
- return LSUP_OK;
|
|
|
- }
|
|
|
- return LSUP_END;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-void LSUP_htable_free(HTable *ht)
|
|
|
-{
|
|
|
- if(LIKELY(ht)) {
|
|
|
- free(ht->buckets);
|
|
|
- free(ht->data);
|
|
|
- free(ht);
|
|
|
- }
|
|
|
-}
|