|
@@ -1,498 +1,490 @@
|
|
|
+#include <stdio.h>
|
|
|
+#include <stdlib.h>
|
|
|
+#include <string.h>
|
|
|
+#include <limits.h>
|
|
|
+#include <assert.h>
|
|
|
+
|
|
|
#include "include/htable.h"
|
|
|
|
|
|
+
|
|
|
#define BUCKET_EMPTY 1 << 0
|
|
|
#define BUCKET_DELETED 1 << 1
|
|
|
|
|
|
-#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
|
|
|
+#if defined(DEBUG) /* DEBUG builds: ASSERT is the standard assert(). */
|
|
|
+#define ASSERT assert
|
|
|
+#else /* Otherwise ASSERT(x) expands to nothing and x is never evaluated. */
|
|
|
+#define ASSERT(x)
|
|
|
+#endif
|
|
|
|
|
|
-static const double __ac_HASH_UPPER = 0.77;
|
|
|
+/* Largest number of buckets that a single growth step may add. */
|
|
|
+#define MAX_GROWTH_STEP (1024U * 1024)
|
|
|
+
|
|
|
+/*
|
|
|
+ * Fixed-point load-factor thresholds: 870/1024 ~= 0.85, 409/1024 ~= 0.40.
|
|
|
+ * The operand is widened to 64 bits before multiplying so that the product
|
|
|
+ * cannot wrap for large 32-bit bucket counts.
|
|
|
+ */
|
|
|
+#define APPROX_85_PERCENT(x) (((uint64_t)(x) * 870) >> 10)
|
|
|
+#define APPROX_40_PERCENT(x) (((uint64_t)(x) * 409) >> 10)
|
|
|
+
|
|
|
+
|
|
|
+typedef struct { /* One slot of the open-addressing table. */
|
|
|
+ void * key; /* Key pointer; NULL marks an empty bucket. */
|
|
|
+ void * val; /* Caller-supplied value pointer, stored as given. */
|
|
|
+ uint64_t hash : 32; /* 32-bit hash of the key, recorded at insert time. */
|
|
|
+ uint64_t psl : 16; /* Probe sequence length: distance from the slot the hash maps to. */
|
|
|
+} bucket_t;
|
|
|
+
|
|
|
+typedef struct htable_t {
|
|
|
+ unsigned size; /* Number of buckets currently allocated. */
|
|
|
+ unsigned nitems; /* Number of entries currently stored. */
|
|
|
+ unsigned flags; /* Behavior flags; HTABLE_NOCOPY is tested in this file. */
|
|
|
+ uint64_t divinfo; /* fast_div32_init(size), cached for fast_rem32(). */
|
|
|
+ bucket_t * buckets; /* Bucket array; compared against &init_bucket before freeing. */
|
|
|
+ uint64_t hashkey; /* Seed handed to key_hash_fn; perturbed on every resize. */
|
|
|
+
|
|
|
+ key_hash_fn_t key_hash_fn; /* Called as key_hash_fn(key, ksize, hashkey). */
|
|
|
+ key_eq_fn_t key_eq_fn; /* Called as key_eq_fn(a, b, ksize); non-zero means equal. */
|
|
|
+
|
|
|
+ void * key_data; /* Reallocated on resize. NOTE(review): not read in the visible code -- confirm it is needed. */
|
|
|
+ ksize_t ksize; /* Key size in bytes (used for key copies). */
|
|
|
+ void * val_data; /* Reallocated on resize. NOTE(review): not read in the visible code -- confirm it is needed. */
|
|
|
+ vsize_t vsize; /* Value size; 0 makes lookups and iteration skip the value. */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Small optimisation for a single element case: allocate one
|
|
|
+ * bucket together with the hashmap structure -- it will generally
|
|
|
+ * fit within the same cache-line.
|
|
|
+ */
|
|
|
+ bucket_t init_bucket;
|
|
|
+} HTable;
|
|
|
|
|
|
-typedef uint8_t flags_t;
|
|
|
|
|
|
-typedef struct HTable {
|
|
|
- ht_size_t n_buckets; // # of buckets. Up to UINT_MAX
|
|
|
- ht_size_t size; // # of entries in the table.
|
|
|
- ht_size_t n_occupied; // # of occupied buckets.
|
|
|
- ht_size_t upper_bound; //
|
|
|
|
|
|
- flags_t *flags; // Flags for each bucket.
|
|
|
+/* * * GENERIC UTILITIES * * */
|
|
|
|
|
|
- key_hash_fn_t key_hash_fn; // Function to compute hash of a key.
|
|
|
- key_eq_fn_t key_eq_fn; // Function to evaluate equality of keys.
|
|
|
+/*
|
|
|
+ * Find last set bit: returns the 1-based position of the most significant
|
|
|
+ * set bit of x, or 0 when x is 0.
|
|
|
+ */
|
|
|
+static inline int fls(int x)
|
|
|
+{ return x ? (sizeof(int) * CHAR_BIT) - __builtin_clz(x) : 0; }
|
|
|
|
|
|
- uint32_t ksize; // Key size, in bytes.
|
|
|
- uint32_t vsize; // Value size, in bytes.
|
|
|
|
|
|
- void *keys; // Key data. This is a void pointer to
|
|
|
- // a memory block that is looked up in
|
|
|
- // "steps" determined by the ksize
|
|
|
- // value.
|
|
|
- void *vals; // Value data, same layout as keys.
|
|
|
-} HTable;
|
|
|
+/*
|
|
|
+ * Fast 32bit division and remainder.
|
|
|
+ *
|
|
|
+ * Reference:
|
|
|
+ *
|
|
|
+ * Torbjörn Granlund and Peter L. Montgomery, "Division by Invariant
|
|
|
+ * Integers Using Multiplication", ACM SIGPLAN Notices, Issue 6, Vol 29,
|
|
|
+ * http://gmplib.org/~tege/divcnst-pldi94.pdf, 61-72, June 1994.
|
|
|
+ *
|
|
|
+ * The following example computes q = a / b and r = a % b:
|
|
|
+ *
|
|
|
+ * uint64_t divinfo = fast_div32_init(b);
|
|
|
+ * q = fast_div32(a, b, divinfo);
|
|
|
+ * r = fast_rem32(a, b, divinfo);
|
|
|
+ */
|
|
|
+
|
|
|
+static inline uint64_t
|
|
|
+fast_div32_init(uint32_t div)
|
|
|
+{ /* Precompute the multiplier and shifts for dividing by div; div must be non-zero (it is a divisor below). */
|
|
|
+ uint64_t mt;
|
|
|
+ uint8_t s1, s2;
|
|
|
+ int l;
|
|
|
+
|
|
|
+ l = fls(div - 1); /* l = ceil(log2(div)) for div >= 1 */
|
|
|
+ mt = (uint64_t)(0x100000000ULL * ((1ULL << l) - div)); /* 2^32 * (2^l - div) */
|
|
|
+ s1 = (l > 1) ? 1U : (uint8_t)l; /* first shift: min(l, 1) */
|
|
|
+ s2 = (l == 0) ? 0 : (uint8_t)(l - 1); /* second shift: max(l - 1, 0) */
|
|
|
+ return (uint64_t)(mt / div + 1) << 32 | (uint32_t)s1 << 8 | s2; /* multiplier in bits 63..32, s1 in bits 15..8, s2 in bits 7..0 */
|
|
|
+}
|
|
|
+
|
|
|
+static inline uint32_t
|
|
|
+fast_div32(uint32_t v, uint32_t div, uint64_t divinfo)
|
|
|
+{ /* Compute v / div using the values packed by fast_div32_init(div). */
|
|
|
+ const uint32_t m = divinfo >> 32; /* multiplier from the upper 32 bits */
|
|
|
+ const unsigned s1 = (divinfo & 0x0000ff00) >> 8; /* first shift amount */
|
|
|
+ const unsigned s2 = (divinfo & 0x000000ff); /* second shift amount */
|
|
|
+ const uint32_t t = (uint32_t)(((uint64_t)v * m) >> 32); /* high 32 bits of v * m */
|
|
|
+ (void)div; // unused
|
|
|
+ return (t + ((v - t) >> s1)) >> s2;
|
|
|
+}
|
|
|
+
|
|
|
|
|
|
+/*
|
|
|
+ * Compute v % div from the precomputed divinfo: the quotient comes from
|
|
|
+ * fast_div32() and the remainder is what is left after subtracting
|
|
|
+ * div * quotient from v.
|
|
|
+ */
|
|
|
+static inline uint32_t
|
|
|
+fast_rem32(uint32_t v, uint32_t div, uint64_t divinfo)
|
|
|
+{
|
|
|
+    const uint32_t quotient = fast_div32(v, div, divinfo);
|
|
|
+
|
|
|
+    return v - div * quotient;
|
|
|
+}
|
|
|
|
|
|
-/* * * Static prototypes * * */
|
|
|
|
|
|
-static int LSUP_htable_resize(HTable *ht, ht_size_t new_n_buckets);
|
|
|
+/*
|
|
|
+ * Debug helper used inside ASSERT(): checks that the PSL stored in a bucket
|
|
|
+ * equals the wrap-around distance between slot i and the slot the bucket's
|
|
|
+ * hash maps to. Empty buckets always pass.
|
|
|
+ *
|
|
|
+ * The table is taken as const so read-only lookups can call it, and the
|
|
|
+ * function is marked unused because ASSERT() expands to nothing outside
|
|
|
+ * DEBUG builds.
|
|
|
+ */
|
|
|
+static int __attribute__((__unused__))
|
|
|
+validate_psl_p(const HTable *ht, const bucket_t *bucket, unsigned i)
|
|
|
+{
|
|
|
+    const unsigned base_i = fast_rem32(bucket->hash, ht->size, ht->divinfo);
|
|
|
+    const unsigned diff = (base_i > i) ? ht->size - base_i + i : i - base_i;
|
|
|
+
|
|
|
+    return bucket->key == NULL || diff == bucket->psl;
|
|
|
+}
|
|
|
|
|
|
|
|
|
-/* * * API * * */
|
|
|
+/* * * PUBLIC API * * */
|
|
|
|
|
|
+/*
|
|
|
+ * Initialize a caller-allocated hash table.
|
|
|
+ *
|
|
|
+ * All fields are zeroed first, then the key/value sizes, callbacks and flags
|
|
|
+ * are stored and the bucket array is allocated.
|
|
|
+ *
|
|
|
+ * => If size is non-zero, then pre-allocate the given number of buckets;
|
|
|
+ * => If size is zero, then the minimum of one bucket is used.
|
|
|
+ *
|
|
|
+ * Returns LSUP_OK on success, or -1 if the buckets cannot be allocated.
|
|
|
+ * The handle itself is never freed here: it belongs to the caller.
|
|
|
+ */
|
|
|
int LSUP_htable_init(
|
|
|
- HTable *ht, ht_size_t size, uint32_t ksize, uint32_t vsize,
|
|
|
- key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn)
|
|
|
+ HTable *ht, htsize_t size, ksize_t ksize, vsize_t vsize,
|
|
|
+ key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, unsigned flags)
|
|
|
{
|
|
|
- ht->key_hash_fn = key_hash_fn;
|
|
|
- ht->key_eq_fn = key_eq_fn;
|
|
|
-
|
|
|
- ht->ksize = ksize;
|
|
|
- ht->vsize = vsize;
|
|
|
+    /* Start from a known state: the resize below reads buckets, size and nitems. */
|
|
|
+    memset(ht, 0, sizeof(*ht));
|
|
|
+
|
|
|
+    ht->key_hash_fn = key_hash_fn;
|
|
|
+    ht->key_eq_fn = key_eq_fn;
|
|
|
+    ht->ksize = ksize;
|
|
|
+    ht->vsize = vsize;
|
|
|
+    ht->flags = flags;
|
|
|
+
|
|
|
+    if (LSUP_htable_resize(ht, size > 0 ? size : 1) != 0) {
|
|
|
+        return -1;
|
|
|
+    }
|
|
|
+    ASSERT(ht->buckets);
|
|
|
+    ASSERT(ht->size);
|
|
|
|
|
|
|
|
|
- return LSUP_htable_resize(ht, size);
|
|
|
+    return LSUP_OK;
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-void LSUP_htable_done(HTable *ht)
|
|
|
+/*
|
|
|
+ * Allocate a hash table on the heap and initialize it with
|
|
|
+ * LSUP_htable_init(); the arguments have the same meaning.
|
|
|
+ *
|
|
|
+ * Returns the new table, or NULL if allocation or initialization fails.
|
|
|
+ * Release the result with LSUP_htable_free().
|
|
|
+ */
|
|
|
+HTable *LSUP_htable_new(
|
|
|
+ htsize_t size, ksize_t ksize, vsize_t vsize,
|
|
|
+ key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, unsigned flags)
|
|
|
{
|
|
|
- if (LIKELY(ht != NULL)) {
|
|
|
- free((void *)ht->keys);
|
|
|
- free(ht->flags);
|
|
|
- free((void *)ht->vals);
|
|
|
- }
|
|
|
+    HTable *ht;
|
|
|
+    CRITICAL(ht = malloc(sizeof(*ht)));
|
|
|
+    if (ht == NULL) {
|
|
|
+        return NULL;
|
|
|
+    }
|
|
|
+
|
|
|
+    /* Do not hand out a half-initialized table: release it and report failure. */
|
|
|
+    if (LSUP_htable_init(
|
|
|
+            ht, size, ksize, vsize, key_hash_fn, key_eq_fn, flags) != LSUP_OK) {
|
|
|
+        free(ht);
|
|
|
+        return NULL;
|
|
|
+    }
|
|
|
+
|
|
|
+    return ht;
|
|
|
}
|
|
|
|
|
|
|
|
|
-ht_size_t LSUP_htable_get(const HTable *ht, void *key)
|
|
|
+/**
|
|
|
+ * Resize a table to newsize buckets and re-insert every stored entry.
|
|
|
+ *
|
|
|
+ * A one-bucket table uses the bucket embedded in the handle; larger tables
|
|
|
+ * get a zeroed heap array. The hash seed is perturbed on every resize.
|
|
|
+ *
|
|
|
+ * Returns 0 on success. Returns -1 if newsize is 0 or exceeds UINT_MAX, or
|
|
|
+ * if an allocation fails; in that case the existing buckets are untouched.
|
|
|
+ */
|
|
|
+int LSUP_htable_resize(HTable *ht, htsize_t newsize)
|
|
|
{
|
|
|
- if (ht->n_buckets > 0) {
|
|
|
- ht_size_t i, last, mask, step = 0;
|
|
|
+    bucket_t *oldbuckets = ht->buckets;
|
|
|
+    const size_t oldsize = ht->size;
|
|
|
+    bucket_t *newbuckets;
|
|
|
+
|
|
|
+    ASSERT(newsize > 0);
|
|
|
+    ASSERT(newsize > ht->nitems);
|
|
|
+
|
|
|
+    /* A zero size cannot be divided by; larger than UINT_MAX cannot be stored. */
|
|
|
+    if (newsize == 0 || newsize > UINT_MAX) {
|
|
|
+        return -1;
|
|
|
+    }
|
|
|
+    /*
|
|
|
+     * Already a one-bucket table living in init_bucket: nothing to do, and
|
|
|
+     * clearing init_bucket here would destroy the live entry.
|
|
|
+     */
|
|
|
+    if (newsize == 1 && oldbuckets == &ht->init_bucket) {
|
|
|
+        return 0;
|
|
|
+    }
|
|
|
|
|
|
|
|
|
- mask = ht->n_buckets - 1;
|
|
|
- i = ht->key_hash_fn(key) & mask;
|
|
|
+    /*
|
|
|
+     * Resize the auxiliary key and value buffers. Each one is reallocated
|
|
|
+     * from its own pointer, through a temporary, so a failure loses nothing.
|
|
|
+     * Zero-sized elements need no storage.
|
|
|
+     * NOTE(review): the visible code never reads key_data/val_data --
|
|
|
+     * confirm these buffers are still needed.
|
|
|
+     */
|
|
|
+    if (ht->ksize > 0) {
|
|
|
+        void *kdata = realloc(ht->key_data, (size_t)newsize * ht->ksize);
|
|
|
+        if (kdata == NULL) {
|
|
|
+            return -1;
|
|
|
+        }
|
|
|
+        ht->key_data = kdata;
|
|
|
+    }
|
|
|
+    if (ht->vsize > 0) {
|
|
|
+        void *vdata = realloc(ht->val_data, (size_t)newsize * ht->vsize);
|
|
|
+        if (vdata == NULL) {
|
|
|
+            return -1;
|
|
|
+        }
|
|
|
+        ht->val_data = vdata;
|
|
|
+    }
|
|
|
|
|
|
|
|
|
- last = i;
|
|
|
+    /* Pick the new bucket storage: embedded bucket or a zeroed heap array. */
|
|
|
+    if (newsize == 1) {
|
|
|
+        memset(&ht->init_bucket, 0, sizeof(bucket_t));
|
|
|
+        newbuckets = &ht->init_bucket;
|
|
|
+    } else {
|
|
|
+        /* calloc(n, size) checks the n * size product for overflow. */
|
|
|
+        newbuckets = calloc(newsize, sizeof(bucket_t));
|
|
|
+        if (newbuckets == NULL) {
|
|
|
+            return -1;
|
|
|
+        }
|
|
|
+    }
|
|
|
|
|
|
|
|
|
- while (
|
|
|
- !CHK_FLAG(ht->flags[i], BUCKET_EMPTY) &&
|
|
|
- (
|
|
|
- CHK_FLAG(ht->flags[i], BUCKET_DELETED) ||
|
|
|
- !ht->key_eq_fn(ht->keys + i * ht->ksize, key)
|
|
|
- )) {
|
|
|
- i = (i + (++step)) & mask;
|
|
|
+    /* Commit the new, empty table; entries are re-inserted below. */
|
|
|
+    ht->buckets = newbuckets;
|
|
|
+    ht->size = newsize;
|
|
|
+    ht->nitems = 0;
|
|
|
+    ht->divinfo = fast_div32_init(newsize);
|
|
|
+    /* Widen before shifting: random() returns a long. */
|
|
|
+    ht->hashkey ^= ((uint64_t)random() << 32) | (uint64_t)random();
|
|
|
|
|
|
|
|
|
- if (i == last) return ht->n_buckets;
|
|
|
- }
|
|
|
- return CHK_FLAG(ht->flags[i], (BUCKET_EMPTY | BUCKET_DELETED)) ?
|
|
|
- ht->n_buckets : i;
|
|
|
+    for (size_t i = 0; i < oldsize; i++) {
|
|
|
+        const bucket_t *bucket = &oldbuckets[i];
|
|
|
|
|
|
|
|
|
- } else return 0;
|
|
|
+        /* Skip the empty buckets. */
|
|
|
+        if (!bucket->key) {
|
|
|
+            continue;
|
|
|
+        }
|
|
|
+        LSUP_htable_insert(ht, bucket->key, bucket->val);
|
|
|
+        /* In copy mode the insert made its own key copy; drop the old one. */
|
|
|
+        if ((ht->flags & HTABLE_NOCOPY) == 0) {
|
|
|
+            free(bucket->key);
|
|
|
+        }
|
|
|
+    }
|
|
|
+    /* The embedded bucket is part of the handle and must not be freed. */
|
|
|
+    if (oldbuckets && oldbuckets != &ht->init_bucket) {
|
|
|
+        free(oldbuckets);
|
|
|
+    }
|
|
|
+    return 0;
|
|
|
}
|
|
|
|
|
|
|
|
|
+/* Return the number of buckets currently allocated for the table. */
|
|
|
+htsize_t LSUP_htable_capacity(LSUP_HTable *ht)
|
|
|
+{
|
|
|
+    return ht->size;
|
|
|
+}
|
|
|
|
|
|
|
|
|
+/* Return the number of entries currently stored in the table. */
|
|
|
+htsize_t LSUP_htable_size(LSUP_HTable *ht)
|
|
|
+{
|
|
|
+    return ht->nitems;
|
|
|
+}
|
|
|
|
|
|
-#ifndef kroundup32
|
|
|
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
|
|
-#endif
|
|
|
|
|
|
+/*
|
|
|
+ * Insert without resizing (assuming resizing is already done).
|
|
|
+ *
|
|
|
+ * The table must contain at least one empty bucket, otherwise the probe
|
|
|
+ * below cannot terminate; LSUP_htable_put() grows the table beforehand.
|
|
|
+ *
|
|
|
+ * => Returns LSUP_OK if the pair was stored;
|
|
|
+ * => Returns LSUP_NOACTION if an equal key is already present (the stored
|
|
|
+ *    value is left unchanged).
|
|
|
+ */
|
|
|
+int LSUP_htable_insert(HTable *ht, const void *key, void *val)
|
|
|
+{
|
|
|
+    bucket_t *bucket, entry;
|
|
|
+    uint32_t hash;
|
|
|
+    unsigned i;
|
|
|
|
|
|
|
|
|
-int LSUP_htable_resize(HTable *ht, ht_size_t new_n_buckets)
|
|
|
-{ /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */
|
|
|
- flags_t *new_flags = 0;
|
|
|
- ht_size_t j = 1;
|
|
|
+    ASSERT(key != NULL);
|
|
|
+    hash = ht->key_hash_fn(key, ht->ksize, ht->hashkey);
|
|
|
|
|
|
|
|
|
- kroundup32(new_n_buckets); // TODO make universal (not only 32-bit)
|
|
|
+    /*
|
|
|
+     * Setup the bucket entry: borrow the caller's key in NOCOPY mode,
|
|
|
+     * otherwise store a private copy of ksize bytes.
|
|
|
+     */
|
|
|
+    if (ht->flags & HTABLE_NOCOPY) {
|
|
|
+        entry.key = (void *)(uintptr_t)key;
|
|
|
+    } else {
|
|
|
+        CRITICAL(entry.key = malloc(ht->ksize));
|
|
|
+        memcpy(entry.key, key, ht->ksize);
|
|
|
+    }
|
|
|
+    entry.hash = hash;
|
|
|
+    entry.val = val;
|
|
|
+    entry.psl = 0;
|
|
|
+
|
|
|
+    /*
|
|
|
+     * From the paper: "when inserting, if a record probes a location
|
|
|
+     * that is already occupied, the record that has traveled longer
|
|
|
+     * in its probe sequence keeps the location, and the other one
|
|
|
+     * continues on its probe sequence" (page 12).
|
|
|
+     *
|
|
|
+     * Basically: if the probe sequence length (PSL) of the element
|
|
|
+     * being inserted is greater than PSL of the element in the bucket,
|
|
|
+     * then swap them and continue.
|
|
|
+     */
|
|
|
+    i = fast_rem32(hash, ht->size, ht->divinfo);
|
|
|
+
|
|
|
+    while (1) {
|
|
|
+        bucket = &ht->buckets[i];
|
|
|
+        if (!bucket->key) {
|
|
|
+            /* Free bucket: leave the probe loop and store the entry. */
|
|
|
+            break;
|
|
|
+        }
|
|
|
+        ASSERT(validate_psl_p(ht, bucket, i));
|
|
|
+
|
|
|
+        /* There is a key in the bucket. */
|
|
|
+        if (bucket->hash == hash && ht->key_eq_fn(
|
|
|
+                    bucket->key, key, ht->ksize)) {
|
|
|
+            /* Duplicate key: do nothing, but release our private copy. */
|
|
|
+            if ((ht->flags & HTABLE_NOCOPY) == 0) {
|
|
|
+                free(entry.key);
|
|
|
+            }
|
|
|
+            return LSUP_NOACTION;
|
|
|
+        }
|
|
|
|
|
|
|
|
|
- if (new_n_buckets < 4) new_n_buckets = 4;
|
|
|
+        /*
|
|
|
+         * We found a "rich" bucket: place our key-value pair by swapping
|
|
|
+         * the bucket with our entry, then keep probing for the evictee.
|
|
|
+         */
|
|
|
+        if (entry.psl > bucket->psl) {
|
|
|
+            const bucket_t tmp = entry;
|
|
|
+
|
|
|
+            entry = *bucket;
|
|
|
+            *bucket = tmp;
|
|
|
+        }
|
|
|
+        entry.psl++;
|
|
|
|
|
|
|
|
|
- if (ht->size >= (ht_size_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) {
|
|
|
- // requested size is too small
|
|
|
- j = 0;
|
|
|
+        /* Continue to the next bucket. */
|
|
|
+        ASSERT(validate_psl_p(ht, bucket, i));
|
|
|
+        i = fast_rem32(i + 1, ht->size, ht->divinfo);
|
|
|
+    }
|
|
|
|
|
|
|
|
|
- } else {
|
|
|
- // hash table size to be changed (shrink or expand); rehash
|
|
|
- CRITICAL(new_flags = (flags_t*)malloc(new_n_buckets * sizeof(flags_t)));
|
|
|
+    /*
|
|
|
+     * Found a free bucket: insert the entry.
|
|
|
+     */
|
|
|
+    *bucket = entry; // copy
|
|
|
+    ht->nitems++;
|
|
|
|
|
|
|
|
|
- memset(new_flags, 0, new_n_buckets * sizeof(flags_t));
|
|
|
+    ASSERT(validate_psl_p(ht, bucket, i));
|
|
|
|
|
|
|
|
|
- if (ht->n_buckets < new_n_buckets) {
|
|
|
- // Expand.
|
|
|
- CRITICAL(ht->keys = realloc(ht->keys, new_n_buckets * ht->ksize));
|
|
|
+    return LSUP_OK;
|
|
|
+}
|
|
|
|
|
|
- if (ht->vsize > 0) {
|
|
|
- // Not for hash sets.
|
|
|
- CRITICAL(ht->vals = realloc(
|
|
|
- ht->vals, new_n_buckets * ht->vsize));
|
|
|
- }
|
|
|
- }
|
|
|
+
|
|
|
+/*
|
|
|
+ * Insert a key/value pair, growing the table first when it is too full.
|
|
|
+ *
|
|
|
+ * => Returns LSUP_OK if the pair was inserted;
|
|
|
+ * => Returns LSUP_NOACTION if the key is already present (the stored value
|
|
|
+ *    is left unchanged);
|
|
|
+ * => Returns the non-zero code from LSUP_htable_resize() if growing failed,
|
|
|
+ *    in which case nothing is inserted.
|
|
|
+ */
|
|
|
+int LSUP_htable_put(HTable *ht, const void *key, void *val)
|
|
|
+{
|
|
|
+    const size_t threshold = APPROX_85_PERCENT(ht->size);
|
|
|
+
|
|
|
+    /*
|
|
|
+     * If the load factor is more than the threshold, then resize.
|
|
|
+     */
|
|
|
+    if (UNLIKELY(ht->nitems > threshold)) {
|
|
|
+        /*
|
|
|
+         * Grow the hash table by doubling its size, but with
|
|
|
+         * a limit of MAX_GROWTH_STEP. The arithmetic is done in size_t
|
|
|
+         * rather than in the narrower type of ht->size.
|
|
|
+         */
|
|
|
+        const size_t grow_limit = (size_t)ht->size + MAX_GROWTH_STEP;
|
|
|
+        const size_t doubled = (size_t)ht->size << 1;
|
|
|
+        const size_t newsize = doubled < grow_limit ? doubled : grow_limit;
|
|
|
+        const int rc = LSUP_htable_resize(ht, newsize);
|
|
|
+
|
|
|
+        /* Inserting into a table that failed to grow could probe forever. */
|
|
|
+        if (rc != 0) {
|
|
|
+            return rc;
|
|
|
+        }
|
|
|
 }
|
|
|
|
|
|
|
|
|
- if (j) { /* rehashing is needed */
|
|
|
- for (j = 0; j != ht->n_buckets; ++j) {
|
|
|
- if (__ac_iseither(ht->flags, j) == 0) {
|
|
|
- khkey_t key = ht->keys[j];
|
|
|
- khval_t val;
|
|
|
- khint_t new_mask;
|
|
|
- new_mask = new_n_buckets - 1;
|
|
|
- if (ht->vsize > 0) val = ht->vals[j];
|
|
|
- __ac_set_isdel_true(ht->flags, j);
|
|
|
- while (1) { /* kick-out process; sort of like in Cuckoo hashing */
|
|
|
- khint_t k, i, step = 0;
|
|
|
- k = __hash_func(key);
|
|
|
- i = k & new_mask;
|
|
|
- while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask;
|
|
|
- __ac_set_isempty_false(new_flags, i);
|
|
|
- if (i < ht->n_buckets && __ac_iseither(ht->flags, i) == 0) { /* kick out the existing element */
|
|
|
- { khkey_t tmp = ht->keys[i]; ht->keys[i] = key; key = tmp; }
|
|
|
- if (ht->vsize > 0) { khval_t tmp = ht->vals[i]; ht->vals[i] = val; val = tmp; }
|
|
|
- __ac_set_isdel_true(ht->flags, i); /* mark it as deleted in the old hash table */
|
|
|
- } else { /* write the element and jump out of the loop */
|
|
|
- ht->keys[i] = key;
|
|
|
- if (ht->vsize > 0) ht->vals[i] = val;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if (ht->n_buckets > new_n_buckets) { /* shrink the hash table */
|
|
|
- ht->keys = (khkey_t*)realloc((void *)ht->keys, new_n_buckets * sizeof(khkey_t));
|
|
|
- if (ht->vsize > 0) ht->vals = (khval_t*)realloc((void *)ht->vals, new_n_buckets * sizeof(khval_t));
|
|
|
+    return LSUP_htable_insert(ht, key, val);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+/*
|
|
|
+ * Look up a key.
|
|
|
+ *
|
|
|
+ * => Returns LSUP_VALUE_ERR if key is NULL;
|
|
|
+ * => Returns LSUP_OK if the key is found; when valp is not NULL, *valp
|
|
|
+ *    receives the stored value (NULL when vsize is 0);
|
|
|
+ * => Returns LSUP_NORESULT if the key is absent; when valp is not NULL,
|
|
|
+ *    *valp is set to NULL.
|
|
|
+ */
|
|
|
+int LSUP_htable_get(const HTable *ht, const void *key, void **valp)
|
|
|
+{
|
|
|
+    uint32_t hash;
|
|
|
+    htsize_t n = 0, i;
|
|
|
+
|
|
|
+    /* Validate before hashing: the hash function dereferences the key. */
|
|
|
+    if (key == NULL) return LSUP_VALUE_ERR;
|
|
|
+
|
|
|
+    /* Buckets store a 32-bit hash, so compare against a 32-bit value. */
|
|
|
+    hash = ht->key_hash_fn(key, ht->ksize, ht->hashkey);
|
|
|
+    i = fast_rem32(hash, ht->size, ht->divinfo);
|
|
|
+
|
|
|
+    /*
|
|
|
+     * Lookup is a linear probe.
|
|
|
+     */
|
|
|
+    while (1) {
|
|
|
+        const bucket_t *bucket = &ht->buckets[i];
|
|
|
+        ASSERT(validate_psl_p(ht, bucket, i));
|
|
|
+
|
|
|
+        /*
|
|
|
+         * Stop probing if we hit an empty bucket; also, if we hit a
|
|
|
+         * bucket with PSL lower than the distance from the base location,
|
|
|
+         * then it means that we found the "rich" bucket which should
|
|
|
+         * have been captured, if the key was inserted -- see the central
|
|
|
+         * point of the algorithm in the insertion function.
|
|
|
+         *
|
|
|
+         * This is tested first so that key_eq_fn never sees a NULL key.
|
|
|
+         */
|
|
|
+        if (!bucket->key || n > bucket->psl) {
|
|
|
+            if (valp != NULL) {
|
|
|
+                *valp = NULL;
|
|
|
+            }
|
|
|
+            return LSUP_NORESULT;
|
|
|
 }
|
|
|
- kfree(ht->flags); /* free the working space */
|
|
|
- ht->flags = new_flags;
|
|
|
- ht->n_buckets = new_n_buckets;
|
|
|
- ht->n_occupied = ht->size;
|
|
|
- ht->upper_bound = (khint_t)(ht->n_buckets * __ac_HASH_UPPER + 0.5);
|
|
|
+        if (bucket->hash == hash && ht->key_eq_fn(
|
|
|
+                    bucket->key, key, ht->ksize)) {
|
|
|
+            if (valp != NULL) {
|
|
|
+                *valp = ht->vsize == 0 ? NULL : bucket->val;
|
|
|
+            }
|
|
|
+            return LSUP_OK;
|
|
|
+        }
|
|
|
+        n++;
|
|
|
+
|
|
|
+        /* Continue to the next bucket. */
|
|
|
+        i = fast_rem32(i + 1, ht->size, ht->divinfo);
|
|
|
 }
|
|
|
- return 0;
|
|
|
}
|
|
|
-SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret)
|
|
|
+
|
|
|
+
|
|
|
+/*
|
|
|
+ * Remove a key from the table.
|
|
|
+ *
|
|
|
+ * => Returns LSUP_NOACTION if the key is not present;
|
|
|
+ * => Returns LSUP_OK if the entry was removed. In copy mode the stored key
|
|
|
+ *    copy is freed. The table may be shrunk afterwards; a failed shrink is
|
|
|
+ *    ignored because the table stays valid at its current size.
|
|
|
+ */
|
|
|
+int LSUP_htable_del(HTable *ht, const void *key)
|
|
|
{
|
|
|
- khint_t x;
|
|
|
- if (h->n_occupied >= h->upper_bound) { /* update the hash table */
|
|
|
- if (h->n_buckets > (h->size<<1)) {
|
|
|
- if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */
|
|
|
- *ret = -1; return h->n_buckets;
|
|
|
- }
|
|
|
- } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */
|
|
|
- *ret = -1; return h->n_buckets;
|
|
|
+    const size_t threshold = APPROX_40_PERCENT(ht->size);
|
|
|
+    bucket_t *bucket;
|
|
|
+    uint32_t hash;
|
|
|
+    unsigned n = 0, i;
|
|
|
+
|
|
|
+    ASSERT(key != NULL);
|
|
|
+    hash = ht->key_hash_fn(key, ht->ksize, ht->hashkey);
|
|
|
+    i = fast_rem32(hash, ht->size, ht->divinfo);
|
|
|
+
|
|
|
+    while (1) {
|
|
|
+        /*
|
|
|
+         * The same probing logic as in the lookup function.
|
|
|
+         */
|
|
|
+        bucket = &ht->buckets[i];
|
|
|
+        if (!bucket->key || n > bucket->psl) {
|
|
|
+            return LSUP_NOACTION;
|
|
|
 }
|
|
|
- printf("capacity now %d\n\n", h->n_buckets);
|
|
|
- } /* TODO: to implement automatically shrinking; resize() already support shrinking */
|
|
|
- {
|
|
|
- khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0;
|
|
|
- x = site = h->n_buckets; k = __hash_func(key); i = k & mask;
|
|
|
- if (__ac_isempty(h->flags, i)) x = i; /* for speed up */
|
|
|
- else {
|
|
|
- last = i;
|
|
|
- printf("Duplicate\n");
|
|
|
- printf("Keys[i]: %lx\n", (size_t)h->keys[i]);
|
|
|
- printf("Key: %lx\n", (size_t)key);
|
|
|
- while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) {
|
|
|
- if (__ac_isdel(h->flags, i)) site = i;
|
|
|
- i = (i + (++step)) & mask;
|
|
|
- if (i == last) { x = site; break; }
|
|
|
- }
|
|
|
- if (x == h->n_buckets) {
|
|
|
- if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site;
|
|
|
- else x = i;
|
|
|
- }
|
|
|
+        ASSERT(validate_psl_p(ht, bucket, i));
|
|
|
+
|
|
|
+        if (bucket->hash == hash && ht->key_eq_fn(
|
|
|
+                    bucket->key, key, ht->ksize)) {
|
|
|
+            /* Found the entry to delete: stop probing. */
|
|
|
+            break;
|
|
|
 }
|
|
|
+        /* Continue to the next bucket. */
|
|
|
+        i = fast_rem32(i + 1, ht->size, ht->divinfo);
|
|
|
+        n++;
|
|
|
 }
|
|
|
- if (__ac_isempty(h->flags, x)) { /* not present at all */
|
|
|
- h->keys[x] = key;
|
|
|
- __ac_set_isboth_false(h->flags, x);
|
|
|
- ++h->size; ++h->n_occupied;
|
|
|
- *ret = 1;
|
|
|
- } else if (__ac_isdel(h->flags, x)) { /* deleted */
|
|
|
- h->keys[x] = key;
|
|
|
- __ac_set_isboth_false(h->flags, x);
|
|
|
- ++h->size;
|
|
|
- *ret = 2;
|
|
|
- } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */
|
|
|
- return x;
|
|
|
+
|
|
|
+    /*
|
|
|
+     * Free the bucket.
|
|
|
+     */
|
|
|
+    if ((ht->flags & HTABLE_NOCOPY) == 0) {
|
|
|
+        free(bucket->key);
|
|
|
+    }
|
|
|
+    ht->nitems--;
|
|
|
+
|
|
|
+    /*
|
|
|
+     * The probe sequence must be preserved in the deletion case.
|
|
|
+     * Use the backwards-shifting method to maintain low variance.
|
|
|
+     */
|
|
|
+    while (1) {
|
|
|
+        bucket_t *nbucket;
|
|
|
+
|
|
|
+        bucket->key = NULL;
|
|
|
+
|
|
|
+        i = fast_rem32(i + 1, ht->size, ht->divinfo);
|
|
|
+        nbucket = &ht->buckets[i];
|
|
|
+        ASSERT(validate_psl_p(ht, nbucket, i));
|
|
|
+
|
|
|
+        /*
|
|
|
+         * Stop if we reach an empty bucket or hit a key which
|
|
|
+         * is in its base (original) location.
|
|
|
+         */
|
|
|
+        if (!nbucket->key || nbucket->psl == 0) {
|
|
|
+            break;
|
|
|
+        }
|
|
|
+
|
|
|
+        nbucket->psl--;
|
|
|
+        *bucket = *nbucket;
|
|
|
+        bucket = nbucket;
|
|
|
+    }
|
|
|
+
|
|
|
+    /*
|
|
|
+     * If the load factor is less than threshold, then shrink by
|
|
|
+     * halving the size, but not less than 1.
|
|
|
+     */
|
|
|
+    if (ht->nitems < threshold) {
|
|
|
+        const size_t half = ht->size >> 1;
|
|
|
+
|
|
|
+        (void)LSUP_htable_resize(ht, half > 0 ? half : 1);
|
|
|
+    }
|
|
|
+
|
|
|
+    return LSUP_OK;
|
|
|
}
|
|
|
-SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x)
|
|
|
+
|
|
|
+
|
|
|
+/*
|
|
|
+ * Step through the occupied buckets of the table.
|
|
|
+ *
|
|
|
+ * *cur is the caller's cursor (a bucket index) and is advanced past every
|
|
|
+ * bucket examined. For the next occupied bucket, ksize bytes of the stored
|
|
|
+ * key are copied into key and, when valp is not NULL and vsize is non-zero,
|
|
|
+ * *valp receives the stored value.
|
|
|
+ *
|
|
|
+ * => Returns LSUP_OK when an entry was produced;
|
|
|
+ * => Returns LSUP_END when the cursor has reached the end of the table.
|
|
|
+ */
|
|
|
+extern int LSUP_htable_iter(
|
|
|
+ LSUP_HTable *ht, htsize_t *cur, void *key, void **valp)
|
|
|
{
|
|
|
- if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {
|
|
|
- __ac_set_isdel_true(h->flags, x);
|
|
|
- --h->size;
|
|
|
+    while (*cur < ht->size) {
|
|
|
+        const bucket_t *bucket = &ht->buckets[*cur];
|
|
|
+
|
|
|
+        /* Advance the cursor value itself, not the local pointer to it. */
|
|
|
+        (*cur)++;
|
|
|
+
|
|
|
+        if (!bucket->key) continue;
|
|
|
+
|
|
|
+        memcpy(key, bucket->key, ht->ksize);
|
|
|
+        if (valp != NULL && ht->vsize > 0) *valp = bucket->val;
|
|
|
+
|
|
|
+        return LSUP_OK;
|
|
|
 }
|
|
|
+    return LSUP_END;
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- printf("Not empty: %d\n", !__ac_isempty(h->flags, i));
|
|
|
- printf("deleted: %d\n", __ac_isdel(h->flags, i));
|
|
|
- printf("Hash does not match: %d\n\n", !__hash_equal(h->keys[i], key));
|
|
|
-*/
|
|
|
-#define KHASH_DECLARE(name, khkey_t, khval_t)
|
|
|
-__KHASH_TYPE(name, khkey_t, khval_t)
|
|
|
-__KHASH_PROTOTYPES(name, khkey_t, khval_t)
|
|
|
-
|
|
|
-#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
|
|
|
-__KHASH_TYPE(name, khkey_t, khval_t)
|
|
|
-__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
|
|
|
-
|
|
|
-#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
|
|
|
-KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
|
|
|
-
|
|
|
-/* --- BEGIN OF HASH FUNCTIONS --- */
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Integer hash function
|
|
|
-@param key The integer [ht_size_t]
|
|
|
-@return The hash value [khint_t]
|
|
|
-*/
|
|
|
-#define kh_int_hash_func(key) (ht_size_t)(key)
|
|
|
-/*! @function
|
|
|
-@abstract Integer comparison function
|
|
|
-*/
|
|
|
-#define kh_int_hash_equal(a, b) ((a) == (b))
|
|
|
-/*! @function
|
|
|
-@abstract 64-bit integer hash function
|
|
|
-@param key The integer [khint64_t]
|
|
|
-@return The hash value [khint_t]
|
|
|
-*/
|
|
|
-#define kh_int64_hash_func(key) (ht_size_t)((key)>>33^(key)^(key)<<11)
|
|
|
-/*! @function
|
|
|
-@abstract 64-bit integer comparison function
|
|
|
-*/
|
|
|
-#define kh_int64_hash_equal(a, b) ((a) == (b))
|
|
|
-/*! @function
|
|
|
-@abstract const char* hash function
|
|
|
-@param s Pointer to a null terminated string
|
|
|
-@return The hash value
|
|
|
-*/
|
|
|
-static kh_inline khint_t __ac_X31_hash_string(const char *s)
|
|
|
+
|
|
|
+/*
|
|
|
+ * Release everything owned by the table, but not the handle itself.
|
|
|
+ *
|
|
|
+ * In copy mode (HTABLE_NOCOPY not set) every stored key copy is freed; the
|
|
|
+ * bucket array is freed unless it is the bucket embedded in the handle.
|
|
|
+ * A NULL handle is ignored, and the table is left empty so that a repeated
|
|
|
+ * call is harmless.
|
|
|
+ *
|
|
|
+ * NOTE(review): key_data and val_data, reallocated by LSUP_htable_resize(),
|
|
|
+ * are not released here -- confirm who owns them.
|
|
|
+ */
|
|
|
+void LSUP_htable_done(HTable *ht)
|
|
|
{
|
|
|
-khint_t h = (khint_t)*s;
|
|
|
-if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
|
|
|
-return h;
|
|
|
+    if (ht == NULL) {
|
|
|
+        return;
|
|
|
+    }
|
|
|
+    if ((ht->flags & HTABLE_NOCOPY) == 0) {
|
|
|
+        for (htsize_t i = 0; i < ht->size; i++) {
|
|
|
+            const bucket_t *bucket = &ht->buckets[i];
|
|
|
+
|
|
|
+            /* free(NULL) is a no-op, so empty buckets need no special case. */
|
|
|
+            free(bucket->key);
|
|
|
+        }
|
|
|
+    }
|
|
|
+    if (ht->buckets != &ht->init_bucket) {
|
|
|
+        free(ht->buckets);
|
|
|
+    }
|
|
|
+    /* Leave no dangling pointers behind. */
|
|
|
+    ht->buckets = NULL;
|
|
|
+    ht->size = 0;
|
|
|
+    ht->nitems = 0;
|
|
|
}
|
|
|
-/*! @function
|
|
|
-@abstract Another interface to const char* hash function
|
|
|
-@param key Pointer to a null terminated string [const char*]
|
|
|
-@return The hash value [khint_t]
|
|
|
-*/
|
|
|
-#define kh_str_hash_func(key) __ac_X31_hash_string(key)
|
|
|
-/*! @function
|
|
|
-@abstract Const char* comparison function
|
|
|
-*/
|
|
|
-#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
|
|
|
-
|
|
|
-static kh_inline khint_t __ac_Wang_hash(khint_t key)
|
|
|
+
|
|
|
+
|
|
|
+/*
|
|
|
+ * Destroy a heap-allocated table: release its contents with
|
|
|
+ * LSUP_htable_done() and then free the handle. A NULL handle is ignored.
|
|
|
+ */
|
|
|
+void LSUP_htable_free(HTable *ht)
|
|
|
{
|
|
|
-key += ~(key << 15);
|
|
|
-key ^= (key >> 10);
|
|
|
-key += (key << 3);
|
|
|
-key ^= (key >> 6);
|
|
|
-key += ~(key << 11);
|
|
|
-key ^= (key >> 16);
|
|
|
-return key;
|
|
|
+    if (!LIKELY(ht != NULL)) {
|
|
|
+        return;
|
|
|
+    }
|
|
|
+
|
|
|
+    LSUP_htable_done(ht);
|
|
|
+    free(ht);
|
|
|
}
|
|
|
-#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key)
|
|
|
-
|
|
|
-/* --- END OF HASH FUNCTIONS --- */
|
|
|
-
|
|
|
-/* Other convenient macros... */
|
|
|
-
|
|
|
-/*!
|
|
|
-@abstract Type of the hash table.
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-*/
|
|
|
-#define khash_t(name) kh_##name##_t
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Initiate a hash table.
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@return Pointer to the hash table [khash_t(name)*]
|
|
|
-*/
|
|
|
-#define kh_init(name) kh_init_##name()
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Destroy a hash table.
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-*/
|
|
|
-#define kh_destroy(name, h) kh_destroy_##name(h)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Reset a hash table without deallocating memory.
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-*/
|
|
|
-#define kh_clear(name, h) kh_clear_##name(h)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Resize a hash table.
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@param s New size [khint_t]
|
|
|
-*/
|
|
|
-#define kh_resize(name, h, s) kh_resize_##name(h, s)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Insert a key to the hash table.
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@param k Key [type of keys]
|
|
|
-@param r Extra return code: -1 if the operation failed;
|
|
|
- 0 if the key is present in the hash table;
|
|
|
- 1 if the bucket is empty (never used); 2 if the element in
|
|
|
- the bucket has been deleted [int*]
|
|
|
-@return Iterator to the inserted element [khint_t]
|
|
|
-*/
|
|
|
-#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Retrieve a key from the hash table.
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@param k Key [type of keys]
|
|
|
-@return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
|
|
|
-*/
|
|
|
-#define kh_get(name, h, k) kh_get_##name(h, k)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Remove a key from the hash table.
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@param k Iterator to the element to be deleted [khint_t]
|
|
|
-*/
|
|
|
-#define kh_del(name, h, k) kh_del_##name(h, k)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Test whether a bucket contains data.
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@param x Iterator to the bucket [khint_t]
|
|
|
-@return 1 if containing data; 0 otherwise [int]
|
|
|
-*/
|
|
|
-#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Get key given an iterator
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@param x Iterator to the bucket [khint_t]
|
|
|
-@return Key [type of keys]
|
|
|
-*/
|
|
|
-#define kh_key(h, x) ((h)->keys[x])
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Get value given an iterator
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@param x Iterator to the bucket [khint_t]
|
|
|
-@return Value [type of values]
|
|
|
-@discussion For hash sets, calling this results in segfault.
|
|
|
-*/
|
|
|
-#define kh_val(h, x) ((h)->vals[x])
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Alias of kh_val()
|
|
|
-*/
|
|
|
-#define kh_value(h, x) ((h)->vals[x])
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Get the start iterator
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@return The start iterator [khint_t]
|
|
|
-*/
|
|
|
-#define kh_begin(h) (khint_t)(0)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Get the end iterator
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@return The end iterator [khint_t]
|
|
|
-*/
|
|
|
-#define kh_end(h) ((h)->n_buckets)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Get the number of elements in the hash table
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@return Number of elements in the hash table [khint_t]
|
|
|
-*/
|
|
|
-#define kh_size(h) ((h)->size)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Get the number of buckets in the hash table
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@return Number of buckets in the hash table [khint_t]
|
|
|
-*/
|
|
|
-#define kh_n_buckets(h) ((h)->n_buckets)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Iterate over the entries in the hash table
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@param kvar Variable to which key will be assigned
|
|
|
-@param vvar Variable to which value will be assigned
|
|
|
-@param code Block of code to execute
|
|
|
-*/
|
|
|
-#define kh_foreach(h, kvar, vvar, code) { khint_t __i;
|
|
|
-for (__i = kh_begin(h); __i != kh_end(h); ++__i) {
|
|
|
- if (!kh_exist(h,__i)) continue;
|
|
|
- (kvar) = kh_key(h,__i);
|
|
|
- (vvar) = kh_val(h,__i);
|
|
|
- code;
|
|
|
-} }
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Iterate over the values in the hash table
|
|
|
-@param h Pointer to the hash table [khash_t(name)*]
|
|
|
-@param vvar Variable to which value will be assigned
|
|
|
-@param code Block of code to execute
|
|
|
-*/
|
|
|
-#define kh_foreach_value(h, vvar, code) { khint_t __i;
|
|
|
-for (__i = kh_begin(h); __i != kh_end(h); ++__i) {
|
|
|
- if (!kh_exist(h,__i)) continue;
|
|
|
- (vvar) = kh_val(h,__i);
|
|
|
- code;
|
|
|
-} }
|
|
|
-
|
|
|
-/* More convenient interfaces */
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Instantiate a hash set containing integer keys
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-*/
|
|
|
-#define KHASH_SET_INIT_INT(name)
|
|
|
-KHASH_INIT(name, ht_size_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Instantiate a hash map containing integer keys
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@param khval_t Type of values [type]
|
|
|
-*/
|
|
|
-#define KHASH_MAP_INIT_INT(name, khval_t)
|
|
|
-KHASH_INIT(name, ht_size_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Instantiate a hash set containing 64-bit integer keys
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-*/
|
|
|
-#define KHASH_SET_INIT_INT64(name)
|
|
|
-KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Instantiate a hash map containing 64-bit integer keys
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@param khval_t Type of values [type]
|
|
|
-*/
|
|
|
-#define KHASH_MAP_INIT_INT64(name, khval_t)
|
|
|
-KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
|
|
|
-
|
|
|
-typedef const char *kh_cstr_t;
|
|
|
-/*! @function
|
|
|
-@abstract Instantiate a hash map containing const char* keys
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-*/
|
|
|
-#define KHASH_SET_INIT_STR(name)
|
|
|
-KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
|
|
|
-
|
|
|
-/*! @function
|
|
|
-@abstract Instantiate a hash map containing const char* keys
|
|
|
-@param name Name of the hash table [symbol]
|
|
|
-@param khval_t Type of values [type]
|
|
|
-*/
|
|
|
-#define KHASH_MAP_INIT_STR(name, khval_t)
|
|
|
-KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
|
|
|
-
|