#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#include "htable.h"

#define BUCKET_EMPTY    (1 << 0)
#define BUCKET_DELETED  (1 << 1)

#if defined(DEBUG)
#define ASSERT assert
#else
#define ASSERT(x)
#endif

#define MAX_GROWTH_STEP (1024U * 1024)

#define APPROX_85_PERCENT(x)    (((x) * 870) >> 10)
#define APPROX_40_PERCENT(x)    (((x) * 409) >> 10)

#define MIN_HT_SIZE (1 << 3)


typedef struct {
    LSUP_TripleKey      key;    // TODO Make configurable but
                                // statically allocated via macros.
    void *              val;
    uint64_t            hash;
    uint16_t            psl;
} bucket_t;

typedef struct htable_t {
    htsize_t            size;
    htsize_t            nitems;
    unsigned            flags;
    uint64_t            divinfo;
    bucket_t *          buckets;
    uint64_t            seed;
    key_hash_fn_t       key_hash_fn;
    key_eq_fn_t         key_eq_fn;
    ksize_t             ksize;
    vsize_t             vsize;
    void *              del_marker; // Used to fill deleted buckets.
} HTable;


/* * * GENERIC UTILITIES * * */

static inline bool is_empty_bucket(const HTable *ht, const bucket_t *bucket)
{ return memcmp(bucket->key, ht->del_marker, ht->ksize) == 0; }

/*
 * Find the last set bit, i.e. the position of the most significant 1-bit.
 */
static inline int fls(int x)
{ return x ? (sizeof(int) * CHAR_BIT) - __builtin_clz(x) : 0; }

/*
 * Fast 32-bit division and remainder.
 *
 * Reference:
 *
 *  Torbjörn Granlund and Peter L. Montgomery, "Division by Invariant
 *  Integers Using Multiplication", ACM SIGPLAN Notices, Issue 6, Vol 29,
 *  http://gmplib.org/~tege/divcnst-pldi94.pdf, 61-72, June 1994.
 *
 * The following example computes q = a / b and r = a % b:
 *
 *  uint64_t divinfo = fast_div32_init(b);
 *  q = fast_div32(a, b, divinfo);
 *  r = fast_rem32(a, b, divinfo);
 */
static inline uint64_t fast_div32_init(uint32_t div)
{
    uint64_t mt;
    uint8_t s1, s2;
    int l;

    l = fls(div - 1);
    mt = (uint64_t)(0x100000000ULL * ((1ULL << l) - div));
    s1 = (l > 1) ? 1U : (uint8_t)l;
    s2 = (l == 0) ? 0 : (uint8_t)(l - 1);

    return (uint64_t)(mt / div + 1) << 32 | (uint32_t)s1 << 8 | s2;
}

static inline uint32_t fast_div32(uint32_t v, uint32_t div, uint64_t divinfo)
{
    const uint32_t m = divinfo >> 32;
    const unsigned s1 = (divinfo & 0x0000ff00) >> 8;
    const unsigned s2 = (divinfo & 0x000000ff);
    const uint32_t t = (uint32_t)(((uint64_t)v * m) >> 32);

    (void)div; // unused

    return (t + ((v - t) >> s1)) >> s2;
}

static inline uint32_t fast_rem32(uint32_t v, uint32_t div, uint64_t divinfo)
{ return v - div * fast_div32(v, div, divinfo); }

static int __attribute__((__unused__))
validate_psl_p(const HTable *ht, const bucket_t *bucket, unsigned i)
{
    unsigned base_i = fast_rem32(bucket->hash, ht->size, ht->divinfo);
    unsigned diff = (base_i > i) ? ht->size - base_i + i : i - base_i;

    return is_empty_bucket(ht, bucket) || diff == bucket->psl;
}


/* * * PUBLIC API * * */

HTable *LSUP_htable_new(
        htsize_t size, ksize_t ksize, vsize_t vsize,
        key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, unsigned flags)
{
    HTable *ht;

    CRITICAL(ht = calloc(1, sizeof(HTable)));
    ht->ksize = ksize;
    ht->vsize = vsize;
    ht->key_hash_fn = key_hash_fn;
    ht->key_eq_fn = key_eq_fn;
    ht->flags = flags;
    ht->size = 0;

    CRITICAL(ht->del_marker = calloc(1, ksize));

    LSUP_htable_resize(ht, size);

    return ht;
}

/**
 * Resize a table.
 */
int LSUP_htable_resize(HTable *ht, htsize_t newsize)
{
    TRACE("Resizing htable to %lu.", (size_t)newsize);

    bucket_t *oldbuckets = ht->buckets;
    const htsize_t oldsize = ht->size;

    // Clip size to min & max limits.
    if (newsize < MIN_HT_SIZE) newsize = MIN_HT_SIZE;
    if (newsize > HTSIZE_MAX) newsize = HTSIZE_MAX;

    CRITICAL(ht->buckets = calloc(newsize, sizeof(bucket_t)));
    ht->size = newsize;
    ht->nitems = 0;

    ht->divinfo = fast_div32_init(newsize);
    // random() returns a long; cast before shifting so the shift by 32
    // is well defined even where long is 32 bits.
    ht->seed ^= (uint64_t)random() | ((uint64_t)random() << 32);

    for (unsigned i = 0; i < oldsize; i++) {
        const bucket_t *bucket = &oldbuckets[i];

        // Skip the empty buckets.
        if (!is_empty_bucket(ht, bucket))
            LSUP_htable_insert(ht, bucket->key, bucket->val);
    }

    if (oldbuckets != NULL) free(oldbuckets);

    return LSUP_OK;
}

htsize_t LSUP_htable_capacity(LSUP_HTable *ht)
{ return ht->size; }

htsize_t LSUP_htable_size(LSUP_HTable *ht)
{ return ht->nitems; }

/*
 * Insert without resizing (assuming resizing is already done).
 */
int LSUP_htable_insert(HTable *ht, const void *key, void *val)
{
    bucket_t *bucket, entry;

    ASSERT(key != NULL);

    // Set up the bucket entry.
    memcpy(entry.key, key, ht->ksize);
    //memcpy(entry.val, val, ht->vsize);
    entry.val = val;
    entry.hash = ht->key_hash_fn(entry.key, ht->ksize, ht->seed);
    entry.psl = 0;

    /*
     * From the paper: "when inserting, if a record probes a location
     * that is already occupied, the record that has traveled longer
     * in its probe sequence keeps the location, and the other one
     * continues on its probe sequence" (page 12).
     *
     * Basically: if the probe sequence length (PSL) of the element
     * being inserted is greater than the PSL of the element in the
     * bucket, then swap them and continue.
     */
    htsize_t i = fast_rem32(entry.hash, ht->size, ht->divinfo);

    for (;;) {
        bucket = ht->buckets + i;

        if (is_empty_bucket(ht, bucket)) break;

        ASSERT(validate_psl_p(ht, bucket, i));

        // There is a key in the bucket.
        TRACE("Entry key: {%lu, %lu, %lu}; bucket key: {%lu, %lu, %lu}",
                entry.key[0], entry.key[1], entry.key[2],
                bucket->key[0], bucket->key[1], bucket->key[2]);
        if (ht->key_eq_fn(bucket->key, entry.key, ht->ksize)) {
            // Duplicate key: do nothing.
            TRACE(STR, "Duplicate key.");

            return LSUP_NOACTION;
        }

        // We found a "rich" bucket. Capture its location.
        if (entry.psl > bucket->psl) {
            bucket_t tmp;

            TRACE(STR, "SWAP");
            /*
             * Place our key-value pair by swapping the "rich"
             * bucket with our entry. Copy the structures.
             */
            tmp = entry;
            entry = *bucket;
            *bucket = tmp;
        }
        entry.psl++;

        // Continue to the next bucket.
        ASSERT(validate_psl_p(ht, bucket, i));
        i = fast_rem32(i + 1, ht->size, ht->divinfo);
    }

    // Found a free bucket: insert the entry.
    TRACE("Inserting {%lu, %lu, %lu} in bucket #%d",
            entry.key[0], entry.key[1], entry.key[2], i);
    memcpy(bucket, &entry, sizeof(bucket_t)); // copy
    ht->nitems++;

    ASSERT(validate_psl_p(ht, bucket, i));

    return LSUP_OK;
}

/*
 * LSUP_htable_put: insert a value for the given key, growing the table
 * first if needed.
 *
 * => If the key is already present, return LSUP_NOACTION.
 * => Otherwise, on successful insert, return LSUP_OK.
 */
int LSUP_htable_put(HTable *ht, const void *key, void *val)
{
    const size_t threshold = APPROX_85_PERCENT(ht->size);

    // If the load factor is more than the threshold, then resize.
    if (UNLIKELY(ht->nitems > threshold)) {
        /*
         * Grow the hash table by doubling its size, but with
         * a limit of MAX_GROWTH_STEP.
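         *
         * For example: with MAX_GROWTH_STEP of 1M buckets, a table of
         * 512 buckets doubles to 1024, while a table of 4M buckets
         * grows only to 5M (4M + 1M) rather than 8M.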
         */
        const size_t grow_limit = ht->size + MAX_GROWTH_STEP;
        const size_t newsize = min(ht->size << 1, grow_limit);

        LSUP_htable_resize(ht, newsize);
    }

    return LSUP_htable_insert(ht, key, val);
}

int LSUP_htable_get(const HTable *ht, const void *key, void **valp)
{
    if (key == NULL) return LSUP_VALUE_ERR;

    const uint64_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
    htsize_t n = 0, i = fast_rem32(hash, ht->size, ht->divinfo);

    // Lookup is a linear probe.
    for (;;) {
        bucket_t *bucket = ht->buckets + i;

        ASSERT(validate_psl_p(ht, bucket, i));

        if (ht->key_eq_fn(bucket->key, key, ht->ksize)) {
            // Key found within max probe length.
            if (valp != NULL)
                *valp = bucket->val;

            return LSUP_OK;
        }

        /*
         * Stop probing if we hit an empty bucket; also, if we hit a
         * bucket with PSL lower than the distance from the base
         * location, then it means that we found the "rich" bucket
         * which would have been captured, had the key been inserted
         * -- see the central point of the algorithm in the insertion
         * function.
         */
        if (is_empty_bucket(ht, bucket) || n > bucket->psl) {
            if (valp != NULL) *valp = NULL;

            return LSUP_NORESULT;
        }

        n++;

        // Continue to the next bucket.
        i = fast_rem32(i + 1, ht->size, ht->divinfo);
    }
}

int LSUP_htable_del(HTable *ht, const void *key)
{
    ASSERT(key != NULL);

    const size_t threshold = APPROX_40_PERCENT(ht->size);
    const uint64_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
    htsize_t n = 0, i = fast_rem32(hash, ht->size, ht->divinfo);
    bucket_t *bucket;

    for (;;) {
        // The same probing logic as in the lookup function.
        bucket = ht->buckets + i;

        if (is_empty_bucket(ht, bucket) || n > bucket->psl)
            return LSUP_NOACTION;

        ASSERT(validate_psl_p(ht, bucket, i));

        if (ht->key_eq_fn(bucket->key, key, ht->ksize)) break;

        // Continue to the next bucket.
        i = fast_rem32(i + 1, ht->size, ht->divinfo);
        n++;
    }

    ht->nitems--;

    /*
     * The probe sequence must be preserved in the deletion case.
     * Use the backwards-shifting method to maintain low variance.
     */
    for (;;) {
        bucket_t *nbucket;

        memcpy(bucket->key, ht->del_marker, ht->ksize);

        i = fast_rem32(i + 1, ht->size, ht->divinfo);
        nbucket = ht->buckets + i;
        ASSERT(validate_psl_p(ht, nbucket, i));

        /*
         * Stop if we reach an empty bucket or hit a key which
         * is in its base (original) location.
         */
        if (is_empty_bucket(ht, nbucket) || nbucket->psl == 0)
            break;

        nbucket->psl--;
        *bucket = *nbucket;
        bucket = nbucket;
    }

    /*
     * If the load factor is less than the threshold, then shrink by
     * halving the size, but not to less than 1 (the resize function
     * clips the result to MIN_HT_SIZE anyway).
     */
    if (ht->nitems < threshold) {
        size_t newsize = max(ht->size >> 1, 1);
        (void)LSUP_htable_resize(ht, newsize);
    }

    return LSUP_OK;
}

int LSUP_htable_iter(
        LSUP_HTable *ht, htsize_t *cur, void **keyp, void **valp)
{
    while (*cur < ht->size) {
        bucket_t *bucket = ht->buckets + *cur;

        (*cur)++;

        if (is_empty_bucket(ht, bucket)) {
            TRACE("Empty bucket: %d. Skipping.", (*cur) - 1);
            continue;
        }

        // Copy key, and if relevant, value.
        *keyp = bucket->key;
        if (valp != NULL && ht->vsize > 0)
            *valp = bucket->val;

        return LSUP_OK;
    }

    return LSUP_END;
}

void LSUP_htable_done(HTable *ht)
{
    if (LIKELY(ht->buckets != NULL)) free(ht->buckets);

    free(ht->del_marker);
}

void LSUP_htable_free(HTable *ht)
{
    if (LIKELY(ht != NULL)) {
        LSUP_htable_done(ht);
        free(ht);
    }
}
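
/*
 * Minimal usage sketch, compiled only with -DHTABLE_DEMO. This is not
 * part of the API: the callback signatures are inferred from the call
 * sites above, LSUP_TripleKey is assumed to be an array of three 64-bit
 * term keys (per the TRACE calls), and demo_hash()/demo_eq() are
 * stand-ins for whatever hash and equality functions the project
 * actually supplies.
 */
#ifdef HTABLE_DEMO
#include <stdio.h>

// Toy FNV-1a-style hash over the raw key bytes, mixed with the seed.
static uint64_t demo_hash(const void *key, ksize_t ksize, uint64_t seed)
{
    const unsigned char *p = key;
    uint64_t h = 0xcbf29ce484222325ULL ^ seed;

    for (ksize_t i = 0; i < ksize; i++) {
        h ^= p[i];
        h *= 0x100000001b3ULL;
    }

    return h;
}

// Byte-wise key equality.
static bool demo_eq(const void *a, const void *b, ksize_t ksize)
{ return memcmp(a, b, ksize) == 0; }

int main(void)
{
    // Size 0 is clipped up to MIN_HT_SIZE by the initial resize.
    HTable *ht = LSUP_htable_new(
            0, sizeof(LSUP_TripleKey), sizeof(void *),
            demo_hash, demo_eq, 0);

    LSUP_TripleKey k = {1, 2, 3};
    char *v = "hello";

    LSUP_htable_put(ht, k, v);

    void *out;
    if (LSUP_htable_get(ht, k, &out) == LSUP_OK)
        printf("Found: %s\n", (char *)out);

    // Iterate over all live entries.
    htsize_t cur = 0;
    void *kp, *vp;
    while (LSUP_htable_iter(ht, &cur, &kp, &vp) == LSUP_OK)
        printf("Entry at bucket %u\n", (unsigned)cur - 1);

    LSUP_htable_del(ht, k);
    LSUP_htable_free(ht);

    return 0;
}
#endif  // HTABLE_DEMO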