Selaa lähdekoodia

Remove htable.

Stefano Cossu 4 vuotta sitten
vanhempi
commit
3f8d9b4690
2 muutettua tiedostoa jossa 0 lisäystä ja 663 poistoa
  1. 0 190
      include/htable.h
  2. 0 473
      src/htable.c

+ 0 - 190
include/htable.h

@@ -1,190 +0,0 @@
-/**
- * Hash table implementation.
- *
- * This code is hack...ahem, built upon rhashmap:
- * https://github.com/rmind/rhashmap
- *
- * After trying several hash map implementations, none met all the requirements
- * (small, single-file; accept arbitrarily-sized elements; no undebuggable
- * macro spaghetti; reasonably fast), so I decided to expand an existing
- * library and adapt it to a data type agnostic model.
- *
- * This table stores keys and optionally values in a contiguous array of
- * arbitrary, but fixed, data sizes. For small keys / values of unusual size,
- * this is convenient because it avoids creating (and having to manage) a
- * pointer for each key and value. The data types are set by casting on
- * retrieval.
- *
- * For larger or variably-sized keys or values, or ones that are not convenient
- * to copy into the table, pointers can obviously be used by specifying ptr_t
- * key and/or value size.
- */
-
-#ifndef _LSUP_HTABLE_H
-#define _LSUP_HTABLE_H
-
-#include "core.h"
-
-/* Max number of entries in the table and hash size. */
-
-/*
- * HTABLE_HUGE_SIZE: both the hash and the entry count are size_t, so the
- * table size is limited only by SIZE_MAX, which is probably much more than
- * any current system would want to handle in memory.
- */
-#if defined(HTABLE_HUGE_SIZE)
-typedef size_t ht_hash_t;
-typedef size_t htsize_t;
-#define HTSIZE_MAX SIZE_MAX
-
-/*
- * HTABLE_BIG_SIZE: allows up to UINT32_MAX entries (4,294,967,295) and a
- * large (size_t) hash to take full advantage of a very large table.
- */
-#elif defined(HTABLE_BIG_SIZE)
-typedef size_t ht_hash_t;
-typedef uint32_t htsize_t;
-#define HTSIZE_MAX UINT32_MAX
-
-/*
- * Default: allows up to UINT32_MAX entries but the hash is only 32 bits
- * wide, thus it is only recommended for up to a few million entries.
- */
-#else
-typedef uint32_t ht_hash_t;
-typedef uint32_t htsize_t;
-#define HTSIZE_MAX UINT32_MAX
-#endif
-
-// Type holding the size of a key entry, in bytes. With HTABLE_BIG_KEY a key
-// may be up to 65535 bytes (just under 64 KiB); otherwise, up to 255 bytes.
-#ifdef HTABLE_BIG_KEY
-typedef uint16_t ksize_t;
-#else
-typedef uint8_t ksize_t;
-#endif
-
-// Type holding the size of a value entry, in bytes. With HTABLE_BIG_VAL a
-// value may be up to 65535 bytes (just under 64 KiB); otherwise, up to 255
-// bytes. For values that may be larger than that, use pointers.
-#ifdef HTABLE_BIG_VAL
-typedef uint16_t vsize_t;
-#else
-typedef uint8_t vsize_t;
-#endif
-
-
-/**
- * Key hashing function.
- *
- * Takes a pointer to the key, the key length in bytes and a seed, and returns
- * a 64-bit hash of the key.
- */
-typedef uint64_t (*key_hash_fn_t)(
-        const void *key, ksize_t size, uint64_t seed);
-
-/**
- * Key equality function (true: keys are equal).
- *
- * Takes two pointers to the keys to compare and the key length in bytes
- * (which is constant within the hash table).
- */
-typedef bool (*key_eq_fn_t)(const void *a, const void *b, ksize_t size);
-
-/**
- * Hash table type (opaque; the structure is defined in the implementation).
- *
- * By default it should keep a good performance up to a few million entries
- * due to its small hash size.
- *
- * If compiled with -DHTABLE_BIG_SIZE it uses a larger hash and supports up to
- * UINT32_MAX entries (~4 billion) for very large in-memory graphs.
- *
- * If compiled with -DHTABLE_HUGE_SIZE it supports up to SIZE_MAX entries
- * (probably more than you will ever want to load in memory).
- */
-typedef struct htable_t LSUP_HTable;
-
-/**
- * @brief Create a new hash table.
- *
- * @param htsize_t size[in]: Initial number of buckets. Values below the
- *  implementation's minimum table size are raised to that minimum.
- *
- * @param ksize_t ksize[in]: Size of each key, in bytes.
- *
- * @param vsize_t vsize[in]: Size of each value, in bytes (presumably 0 for a
- *  set -- confirm with callers).
- *
- * @param key_hash_fn_t key_hash_fn[in]: Key hashing function.
- *
- * @param key_eq_fn_t key_eq_fn[in]: Key equality function.
- *
- * @param LSUP_HTable **ht[out]: Receives the newly allocated table handle.
- *
- * @return ENOMEM if the table handle cannot be allocated; otherwise the
- *  return value of LSUP_htable_resize().
- */
-LSUP_rc
-LSUP_htable_new(
-        htsize_t size, ksize_t ksize, vsize_t vsize,
-        key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, LSUP_HTable **ht);
-
-/**
- * @brief Duplicate a hash table into a newly allocated one.
- *
- * @param const LSUP_HTable *src[in]: Table to copy from.
- *
- * @param LSUP_HTable **dest[out]: Receives the handle of the copy.
- *
- * @return LSUP_OK on success.
- */
-LSUP_rc
-LSUP_htable_copy(const LSUP_HTable *src, LSUP_HTable **dest);
-
-/**
- * @brief Rebuild a table with a new number of buckets.
- *
- * New storage is allocated and the existing entries are inserted again.
- *
- * @param LSUP_HTable *ht[in]: Hash table or set.
- *
- * @param htsize_t newsize[in]: Requested number of buckets. Values below the
- *  implementation's minimum table size are raised to that minimum.
- *
- * @return LSUP_OK on success.
- */
-LSUP_rc
-LSUP_htable_resize(LSUP_HTable *ht, htsize_t newsize);
-
-/**
- * @brief Number of buckets currently allocated for the table.
- */
-htsize_t
-LSUP_htable_capacity(LSUP_HTable *ht);
-
-/**
- * @brief Number of entries currently stored in the table.
- */
-htsize_t
-LSUP_htable_size(LSUP_HTable *ht);
-
-/**
- * @brief Insert a key and its value without resizing the table.
- *
- * The caller is expected to have made room beforehand; use
- * LSUP_htable_put() to grow the table automatically.
- *
- * @param LSUP_HTable *ht[in]: Hash table or set.
- *
- * @param const void *key[in]: Key to insert; ksize bytes are copied.
- *
- * @param const void *val[in]: Value to insert; vsize bytes are copied.
- *
- * @return LSUP_OK if the entry was inserted; LSUP_NOACTION if an equal key
- *  was already present.
- */
-LSUP_rc
-LSUP_htable_insert(LSUP_HTable *ht, const void *key, const void *val);
-
-/**
- * @brief Insert a key and its value, growing the table first if needed.
- *
- * The table is resized before the insertion when it is more than roughly
- * 85% full.
- *
- * @param LSUP_HTable *ht[in]: Hash table or set.
- *
- * @param const void *key[in]: Key to insert.
- *
- * @param const void *val[in]: Value to insert.
- *
- * @return The return value of LSUP_htable_insert().
- */
-LSUP_rc
-LSUP_htable_put(LSUP_HTable *ht, const void *key, const void *val);
-
-/**
- * @brief Test the existence of a given key and find its value.
- *
- * @param const LSUP_HTable *ht[in]: Hash table or set.
- *
- * @param const void *key[in]: Key to look up.
- *
- * @param void **val[out]: Pointer to be set to the address of the value found
- * at the key address, if any. The memory pointed to is owned by the hash
- * table. If NULL is passed, or if the hash table is a set, the value is never
- * populated.
- *
- * @return LSUP_rc: LSUP_OK if the key is found; LSUP_NORESULT if the key is
- *  not found; LSUP_VALUE_ERR if key is NULL.
- */
-LSUP_rc
-LSUP_htable_get(const LSUP_HTable *ht, const void *key, void **val);
-
-/**
- * @brief Remove the given key.
- *
- * The table may be shrunk after a removal when its load drops below
- * roughly 40%.
- *
- * @param LSUP_HTable *ht[in]: Hash table or set.
- *
- * @param const void *key[in]: Key to remove.
- *
- * @return LSUP_rc: LSUP_OK if the key was removed; LSUP_NOACTION if it was
- *  not found.
- */
-LSUP_rc
-LSUP_htable_remove(LSUP_HTable *ht, const void *key);
-
-/**
- * @brief Iterate over a hashmap or set.
- *
- * @param LSUP_HTable *ht[in]: Hash table or set.
- *
- * @param htsize_t *cur[in,out]: an integer used as a cursor over the buckets.
- *  The function advances it as it scans. The correct use is to initialize a
- *  htsize_t variable to zero and pass its pointer in a loop for as long as
- *  LSUP_OK is returned.
- *
- * @param void **keyp[out]: Pointer to be set to the address of the next key
- *  found. Must not be NULL.
- *
- * @param void **valp[out]: Pointer to be set to the address of the value
- *  found. The target can be used as a normal lvalue. It may be NULL for sets
- *  or if the value is not needed.
- *
- * @return LSUP_rc: LSUP_OK if a key is found; LSUP_END if the end of the data
- *  is reached; LSUP_VALUE_ERR if keyp is NULL.
- */
-LSUP_rc
-LSUP_htable_iter(LSUP_HTable *ht, htsize_t *cur, void **keyp, void **valp);
-
-/**
- * @brief Free the memory used by the hash table.
- *
- * Passing NULL is allowed and does nothing.
- *
- * It is the responsibility of the caller to free data pointed to if pointers
- * were used for keys or values.
- */
-void LSUP_htable_free(LSUP_HTable *ht);
-
-#endif

+ 0 - 473
src/htable.c

@@ -1,473 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <assert.h>
-
-#include "htable.h"
-
-
-/* Bucket state flag bits (not referenced by the code shown in this file). */
-#define BUCKET_EMPTY        (1 << 0)
-#define BUCKET_DELETED      (1 << 1)
-
-/*
- * ASSERT() is only active in DEBUG builds. Otherwise it expands to a no-op
- * expression, so its argument is not evaluated.
- */
-#if defined(DEBUG)
-#define    ASSERT        assert
-#else
-#define    ASSERT(x)     ((void)0)
-#endif
-
-/* Upper bound on how many buckets a single growth step may add. */
-#define    MAX_GROWTH_STEP          (1024U * 1024)
-
-/*
- * Integer approximations of 85% and 40% of x (870/1024 and 409/1024). The
- * multiplication happens in the type of x, so pass a type wide enough to hold
- * x * 870.
- */
-#define    APPROX_85_PERCENT(x)     (((x) * 870) >> 10)
-#define    APPROX_40_PERCENT(x)     (((x) * 409) >> 10)
-
-/* Smallest number of buckets a table may have. */
-#define MIN_HT_SIZE         (1 << 3)
-
-/** @brief Bucket structure.
- *
- * This struct only contains the bucket metadata, which are of predictable
- * size. The key and value bytes of a bucket are kept in the table's separate
- * raw data array.
- */
-typedef struct {
-    ht_hash_t       hash;   // Hash of the key, computed on insertion.
-    uint16_t        psl;    // Probe sequence length: distance from home slot.
-} bucket_t;
-
-/* Hash table handle; exposed to callers as the opaque LSUP_HTable type. */
-typedef struct htable_t {
-    htsize_t        size;           // Number of buckets allocated.
-    htsize_t        nitems;         // Number of entries currently stored.
-    uint64_t        divinfo;        // fast_div32_init() result for size.
-
-    bucket_t *      buckets;        // Metadata array, one element per bucket.
-    void *          data;           // Raw key/value bytes (see HT_KEY/HT_VAL).
-    uint64_t        seed;           // Seed handed to key_hash_fn.
-
-    key_hash_fn_t   key_hash_fn;    // Key hashing callback.
-    key_eq_fn_t     key_eq_fn;      // Key equality callback.
-
-    ksize_t         ksize;          // Size of each key, in bytes.
-    vsize_t         vsize;          // Size of each value, in bytes.
-} HTable;
-
-// All-zero key used to fill and compare empty buckets. It is sized for the
-// largest key length that ksize_t can express, so that copying or comparing
-// ht->ksize bytes never reads past the end of the marker.
-static const unsigned char del_marker[(ksize_t)(-1)] = {0};
-
-/** @brief Access bucket key address by bucket index.
- *
- * data is the address of the raw data array (e.g. ht->data, NOT the table
- * handle itself), i the bucket index. A table pointer named ht must be in
- * scope where the macro is used, because the key and value sizes are read
- * from it. The offset is computed in bytes, as a size_t, and the result is a
- * void pointer.
- */
-#define HT_KEY(data, i) \
-    ((void *)((unsigned char *)(data) + \
-        ((size_t)ht->ksize + ht->vsize) * (size_t)(i)))
-
-/** @brief Access bucket value address by bucket index.
- *
- * data is the address of the raw data array (e.g. ht->data, NOT the table
- * handle itself), i the bucket index. A table pointer named ht must be in
- * scope where the macro is used. The value is stored right after the key.
- */
-#define HT_VAL(data, i) \
-    ((void *)((unsigned char *)(data) + \
-        ((size_t)ht->ksize + ht->vsize) * (size_t)(i) + ht->ksize))
-
-/* * * GENERIC UTILITIES * * */
-
-/** @brief Tell whether bucket i of a table is vacant.
- *
- * A bucket is vacant when all ht->ksize bytes of its key are zero. The key is
- * read from the table's raw data array.
- */
-static inline bool is_empty_bucket(const HTable *ht, htsize_t i)
-{
-    const unsigned char *key = HT_KEY(ht->data, i);
-
-    for (size_t b = 0; b < ht->ksize; b++) {
-        if (key[b] != 0) return false;
-    }
-
-    return true;
-}
-
-/*
- * Find last set: 1-based position of the most significant set bit of x, or 0
- * when no bit is set.
- */
-static inline int fls(int x)
-{
-    if (x == 0) return 0;
-
-    const int width = (int)(sizeof(int) * CHAR_BIT);
-
-    return width - __builtin_clz(x);
-}
-
-
-/*
- * Fast 32bit division and remainder.
- *
- * Reference:
- *
- *    Torbjörn Granlund and Peter L. Montgomery, "Division by Invariant
- *    Integers Using Multiplication", ACM SIGPLAN Notices, Issue 6, Vol 29,
- *    http://gmplib.org/~tege/divcnst-pldi94.pdf, 61-72, June 1994.
- *
- * The following example computes q = a / b and r = a % b:
- *
- *    uint64_t divinfo = fast_div32_init(b);
- *    q = fast_div32(a, b, divinfo);
- *    r = fast_rem32(a, b, divinfo);
- */
-
-/*
- * Precompute the division descriptor for a 32-bit divisor.
- *
- * The returned value packs a 32-bit multiplier in the upper half and two
- * shift counts in the two lowest bytes (s1 in bits 8-15, s2 in bits 0-7),
- * which is the layout that fast_div32() unpacks.
- *
- * NOTE(review): div is used as a divisor below, so it is presumably expected
- * to be non-zero -- confirm with callers.
- */
-static inline uint64_t
-fast_div32_init(uint32_t div)
-{
-    uint64_t mt;
-    uint8_t s1, s2;
-    int l;
-
-    // l is the bit length of (div - 1).
-    l = fls(div - 1);
-    mt = (uint64_t)(0x100000000ULL * ((1ULL << l) - div));
-    s1 = (l > 1) ? 1U : (uint8_t)l;
-    s2 = (l == 0) ? 0 : (uint8_t)(l - 1);
-    // Pack multiplier | s1 | s2 into one 64-bit descriptor.
-    return (uint64_t)(mt / div + 1) << 32 | (uint32_t)s1 << 8 | s2;
-}
-
-/*
- * Divide v using a descriptor precomputed by fast_div32_init().
- *
- * div is accepted for symmetry with fast_rem32() but is not used: everything
- * needed is carried by divinfo (multiplier in the upper 32 bits, shift counts
- * in the two lowest bytes).
- */
-static inline uint32_t
-fast_div32(uint32_t v, uint32_t div, uint64_t divinfo)
-{
-    const uint32_t m = divinfo >> 32;
-    const unsigned s1 = (divinfo & 0x0000ff00) >> 8;
-    const unsigned s2 = (divinfo & 0x000000ff);
-    // High 32 bits of the 64-bit product v * m.
-    const uint32_t t = (uint32_t)(((uint64_t)v * m) >> 32);
-    (void)div; // unused
-    return (t + ((v - t) >> s1)) >> s2;
-}
-
-
-/*
- * Remainder of v divided by div, where divinfo is the descriptor that
- * fast_div32_init() produced for div.
- */
-static inline uint32_t
-fast_rem32(uint32_t v, uint32_t div, uint64_t divinfo)
-{
-    const uint32_t quotient = fast_div32(v, div, divinfo);
-
-    return v - div * quotient;
-}
-
-
-/*
- * Debug check: non-zero when bucket i is vacant, or when its recorded probe
- * sequence length equals its distance (with wrap-around) from the slot its
- * hash maps to.
- */
-static int __attribute__((__unused__))
-validate_psl_p(const HTable *ht, unsigned i)
-{
-    if (is_empty_bucket(ht, i)) return 1;
-
-    const unsigned home = fast_rem32(
-            ht->buckets[i].hash, ht->size, ht->divinfo);
-    unsigned distance;
-
-    if (home > i) {
-        // The probe sequence wrapped around the end of the table.
-        distance = ht->size - home + i;
-    } else {
-        distance = i - home;
-    }
-
-    return distance == ht->buckets[i].psl;
-}
-
-/* * * PUBLIC API * * */
-
-/*
- * Create a hash table.
- *
- * size is the initial number of buckets, ksize and vsize the byte sizes of
- * keys and values, key_hash_fn and key_eq_fn the callbacks used to hash and
- * compare keys. On success the new handle is stored in *ht_p.
- *
- * Returns LSUP_VALUE_ERR if ht_p or a callback is NULL or if ksize is 0,
- * ENOMEM if the handle cannot be allocated, otherwise the result of
- * LSUP_htable_resize(). On failure *ht_p is set to NULL (when ht_p itself is
- * not NULL).
- */
-LSUP_rc LSUP_htable_new(
-        htsize_t size, ksize_t ksize, vsize_t vsize,
-        key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, LSUP_HTable **ht_p)
-{
-    // Vacant buckets are recognized by comparing ksize bytes of the key, so a
-    // zero-length key would make every bucket look vacant.
-    if (!ht_p || !key_hash_fn || !key_eq_fn || ksize == 0)
-        return LSUP_VALUE_ERR;
-
-    *ht_p = NULL;
-
-    // calloc leaves size, nitems and seed at zero.
-    HTable *ht = calloc(1, sizeof *ht);
-    if (!ht) return ENOMEM;
-
-    ht->ksize = ksize;
-    ht->vsize = vsize;
-    ht->key_hash_fn = key_hash_fn;
-    ht->key_eq_fn = key_eq_fn;
-
-    // The first resize allocates the bucket and data arrays.
-    const LSUP_rc rc = LSUP_htable_resize(ht, size);
-    if (rc != LSUP_OK) {
-        LSUP_htable_free(ht);
-        return rc;
-    }
-
-    // Only hand out the handle once the table is usable.
-    *ht_p = ht;
-
-    return LSUP_OK;
-}
-
-
-/*
- * Duplicate a hash table.
- *
- * The bucket metadata and the raw key/value data are copied verbatim into
- * newly allocated arrays, and the handle of the copy is stored in *dest_p.
- * The seed is copied unchanged: the position of every entry was derived from
- * hashes computed with it, so a different seed would make the copied entries
- * unreachable.
- *
- * NOTE(review): CRITICAL is defined outside this file; it is presumably what
- * handles allocation failures here -- confirm.
- *
- * Returns LSUP_OK.
- */
-LSUP_rc
-LSUP_htable_copy(const HTable *src, HTable **dest_p)
-{
-    // Each bucket owns ksize + vsize bytes of the data array.
-    const size_t slot_size = (size_t)src->ksize + src->vsize;
-    const size_t data_size = slot_size * src->size;
-    const size_t meta_size = sizeof(bucket_t) * src->size;
-
-    HTable *dest;
-    CRITICAL(dest = calloc(1, sizeof(HTable)));
-
-    dest->size = src->size;
-    dest->nitems = src->nitems;
-    dest->divinfo = src->divinfo;
-    dest->seed = src->seed;
-
-    dest->key_hash_fn = src->key_hash_fn;
-    dest->key_eq_fn = src->key_eq_fn;
-
-    dest->ksize = src->ksize;
-    dest->vsize = src->vsize;
-
-    CRITICAL(dest->buckets = malloc(meta_size));
-    memcpy(dest->buckets, src->buckets, meta_size);
-
-    CRITICAL(dest->data = malloc(data_size));
-    memcpy(dest->data, src->data, data_size);
-
-    *dest_p = dest;
-
-    return LSUP_OK;
-}
-
-
-/* True when the ksize-byte key at the start of a slot is all zeros, which is
- * how a vacant bucket is marked. */
-static inline bool ht_slot_is_vacant(const unsigned char *key, size_t ksize)
-{
-    for (size_t b = 0; b < ksize; b++) {
-        if (key[b] != 0) return false;
-    }
-
-    return true;
-}
-
-/**
- * Resize a table.
- *
- * New, zeroed bucket and data arrays of newsize buckets are allocated, the
- * seed is perturbed, and every entry of the old data array is inserted again.
- * The old arrays are then released.
- *
- * newsize is raised to MIN_HT_SIZE if smaller. It must be greater than the
- * number of stored entries, since at least one bucket has to stay vacant for
- * probing to terminate; otherwise LSUP_VALUE_ERR is returned and the table is
- * left untouched.
- *
- * NOTE(review): CRITICAL is defined outside this file; it is presumably what
- * handles allocation failures here -- confirm.
- *
- * Returns LSUP_OK on success.
- */
-LSUP_rc LSUP_htable_resize(HTable *ht, htsize_t newsize)
-{
-    bucket_t *old_buckets = ht->buckets;
-    unsigned char *old_data = ht->data;
-    const htsize_t oldsize = ht->size;
-    const size_t slot_size = (size_t)ht->ksize + ht->vsize;
-
-    // Clip size to the minimum. No upper clip is needed: htsize_t cannot
-    // hold more than HTSIZE_MAX.
-    if (newsize < (MIN_HT_SIZE)) newsize = (MIN_HT_SIZE);
-
-    if (newsize <= ht->nitems) return LSUP_VALUE_ERR;
-
-    TRACE("Resizing htable to %lu.", (unsigned long)newsize);
-
-    CRITICAL(ht->buckets = calloc(newsize, sizeof(bucket_t)));
-    CRITICAL(ht->data = calloc(newsize, slot_size));
-
-    ht->size = newsize;
-    ht->nitems = 0;
-
-    ht->divinfo = fast_div32_init(newsize);
-
-    // Mix fresh random bits into both halves of the 64-bit seed.
-    const uint64_t seed_lo = (uint64_t)random();
-    const uint64_t seed_hi = (uint64_t)random();
-    ht->seed ^= seed_lo | (seed_hi << 32);
-
-    // Re-insert the old entries; insertion rehashes them with the new seed.
-    for (htsize_t i = 0; i < oldsize; i++) {
-        const unsigned char *slot = old_data + slot_size * i;
-
-        /* Skip the empty buckets. */
-        if (ht_slot_is_vacant(slot, ht->ksize)) continue;
-
-        LSUP_htable_insert(ht, slot, slot + ht->ksize);
-    }
-
-    free(old_buckets);
-    free(old_data);
-
-    return LSUP_OK;
-}
-
-
-/* Report how many buckets the table currently has. */
-htsize_t LSUP_htable_capacity(LSUP_HTable *ht)
-{
-    const htsize_t nbuckets = ht->size;
-
-    return nbuckets;
-}
-
-
-/* Report how many entries the table currently stores. */
-htsize_t LSUP_htable_size(LSUP_HTable *ht)
-{
-    const htsize_t nentries = ht->nitems;
-
-    return nentries;
-}
-
-
-/* True when all ksize bytes of key are zero, i.e. the key cannot be told
- * apart from the marker of a vacant bucket. */
-static inline bool ht_key_is_blank(const unsigned char *key, size_t ksize)
-{
-    for (size_t b = 0; b < ksize; b++) {
-        if (key[b] != 0) return false;
-    }
-
-    return true;
-}
-
-/* Write a key and its value into the slot starting at slot. A NULL val
- * zero-fills the value part. */
-static inline void ht_store_pair(
-        const HTable *ht, unsigned char *slot,
-        const void *key, const void *val)
-{
-    memcpy(slot, key, ht->ksize);
-
-    if (ht->vsize == 0) return;
-
-    if (val) memcpy(slot + ht->ksize, val, ht->vsize);
-    else memset(slot + ht->ksize, 0, ht->vsize);
-}
-
-/*
- * Insert without resizing (assuming resizing is already done).
- *
- * The table must have at least one vacant bucket, otherwise probing cannot
- * terminate; LSUP_htable_put() takes care of that by growing the table.
- *
- * Returns LSUP_OK if the entry was inserted, LSUP_NOACTION if an equal key
- * was already present, LSUP_VALUE_ERR if key is NULL or all zeros (an
- * all-zero key is how vacant buckets are marked, so it cannot be stored), or
- * ENOMEM if the temporary buffer needed to displace an entry cannot be
- * allocated.
- */
-LSUP_rc LSUP_htable_insert(HTable *ht, const void *key, const void *val)
-{
-    if (key == NULL || ht_key_is_blank(key, ht->ksize))
-        return LSUP_VALUE_ERR;
-
-    const size_t slot_size = (size_t)ht->ksize + ht->vsize;
-
-    /*
-     * Setup the entry being carried along the probe sequence. Until the
-     * first swap its key and value are the caller's; afterwards they are
-     * the bytes of a displaced entry, held in carry.
-     */
-    unsigned char *carry = NULL;
-    bucket_t entry;
-    entry.hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
-    entry.psl = 0;
-
-    /*
-     * From the paper: "when inserting, if a record probes a location
-     * that is already occupied, the record that has traveled longer
-     * in its probe sequence keeps the location, and the other one
-     * continues on its probe sequence" (page 12).
-     *
-     * Basically: if the probe sequence length (PSL) of the element
-     * being inserted is greater than PSL of the element in the bucket,
-     * then swap them and continue.
-     */
-    htsize_t i = fast_rem32(entry.hash, ht->size, ht->divinfo);
-    bucket_t *bucket;
-    unsigned char *slot;
-
-    // Locate the index to insert the KV into.
-    for (;;) {
-        bucket = ht->buckets + i;
-        slot = HT_KEY(ht->data, i);
-
-        if (is_empty_bucket(ht, i)) break;
-
-        ASSERT(validate_psl_p(ht, i));
-
-        // There is a key in the bucket. Once a swap has happened the new
-        // key is already placed, and the displaced entries are known to be
-        // unique, so the duplicate check is only needed before that.
-        if (carry == NULL && ht->key_eq_fn(slot, key, ht->ksize)) {
-            // Duplicate key: do nothing.
-            TRACE(STR, "Duplicate key.");
-            return LSUP_NOACTION;
-        }
-
-        /*
-         * We found a "rich" bucket: exchange its metadata and its
-         * key/value bytes with the entry being carried.
-         */
-        if (entry.psl > bucket->psl) {
-            TRACE(STR, "SWAP");
-
-            if (carry == NULL) {
-                // First swap: nothing has been modified yet, so failing
-                // here leaves the table intact.
-                carry = malloc(slot_size);
-                if (!carry) return ENOMEM;
-
-                memcpy(carry, slot, slot_size);
-                ht_store_pair(ht, slot, key, val);
-            } else {
-                for (size_t b = 0; b < slot_size; b++) {
-                    const unsigned char displaced = slot[b];
-                    slot[b] = carry[b];
-                    carry[b] = displaced;
-                }
-            }
-
-            const bucket_t evicted = *bucket;
-            *bucket = entry;
-            entry = evicted;
-
-            ASSERT(validate_psl_p(ht, i));
-        }
-
-        entry.psl++;
-
-        /* Continue to the next bucket. */
-        i = fast_rem32(i + 1, ht->size, ht->divinfo);
-    }
-
-    /*
-     * Found a free bucket: drop the carried entry into it.
-     */
-    TRACE("Inserting into bucket #%lu", (unsigned long)i);
-    *bucket = entry;
-    if (carry) {
-        memcpy(slot, carry, slot_size);
-        free(carry);
-    } else {
-        ht_store_pair(ht, slot, key, val);
-    }
-    ht->nitems++;
-
-    ASSERT(validate_psl_p(ht, i));
-
-    return LSUP_OK;
-}
-
-
-/*
- * Insert a key and its value, growing the table first if needed.
- *
- * When the table holds more entries than about 85% of its buckets, it is
- * resized before the insertion: the size is doubled, but it grows by at most
- * MAX_GROWTH_STEP buckets and never beyond HTSIZE_MAX.
- *
- * Returns the error of LSUP_htable_resize() if growing fails, otherwise the
- * result of LSUP_htable_insert().
- */
-LSUP_rc LSUP_htable_put(HTable *ht, const void *key, const void *val)
-{
-    // Computed in 64 bits: size * 870 does not fit 32 bits for large tables.
-    const uint64_t threshold = APPROX_85_PERCENT((uint64_t)ht->size);
-
-    /*
-     * If the load factor is more than the threshold, then resize.
-     */
-    if (UNLIKELY(ht->nitems > threshold)) {
-        const uint64_t doubled = (uint64_t)ht->size << 1;
-        const uint64_t stepped = (uint64_t)ht->size + MAX_GROWTH_STEP;
-        uint64_t newsize = (doubled < stepped) ? doubled : stepped;
-
-        if (newsize > HTSIZE_MAX) newsize = HTSIZE_MAX;
-
-        // Nothing to do if the table is already as large as it can get.
-        if (newsize > ht->size) {
-            const LSUP_rc rc = LSUP_htable_resize(ht, (htsize_t)newsize);
-            if (rc != LSUP_OK) return rc;
-        }
-    }
-
-    return LSUP_htable_insert(ht, key, val);
-}
-
-
-/*
- * Look up a key.
- *
- * If val_p is not NULL it receives the address of the value stored with the
- * key (memory owned by the table), or NULL when the key is not found.
- *
- * Returns LSUP_OK if the key is found, LSUP_NORESULT if it is not, or
- * LSUP_VALUE_ERR if key is NULL.
- */
-LSUP_rc LSUP_htable_get(const HTable *ht, const void *key, void **val_p)
-{
-    // Validate before the key is handed to the hash function.
-    if (key == NULL) return LSUP_VALUE_ERR;
-
-    const uint64_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
-    htsize_t n = 0, i = fast_rem32(hash, ht->size, ht->divinfo);
-
-    /*
-     * Lookup is a linear probe.
-     */
-    for (;;) {
-        ASSERT(validate_psl_p(ht, i));
-
-        /*
-         * Stop probing if we hit an empty bucket; also, if we hit a
-         * bucket with PSL lower than the distance from the base location,
-         * then it means that we found the "rich" bucket which should
-         * have been captured, if the key was inserted -- see the central
-         * point of the algorithm in the insertion function.
-         *
-         * This is tested before comparing keys so that the bytes of a
-         * vacant bucket are never taken for a stored key.
-         */
-        if (is_empty_bucket(ht, i) || n > ht->buckets[i].psl) {
-            if (val_p) *val_p = NULL;
-
-            return LSUP_NORESULT;
-        }
-
-        if (ht->key_eq_fn(HT_KEY(ht->data, i), key, ht->ksize)) {
-            // Key found within max probe length.
-            if (val_p) *val_p = HT_VAL(ht->data, i);
-
-            return LSUP_OK;
-        }
-
-        n++;
-
-        /* Continue to the next bucket. */
-        i = fast_rem32(i + 1, ht->size, ht->divinfo);
-    }
-}
-
-
-/*
- * Remove a key and its value.
- *
- * The entries following the removed one in its probe sequence are shifted
- * back by one bucket, metadata and key/value bytes alike, and the last
- * bucket of the run is marked vacant. If the table then holds fewer entries
- * than about 40% of its buckets, and it is larger than the minimum size, it
- * is rebuilt with half the buckets.
- *
- * Returns LSUP_OK if the key was removed, LSUP_NOACTION if it was not found,
- * or LSUP_VALUE_ERR if key is NULL.
- */
-LSUP_rc LSUP_htable_remove(HTable *ht, const void *key)
-{
-    if (key == NULL) return LSUP_VALUE_ERR;
-
-    const size_t slot_size = (size_t)ht->ksize + ht->vsize;
-    const uint64_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
-    htsize_t n = 0, i = fast_rem32(hash, ht->size, ht->divinfo);
-
-    /*
-     * Find the bucket holding the key, with the same probing logic as in
-     * the lookup function.
-     */
-    for (;;) {
-        if (is_empty_bucket(ht, i) || n > ht->buckets[i].psl)
-            return LSUP_NOACTION;
-
-        ASSERT(validate_psl_p(ht, i));
-
-        if (ht->key_eq_fn(HT_KEY(ht->data, i), key, ht->ksize)) break;
-
-        /* Continue to the next bucket. */
-        i = fast_rem32(i + 1, ht->size, ht->divinfo);
-        n++;
-    }
-
-    ht->nitems--;
-
-    /*
-     * The probe sequence must be preserved in the deletion case.
-     * Use the backwards-shifting method to maintain low variance.
-     */
-    for (;;) {
-        const htsize_t next = fast_rem32(i + 1, ht->size, ht->divinfo);
-
-        /*
-         * Stop if we reach an empty bucket or hit a key which
-         * is in its base (original) location.
-         */
-        if (is_empty_bucket(ht, next) || ht->buckets[next].psl == 0) break;
-
-        // Pull the next entry one bucket closer to its base location.
-        ht->buckets[i] = ht->buckets[next];
-        ht->buckets[i].psl--;
-        memcpy(HT_KEY(ht->data, i), HT_KEY(ht->data, next), slot_size);
-
-        i = next;
-    }
-
-    // Bucket i is now either the removed entry or a duplicate of the one
-    // shifted out of it: mark it vacant.
-    memset(HT_KEY(ht->data, i), 0, slot_size);
-    ht->buckets[i].hash = 0;
-    ht->buckets[i].psl = 0;
-
-    /*
-     * If the load factor is less than threshold, then shrink by
-     * halving the size. Computed in 64 bits: size * 409 does not fit 32
-     * bits for large tables.
-     */
-    const uint64_t threshold = APPROX_40_PERCENT((uint64_t)ht->size);
-    if (ht->nitems < threshold && ht->size > (MIN_HT_SIZE)) {
-        (void)LSUP_htable_resize(ht, ht->size >> 1);
-    }
-
-    return LSUP_OK;
-}
-
-
-/*
- * Iterate over the entries of a table.
- *
- * *cur is the index of the next bucket to examine; start with 0. Vacant
- * buckets are skipped. When an occupied bucket is found, *keyp is set to the
- * address of its key and, if valp is not NULL and the table stores values,
- * *valp to the address of its value; *cur is left pointing to the bucket
- * after it.
- *
- * Returns LSUP_OK if an entry was found, LSUP_END once all buckets have been
- * visited, or LSUP_VALUE_ERR if keyp is NULL.
- */
-LSUP_rc LSUP_htable_iter(
-        LSUP_HTable *ht, htsize_t *cur, void **keyp, void **valp)
-{
-    if (!keyp) return LSUP_VALUE_ERR;
-
-    while (*cur < ht->size) {
-        // Examine the bucket under the cursor, then advance past it.
-        const htsize_t i = (*cur)++;
-
-        if (is_empty_bucket(ht, i)) {
-            TRACE("Empty bucket: %lu. Skipping.", (unsigned long)i);
-            continue;
-        }
-
-        // Hand out the key address and, if relevant, the value address.
-        *keyp = HT_KEY(ht->data, i);
-
-        if (valp != NULL && ht->vsize > 0) *valp = HT_VAL(ht->data, i);
-
-        return LSUP_OK;
-    }
-
-    return LSUP_END;
-}
-
-
-/*
- * Release a table: its bucket metadata, its raw data array and the handle
- * itself. A NULL handle is ignored.
- */
-void LSUP_htable_free(HTable *ht)
-{
-    if (!LIKELY(ht)) return;
-
-    free(ht->buckets);
-    free(ht->data);
-    free(ht);
-}