123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375 |
- #include "store_htable.h"
- // Assume VERY coarsly that the number of unique terms will be in general
- // 1.7 times the number of triples. This is conservative to maintain load
- // factor low.
- #define IDX_SIZE_RATIO 1.7
- /**
- * Callback type for key comparison.
- */
- typedef bool (*LSUP_key_eq_fn_t)(
- const LSUP_Key spok[], const LSUP_Key luk[]);
- typedef struct HTStore {
- LSUP_HTable *keys;
- LSUP_HTable *idx; // Dictionary of keys to serialized terms
- } HTStore;
- typedef struct HTIterator {
- HTStore * store; // Store being iterated.
- LSUP_HTable * ht; // Hash table to look up.
- htsize_t cur; // Lookup cursor.
- LSUP_Key luk[3]; // 0÷3 lookup keys.
- LSUP_key_eq_fn_t eq_fn; // Equality function to test triples.
- int rc; // Return code for *next* result.
- LSUP_Key * spok; // Retrieved SPO key.
- } HTIterator;
- /**
- * Identity hashing function.
- *
- * Since the key is already a strong hash, reuse it for bucket allocation.
- */
- static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed)
- { return *(uint64_t*)key; }
- /**
- * General XX64 hash. Strong (non-crypto) and extremely fast.
- */
- static inline uint64_t xx64_hash_fn(
- const void *key, ksize_t size, uint64_t seed)
- { return XXH64(key, size, seed); }
- static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size)
- { return memcmp(a, b, size) == 0; }
- /* * * CALLBACKS * * */
- /**
- * Dummy callback for queries with all parameters unbound. Returns true.
- */
- static bool lookup_none_eq_fn(
- const LSUP_Key spok[], const LSUP_Key luk[])
- { return true; }
- /**
- * Keyset lookup for S key.
- */
- static bool lookup_sk_eq_fn(
- const LSUP_Key spok[], const LSUP_Key luk[])
- { return spok[0] == luk[0]; }
- /**
- * Keyset lookup for P key.
- */
- static bool lookup_pk_eq_fn(
- const LSUP_Key spok[], const LSUP_Key luk[])
- { return spok[1] == luk[0]; }
- /**
- * Keyset lookup for O key.
- */
- static bool lookup_ok_eq_fn(
- const LSUP_Key spok[], const LSUP_Key luk[])
- { return spok[2] == luk[0]; }
- /**
- * Keyset lookup for S and P keys.
- */
- static bool lookup_spk_eq_fn(
- const LSUP_Key spok[], const LSUP_Key luk[])
- { return spok[0] == luk[0] && spok[1] == luk[1]; }
- /**
- * Keyset lookup for S and O keys.
- */
- static bool lookup_sok_eq_fn(
- const LSUP_Key spok[], const LSUP_Key luk[])
- { return spok[0] == luk[0] && spok[2] == luk[1]; }
- /**
- * Keyset lookup for P and O keys.
- */
- static bool lookup_pok_eq_fn(
- const LSUP_Key spok[], const LSUP_Key luk[])
- { return spok[1] == luk[0] && spok[2] == luk[1]; }
- /* * * Other prototypes. * * */
- static inline LSUP_rc htiter_next_key(HTIterator *it);
- /* * * API * * */
- LSUP_rc
- LSUP_htstore_new(size_t capacity, HTStore **ht_p)
- {
- HTStore *ht;
- CRITICAL(ht = malloc(sizeof(HTStore)));
- *ht_p = ht;
- LSUP_rc rc = LSUP_htable_new(
- capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn, &ht->keys);
- if (rc != LSUP_OK) return rc;
- rc = LSUP_htable_new(
- capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t),
- xx64_hash_fn, buffer_eq_fn, &ht->idx);
- return rc;
- }
- void
- LSUP_htstore_free(HTStore *ht)
- {
- if (!ht) return;
- LSUP_htable_free(ht->keys);
- // Free up index entries and index.
- htsize_t cur = 0;
- LSUP_TripleKey spok;
- LSUP_Buffer *sterm;
- while(LSUP_htable_iter(
- ht->idx, &cur, (void**)&spok, (void**)&sterm) == LSUP_OK) {
- TRACE("Freeing indexed term buffer #%d at %p", cur, sterm);
- LSUP_buffer_done(sterm);
- }
- LSUP_htable_free(ht->idx);
- free(ht);
- }
- htsize_t
- LSUP_htstore_size(LSUP_HTStore *ht)
- { return LSUP_htable_size(ht->keys); }
- htsize_t
- LSUP_htstore_capacity(const LSUP_HTStore *ht)
- { return LSUP_htable_capacity(ht->keys); }
- LSUP_rc
- LSUP_htstore_resize(HTStore *ht, htsize_t size)
- {
- LSUP_rc rc = LSUP_htable_resize(ht->keys, size);
- if (rc != LSUP_OK) return rc;
- return LSUP_htable_resize(ht->idx, size * IDX_SIZE_RATIO);
- }
- LSUP_rc
- LSUP_htstore_add(HTStore *store, const LSUP_SerTriple *sspo)
- {
- LSUP_TripleKey spok = NULL_TRP;
- // Add term to index.
- for (int i = 0; i < 3; i++) {
- spok[i] = LSUP_sterm_to_key(LSUP_striple_pos(sspo, i));
- TRACE("Indexing term key %lu\n", spok[i]);
- // If term is already in the index, discard and free it.
- if (LSUP_htable_get(store->idx, spok + i, NULL) == LSUP_OK)
- LSUP_htable_put(store->idx, spok + i, LSUP_striple_pos(sspo, i));
- }
- // Add triple.
- TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
- return LSUP_htable_put(store->keys, spok, NULL);
- }
- LSUP_rc
- LSUP_htstore_remove(
- LSUP_HTStore *store, const LSUP_SerTriple *sspo, size_t *ct)
- {
- LSUP_HTIterator *it;
- LSUP_rc rc = LSUP_htstore_lookup(store, sspo, &it, ct);
- if (UNLIKELY (rc != LSUP_OK)) return rc;
- *ct = 0;
- while (htiter_next_key (it)) {
- rc = LSUP_htable_remove(store->keys, it->spok);
- if (UNLIKELY (rc < 0)) return rc;
- (*ct) ++;
- }
- // TODO clean up orphan indices in separate function.
- return LSUP_OK;
- }
- LSUP_rc
- LSUP_htstore_lookup(
- HTStore *store, const LSUP_SerTriple *sspo,
- HTIterator **it_p, size_t *ct)
- {
- if (LSUP_htable_size(store->keys) == 0)
- return LSUP_NOACTION;
- LSUP_TripleKey spok = {
- LSUP_sterm_to_key(sspo->s),
- LSUP_sterm_to_key(sspo->p),
- LSUP_sterm_to_key(sspo->o),
- };
- HTIterator *it;
- CRITICAL(it = malloc(sizeof(HTIterator)));
- it->store = store;
- it->cur = 0;
- *it_p = it;
- // s p o
- if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
- memcpy(it->luk, spok, sizeof(LSUP_TripleKey));
- it->eq_fn = NULL;
- } else if (spok[0] != NULL_KEY) {
- it->luk[0] = spok[0];
- // s p ?
- if (spok[1] != NULL_KEY) {
- it->luk[1] = spok[1];
- it->eq_fn = lookup_spk_eq_fn;
- // s ? o
- } else if (spok[2] != NULL_KEY) {
- it->luk[1] = spok[2];
- it->eq_fn = lookup_sok_eq_fn;
- // s ? ?
- } else {
- it->eq_fn = lookup_sk_eq_fn;
- }
- } else if (spok[1] != NULL_KEY) {
- it->luk[0] = spok[1];
- // ? p o
- if (spok[2] != NULL_KEY) {
- it->luk[1] = spok[2];
- it->eq_fn = lookup_pok_eq_fn;
- // ? p ?
- } else it->eq_fn = lookup_pk_eq_fn;
- // ? ? o
- } else if (spok[2] != NULL_KEY) {
- it->luk[0] = spok[2];
- it->eq_fn = lookup_ok_eq_fn;
- // ? ? ?
- } else it->eq_fn = lookup_none_eq_fn;
- it->rc = LSUP_htable_iter(
- it->store->keys, &it->cur, (void**)&it->spok, NULL);
- return it->rc >= 0 ? LSUP_OK : it->rc;
- }
- static inline LSUP_rc
- htiter_next_key(HTIterator *it)
- {
- for (;;) {
- if (it->rc != LSUP_OK) return it->rc;
- if (it->eq_fn(it->spok, it->luk)) return LSUP_OK;
- it->rc = LSUP_htable_iter(
- it->store->keys, &it->cur, (void**)&it->spok, NULL);
- }
- }
- LSUP_rc
- LSUP_htiter_next(HTIterator *it, LSUP_SerTriple *sspo)
- {
- LSUP_rc rc = htiter_next_key(it);
- if (UNLIKELY (rc != LSUP_OK)) return rc;
- for (int i = 0; i < 3; i++)
- LSUP_htable_get(
- it->store->idx, (void*)it->spok[i],
- (void**)(LSUP_striple_pos(sspo, i)));
- return rc;
- }
- void
- LSUP_htiter_free(LSUP_HTIterator *it)
- { free(it); }
- LSUP_rc
- LSUP_htstore_bool_op(
- const LSUP_bool_op op, const HTStore *s1, const HTStore *s2,
- HTStore **dest_p)
- {
- HTStore *dest;
- htsize_t cur;
- void *key, *val;
- LSUP_htstore_new(0, &dest);
- *dest_p = dest;
- if (UNLIKELY (
- op != LSUP_BOOL_UNION
- && op != LSUP_BOOL_SUBTRACTION
- && op != LSUP_BOOL_INTERSECTION
- && op != LSUP_BOOL_XOR)) return LSUP_VALUE_ERR;
- if (op == LSUP_BOOL_UNION) {
- LSUP_htable_copy(s1->keys, &dest->keys);
- while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END)
- LSUP_htable_put(dest->keys, key, NULL);
- LSUP_htable_copy(s1->idx, &dest->idx);
- while (LSUP_htable_iter(s2->idx, &cur, &key, &val) != LSUP_END)
- LSUP_htable_put(dest->idx, key, val);
- } else {
- if (op == LSUP_BOOL_XOR) {
- while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END) {
- LSUP_rc get_rc = LSUP_htable_get(s1->keys, key, NULL);
- if (get_rc == LSUP_NORESULT) {
- LSUP_htable_put(dest->keys, key, NULL);
- if (LSUP_htable_get(s2->idx, key, &val) == LSUP_OK)
- LSUP_htable_put(dest->idx, key, val);
- } else if (UNLIKELY(get_rc < 0)) return get_rc;
- }
- }
- while (LSUP_htable_iter(s1->keys, &cur, &key, NULL) != LSUP_END) {
- LSUP_rc get_rc = LSUP_htable_get(s2->keys, key, NULL);
- if (
- (op == LSUP_BOOL_INTERSECTION && get_rc == LSUP_OK)
- || get_rc == LSUP_NORESULT
- ) {
- LSUP_htable_put(dest->keys, key, NULL);
- if (LSUP_htable_get(s1->idx, key, &val) == LSUP_OK)
- LSUP_htable_put(dest->idx, key, val);
- } else if (UNLIKELY(get_rc < 0)) return get_rc;
- }
- }
- return LSUP_OK;
- }
|