#include "store_htable.h" // Assume VERY coarsly that the number of unique terms will be in general // 1.7 times the number of triples. This is conservative to maintain load // factor low. #define IDX_SIZE_RATIO 1.7 /** * Callback type for key comparison. */ typedef bool (*LSUP_key_eq_fn_t)( const LSUP_Key spok[], const LSUP_Key luk[]); typedef struct HTStore { LSUP_HTable *keys; LSUP_HTable *idx; // Dictionary of keys to serialized terms } HTStore; typedef struct HTIterator { HTStore * store; // Store being iterated. LSUP_HTable * ht; // Hash table to look up. htsize_t cur; // Lookup cursor. LSUP_Key luk[3]; // 0รท3 lookup keys. LSUP_key_eq_fn_t eq_fn; // Equality function to test triples. int rc; // Return code for *next* result. LSUP_Key * spok; // Retrieved SPO key. } HTIterator; /** * Identity hashing function. * * Since the key is already a strong hash, reuse it for bucket allocation. */ static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed) { return *(uint64_t*)key; } /** * General XX64 hash. Strong (non-crypto) and extremely fast. */ static inline uint64_t xx64_hash_fn( const void *key, ksize_t size, uint64_t seed) { return XXH64(key, size, seed); } static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size) { return memcmp(a, b, size) == 0; } /* * * CALLBACKS * * */ /** * Dummy callback for queries with all parameters unbound. Returns true. */ static bool lookup_none_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return true; } /** * Keyset lookup for S key. */ static bool lookup_sk_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[0] == luk[0]; } /** * Keyset lookup for P key. */ static bool lookup_pk_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[1] == luk[0]; } /** * Keyset lookup for O key. */ static bool lookup_ok_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[2] == luk[0]; } /** * Keyset lookup for S and P keys. */ static bool lookup_spk_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[0] == luk[0] && spok[1] == luk[1]; } /** * Keyset lookup for S and O keys. */ static bool lookup_sok_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[0] == luk[0] && spok[2] == luk[1]; } /** * Keyset lookup for P and O keys. */ static bool lookup_pok_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[1] == luk[0] && spok[2] == luk[1]; } /* * * Other prototypes. * * */ static inline LSUP_rc htiter_next_key(HTIterator *it); /* * * API * * */ HTStore * LSUP_htstore_new(size_t capacity) { HTStore *ht; CRITICAL(ht = malloc (sizeof (*ht))); ht->keys = LSUP_htable_new( capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn); ht->idx = LSUP_htable_new( capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t), xx64_hash_fn, buffer_eq_fn); return ht; } void LSUP_htstore_free(HTStore *ht) { if (!ht) return; LSUP_htable_free(ht->keys); // Free up index entries and index. htsize_t cur = 0; LSUP_TripleKey spok; LSUP_Buffer *sterm; while(LSUP_htable_iter( ht->idx, &cur, (void**)&spok, (void**)&sterm) == LSUP_OK) { TRACE("Freeing indexed term buffer #%d at %p", cur, sterm); LSUP_buffer_done(sterm); } LSUP_htable_free(ht->idx); free(ht); } htsize_t LSUP_htstore_size(LSUP_HTStore *ht) { return LSUP_htable_size(ht->keys); } htsize_t LSUP_htstore_capacity(const LSUP_HTStore *ht) { return LSUP_htable_capacity(ht->keys); } LSUP_rc LSUP_htstore_resize(HTStore *ht, htsize_t size) { LSUP_rc rc = LSUP_htable_resize(ht->keys, size); if (rc != LSUP_OK) return rc; return LSUP_htable_resize(ht->idx, size * IDX_SIZE_RATIO); } LSUP_rc LSUP_htstore_add(HTStore *store, const LSUP_SerTriple *sspo) { LSUP_TripleKey spok = NULL_TRP; // Add term to index. for (int i = 0; i < 3; i++) { spok[i] = LSUP_sterm_to_key(LSUP_striple_pos(sspo, i)); TRACE("Indexing term key %lu\n", spok[i]); // If term is already in the index, discard and free it. if (LSUP_htable_get(store->idx, spok + i, NULL) == LSUP_OK) LSUP_htable_put(store->idx, spok + i, LSUP_striple_pos(sspo, i)); } // Add triple. TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]); return LSUP_htable_put(store->keys, spok, NULL); } LSUP_rc LSUP_htstore_remove( LSUP_HTStore *store, const LSUP_SerTriple *sspo, size_t *ct) { LSUP_HTIterator *it = LSUP_htstore_lookup(store, sspo, ct); if (UNLIKELY (!it)) return LSUP_DB_ERR; *ct = 0; while (htiter_next_key (it)) { LSUP_rc rc = LSUP_htable_remove(store->keys, it->spok); if (UNLIKELY (rc < 0)) return rc; (*ct) ++; } // TODO clean up orphan indices in separate function. return LSUP_OK; } HTIterator * LSUP_htstore_lookup(HTStore *store, const LSUP_SerTriple *sspo, size_t *ct) { HTIterator *it; CRITICAL(it = malloc (sizeof (*it))); it->store = store; it->cur = 0; it->rc = LSUP_END; if (LSUP_htable_size(store->keys) == 0) return it; LSUP_TripleKey spok = { LSUP_sterm_to_key(sspo->s), LSUP_sterm_to_key(sspo->p), LSUP_sterm_to_key(sspo->o), }; // s p o if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) { memcpy(it->luk, spok, sizeof(LSUP_TripleKey)); it->eq_fn = NULL; } else if (spok[0] != NULL_KEY) { it->luk[0] = spok[0]; // s p ? if (spok[1] != NULL_KEY) { it->luk[1] = spok[1]; it->eq_fn = lookup_spk_eq_fn; // s ? o } else if (spok[2] != NULL_KEY) { it->luk[1] = spok[2]; it->eq_fn = lookup_sok_eq_fn; // s ? ? } else { it->eq_fn = lookup_sk_eq_fn; } } else if (spok[1] != NULL_KEY) { it->luk[0] = spok[1]; // ? p o if (spok[2] != NULL_KEY) { it->luk[1] = spok[2]; it->eq_fn = lookup_pok_eq_fn; // ? p ? } else it->eq_fn = lookup_pk_eq_fn; // ? ? o } else if (spok[2] != NULL_KEY) { it->luk[0] = spok[2]; it->eq_fn = lookup_ok_eq_fn; // ? ? ? } else it->eq_fn = lookup_none_eq_fn; it->rc = LSUP_htable_iter( it->store->keys, &it->cur, (void**)&it->spok, NULL); return it->rc >= 0 ? it : NULL; } static inline LSUP_rc htiter_next_key(HTIterator *it) { for (;;) { if (it->rc != LSUP_OK) return it->rc; if (it->eq_fn(it->spok, it->luk)) return LSUP_OK; it->rc = LSUP_htable_iter( it->store->keys, &it->cur, (void**)&it->spok, NULL); } } LSUP_rc LSUP_htiter_next(HTIterator *it, LSUP_SerTriple *sspo) { LSUP_rc rc = htiter_next_key(it); if (UNLIKELY (rc != LSUP_OK)) return rc; for (int i = 0; i < 3; i++) LSUP_htable_get( it->store->idx, (void*)it->spok[i], (void**)(LSUP_striple_pos(sspo, i))); return rc; } void LSUP_htiter_free(LSUP_HTIterator *it) { free(it); } HTStore * LSUP_htstore_bool_op( const LSUP_bool_op op, const HTStore *s1, const HTStore *s2) { HTStore *dest; htsize_t cur; void *key, *val; dest = LSUP_htstore_new(0); if (UNLIKELY ( op != LSUP_BOOL_UNION && op != LSUP_BOOL_SUBTRACTION && op != LSUP_BOOL_INTERSECTION && op != LSUP_BOOL_XOR)) { fprintf(stderr, "Operation not supported.\n"); goto fail; } if (op == LSUP_BOOL_UNION) { dest->keys = LSUP_htable_copy(s1->keys); while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END) LSUP_htable_put(dest->keys, key, NULL); dest->idx = LSUP_htable_copy(s1->idx); while (LSUP_htable_iter(s2->idx, &cur, &key, &val) != LSUP_END) LSUP_htable_put(dest->idx, key, val); } else { if (op == LSUP_BOOL_XOR) { while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END) { LSUP_rc get_rc = LSUP_htable_get(s1->keys, key, NULL); if (get_rc == LSUP_NORESULT) { LSUP_htable_put(dest->keys, key, NULL); if (LSUP_htable_get(s2->idx, key, &val) == LSUP_OK) LSUP_htable_put(dest->idx, key, val); } else if (UNLIKELY(get_rc < 0)) goto fail; } } while (LSUP_htable_iter(s1->keys, &cur, &key, NULL) != LSUP_END) { LSUP_rc get_rc = LSUP_htable_get(s2->keys, key, NULL); if ( (op == LSUP_BOOL_INTERSECTION && get_rc == LSUP_OK) || get_rc == LSUP_NORESULT ) { LSUP_htable_put(dest->keys, key, NULL); if (LSUP_htable_get(s1->idx, key, &val) == LSUP_OK) LSUP_htable_put(dest->idx, key, val); } else if (UNLIKELY(get_rc < 0)) goto fail; } } return dest; fail: LSUP_htstore_free(dest); return NULL; }