#include "uthash.h" #include "namespace.h" #include "store_htable.h" /** * Callback type for key comparison. */ typedef bool (*LSUP_key_eq_fn_t)( const LSUP_Key spok[], const LSUP_Key luk[]); typedef struct triple_entry_t { LSUP_TripleKey key; UT_hash_handle hh; } TripleEntry; typedef struct idx_entry_t { LSUP_Key key; LSUP_Buffer * sterm; UT_hash_handle hh; } IndexEntry; typedef struct ht_store_t { TripleEntry * keys; // Triple keys (set). IndexEntry * idx; // Dictionary of keys to serialized terms. } HTStore; typedef struct ht_iterator_t { HTStore * store; // Store being iterated. size_t i; // Number of records found at any point of // a lookup iteration, or number of records // added at any point of an add loop. LSUP_Key luk[3]; // 0รท3 lookup keys. LSUP_key_eq_fn_t eq_fn; // Equality function to test triples. int rc; // Return code for *next* result. TripleEntry * entry; // Retrieved SPO key. } HTIterator; /** * Identity hashing function. * * Since the key is already a strong hash, reuse it for entry allocation. */ //#define HASH_FUNCTION (s,len,hashv) (hashv) = (unsigned)(s) /* * * CALLBACKS * * */ /** * Dummy callback for queries with all parameters unbound. Returns true. */ static bool lookup_none_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return true; } /** * Keyset lookup for S key. */ static bool lookup_sk_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[0] == luk[0]; } /** * Keyset lookup for P key. */ static bool lookup_pk_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[1] == luk[0]; } /** * Keyset lookup for O key. */ static bool lookup_ok_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[2] == luk[0]; } /** * Keyset lookup for S and P keys. */ static bool lookup_spk_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[0] == luk[0] && spok[1] == luk[1]; } /** * Keyset lookup for S and O keys. */ static bool lookup_sok_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[0] == luk[0] && spok[2] == luk[1]; } /** * Keyset lookup for P and O keys. */ static bool lookup_pok_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[1] == luk[0] && spok[2] == luk[1]; } /** * Keyset lookup for S, P and O keys. */ static bool lookup_spok_eq_fn( const LSUP_Key spok[], const LSUP_Key luk[]) { return spok[0] == luk[0] && spok[1] == luk[1] && spok[2] == luk[2]; } /* * * Other prototypes. * * */ static inline LSUP_rc tkey_to_strp ( const HTStore *store, const LSUP_Key spok[], LSUP_SerTriple *sspo); /* * * API * * */ HTStore * LSUP_htstore_new (void) { HTStore *ht; MALLOC_GUARD (ht, NULL); ht->keys = NULL; ht->idx = NULL; return ht; } HTStore * LSUP_htstore_copy (const HTStore *src) { HTStore *dest = LSUP_htstore_new(); if (UNLIKELY (!dest)) return NULL; if (LSUP_htstore_copy_contents (dest, src) < 0) { LSUP_htstore_free (dest); return NULL; } return dest; } LSUP_rc LSUP_htstore_copy_contents (HTStore *dest, const HTStore *src) { TripleEntry *dest_trp = malloc ( HASH_COUNT (src->keys) * sizeof (*dest_trp)); if (UNLIKELY (!dest_trp)) return LSUP_MEM_ERR; IndexEntry *dest_idx = malloc ( HASH_COUNT (src->idx) * sizeof (*dest_idx)); if (UNLIKELY (!dest_idx)) { free (dest_trp); return LSUP_MEM_ERR; } TripleEntry *src_trp; IndexEntry *src_idx; unsigned i; // Copy triple data. for ( src_trp = src->keys, i = 0; src_trp != NULL; src_trp = src->keys->hh.next ) { memcpy (dest_trp + i, src_trp, sizeof (*dest_trp)); HASH_ADD (hh, dest->keys, key, TRP_KLEN, dest_trp + i); i++; } // Copy index data. for ( src_idx = src->idx, i = 0; src_idx != NULL; src_idx = src->idx->hh.next ) { memcpy (dest_idx + i, src_idx, sizeof (*dest_idx)); HASH_ADD (hh, dest->idx, key, KLEN, dest_idx + i); i++; } return LSUP_OK; } HTStore * LSUP_htstore_bool_op( const LSUP_bool_op op, const HTStore *s1, const HTStore *s2) { HTStore *dest; TripleEntry *trp_cur; dest = LSUP_htstore_new(); if (UNLIKELY ( op != LSUP_BOOL_UNION && op != LSUP_BOOL_SUBTRACTION && op != LSUP_BOOL_INTERSECTION && op != LSUP_BOOL_XOR)) { fprintf (stderr, "Operation not supported.\n"); goto fail; } if (op == LSUP_BOOL_UNION) { dest = LSUP_htstore_copy (s1); if (UNLIKELY (!dest) || LSUP_htstore_copy_contents (dest, s2) < 0) goto fail; return dest; } LSUP_SerTriple *sspo = STRP_DUMMY; LSUP_HTIterator *it = LSUP_htstore_add_init(dest); if (op == LSUP_BOOL_XOR) { // Add triples from s2 if not found in s1. for ( trp_cur = s2->keys; trp_cur != NULL; trp_cur = s2->keys->hh.next ) { TripleEntry *ins = NULL; HASH_FIND (hh, s1->keys, trp_cur->key, TRP_KLEN, ins); if (!ins) { tkey_to_strp (s2, trp_cur->key, sspo); LSUP_htstore_add_iter (it, sspo); } } } for ( trp_cur = s1->keys; trp_cur != NULL; trp_cur = s1->keys->hh.next ) { TripleEntry *ins = NULL; HASH_FIND (hh, s2->keys, trp_cur->key, TRP_KLEN, ins); // For XOR and subtraction, add if not found. // For intersection, add if found. if ((op == LSUP_BOOL_INTERSECTION) ^ (ins == NULL)) { tkey_to_strp (s1, trp_cur->key, sspo); LSUP_htstore_add_iter (it, sspo); } } LSUP_striple_free (sspo); LSUP_htstore_add_done (it); return dest; fail: LSUP_htstore_free (dest); return NULL; } void LSUP_htstore_free (HTStore *ht) { IndexEntry *idx_entry, *idx_tmp; size_t ct = 0; HASH_ITER (hh, ht->idx, idx_entry, idx_tmp) { HASH_DEL (ht->idx, idx_entry); LSUP_buffer_free (idx_entry->sterm); free (idx_entry); ct++; } TripleEntry *trp_entry, *trp_tmp; HASH_ITER (hh, ht->keys, trp_entry, trp_tmp) { HASH_DEL (ht->keys, trp_entry); free (trp_entry); } free (ht); } size_t LSUP_htstore_size (LSUP_HTStore *ht) { return HASH_COUNT (ht->keys); } LSUP_HTIterator * LSUP_htstore_add_init (HTStore *store) { HTIterator *it; MALLOC_GUARD (it, NULL); it->store = store; it->i = 0; return it; } LSUP_rc LSUP_htstore_add_iter (HTIterator *it, const LSUP_SerTriple *sspo) { LSUP_TripleKey spok = { LSUP_buffer_hash (sspo->s), LSUP_buffer_hash (sspo->p), LSUP_buffer_hash (sspo->o), }; // Add triple. TRACE ("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]); TripleEntry *k_ins = NULL; HASH_FIND (hh, it->store->keys, spok, TRP_KLEN, k_ins); if (k_ins == NULL) { TRACE (STR, "Triple not found, inserting."); MALLOC_GUARD (k_ins, LSUP_MEM_ERR); memcpy (k_ins->key, spok, TRP_KLEN); HASH_ADD (hh, it->store->keys, key, TRP_KLEN, k_ins); it->i++; } else { TRACE (STR, "Triple found. Skipping."); return LSUP_NOACTION; } // Add terms to index. for (int i = 0; i < 3; i++) { spok[i] = LSUP_buffer_hash (LSUP_striple_pos (sspo, i)); //TRACE ("Indexing term key %lx\n", spok[i]); IndexEntry *ins = NULL; HASH_FIND (hh, it->store->idx, spok + i, KLEN, ins); if (ins == NULL) { MALLOC_GUARD (ins, LSUP_MEM_ERR); ins->key = spok[i]; ins->sterm = LSUP_buffer_new ( (LSUP_striple_pos (sspo, i))->size, (LSUP_striple_pos (sspo, i))->addr); HASH_ADD (hh, it->store->idx, key, KLEN, ins); } } return LSUP_OK; } void LSUP_htstore_add_done (HTIterator *it) { LSUP_htiter_free (it); } LSUP_rc LSUP_htstore_remove( LSUP_HTStore *store, const LSUP_SerTriple *sspo, size_t *ct) { LSUP_HTIterator *it = LSUP_htstore_lookup (store, sspo); if (UNLIKELY (!it)) return LSUP_DB_ERR; *ct = 0; TripleEntry *tmp = malloc (sizeof (*tmp)); HASH_ITER (hh, store->keys, it->entry, tmp) { if (it->eq_fn (it->entry->key, it->luk)) { HASH_DEL (store->keys, it->entry); free (it->entry); (*ct)++; } } // TODO clean up orphan indices in separate (async, scheduled) function. return LSUP_OK; } HTIterator * LSUP_htstore_lookup (HTStore *store, const LSUP_SerTriple *sspo) { HTIterator *it; MALLOC_GUARD (it, NULL); it->store = store; //it->cur = 0; it->rc = LSUP_END; if (HASH_COUNT (store->keys) == 0) return it; LSUP_TripleKey spok = { LSUP_buffer_hash (sspo->s), LSUP_buffer_hash (sspo->p), LSUP_buffer_hash (sspo->o), }; // s p o if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) { memcpy (it->luk, spok, sizeof (LSUP_TripleKey)); it->eq_fn = lookup_spok_eq_fn; } else if (spok[0] != NULL_KEY) { it->luk[0] = spok[0]; // s p ? if (spok[1] != NULL_KEY) { it->luk[1] = spok[1]; it->eq_fn = lookup_spk_eq_fn; // s ? o } else if (spok[2] != NULL_KEY) { it->luk[1] = spok[2]; it->eq_fn = lookup_sok_eq_fn; // s ? ? } else { it->eq_fn = lookup_sk_eq_fn; } //it->cur = 0; } else if (spok[1] != NULL_KEY) { it->luk[0] = spok[1]; // ? p o if (spok[2] != NULL_KEY) { it->luk[1] = spok[2]; it->eq_fn = lookup_pok_eq_fn; // ? p ? } else it->eq_fn = lookup_pk_eq_fn; // ? ? o } else if (spok[2] != NULL_KEY) { it->luk[0] = spok[2]; it->eq_fn = lookup_ok_eq_fn; // ? ? ? } else it->eq_fn = lookup_none_eq_fn; it->entry = it->store->keys; // First record in hash table. it->rc = it->entry == NULL ? LSUP_END : LSUP_OK; it->i = 0; return it; } void LSUP_htiter_free (LSUP_HTIterator *it) { free (it); } size_t LSUP_htiter_cur (LSUP_HTIterator *it) { return it->i; } LSUP_rc LSUP_htiter_next (HTIterator *it, LSUP_SerTriple *sspo) { // If the previous iteration hit the end, return. if (it->rc != LSUP_OK) return it->rc; it->rc = LSUP_NORESULT; while (it->rc == LSUP_NORESULT) { if (!it->entry) it->rc = LSUP_END; else { if (it->eq_fn (it->entry->key, it->luk)) { tkey_to_strp (it->store, it->entry->key, sspo); if (!sspo->s || !sspo->p || !sspo->o) return LSUP_DB_ERR; TRACE ( "Found spok: {%lx, %lx, %lx}", it->entry->key[0], it->entry->key[1], it->entry->key[2] ); it->rc = LSUP_OK; it->i++; } it->entry = it->entry->hh.next; } } return it->rc; } /* * * Statics * * */ inline static LSUP_rc tkey_to_strp ( const HTStore *store, const LSUP_Key spok[], LSUP_SerTriple *sspo) { // owned by the store. IndexEntry *tmp = NULL; HASH_FIND (hh, store->idx, spok + 0, KLEN, tmp); sspo->s = tmp->sterm; sspo->s->size = tmp->sterm->size; HASH_FIND (hh, store->idx, spok + 1, KLEN, tmp); sspo->p = tmp->sterm; sspo->s->size = tmp->sterm->size; HASH_FIND (hh, store->idx, spok + 2, KLEN, tmp); if (UNLIKELY (!tmp)) return LSUP_ERROR; sspo->o = tmp->sterm; return LSUP_OK; }