#include "htable.h" #include "graph.h" // Initial size of lookup graph. It will double each time capacity is reached. #define LOOKUP_GR_INIT_SIZE 64 // Assume VERY coarsly that the number of unique terms will be in general // 1.7 times the number of triples. This is conservative to maintain load // factor low. #define IDX_SIZE_RATIO 1.7 typedef enum KSetFlag { LSUP_KS_NONE = 0, LSUP_KS_CHECK_CAP = 1 << 0, LSUP_KS_CHECK_DUP = 1 << 1, } KSetFlag; /** * Static handles. */ static const char *default_ctx_label = "urn:lsup:default"; static LSUP_Buffer *default_ctx = NULL; // Default context URI for quad store. /** * Identity hashing function. * * Since the key is already a strong hash, reuse it for bucket allocation. */ static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed) { return *(uint64_t*)key; } /** * General XX64 hash. Strong (non-crypto) and extremely fast. */ static inline uint64_t xx64_hash_fn( const void *key, ksize_t size, uint64_t seed) { return XXH64(key, size, seed); } static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size) { return memcmp(a, b, size) == 0; } typedef struct Graph { LSUP_store_type store_type; // In-memory or MDB-backed LSUP_Term *uri; // Graph "name" (URI) LSUP_HTable *keys; LSUP_HTable *idx; // Dictionary of keys to serialized terms } Graph; /** * Extern inline functions. */ size_t LSUP_graph_size(const LSUP_Graph *gr); size_t LSUP_graph_capacity(const LSUP_Graph *gr); /** * Callback type for key comparison. */ typedef bool (*LSUP_key_cmp_fn_t)( const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2); /** * Dummy callback for queries with all parameters unbound. Returns true. static bool lookup_none_cmp_fn( const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2) { return true; } */ /** * Keyset lookup for S key. */ static bool lookup_sk_cmp_fn( const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2) { return spok[0][0] == k1; } /** * Keyset lookup for P key. */ static bool lookup_pk_cmp_fn( const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2) { return spok[0][1] == k1; } /** * Keyset lookup for O key. */ static bool lookup_ok_cmp_fn( const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2) { return spok[0][2] == k1; } /** * Keyset lookup for S and P keys. */ static bool lookup_skpk_cmp_fn( const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2) { return spok[0][0] == k1 && spok[0][1] == k2; } /** * Keyset lookup for S and O keys. */ static bool lookup_skok_cmp_fn( const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2) { return spok[0][0] == k1 && spok[0][2] == k2; } /** * Keyset lookup for P and O keys. */ static bool lookup_pkok_cmp_fn( const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2) { return spok[0][1] == k1 && spok[0][2] == k2; } /* * * Post-lookup callback prototypes * * */ int match_add_fn( LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok, void *ctx); int match_rm_fn( LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok, void *ctx); /* * * KEYSETS * * */ static inline bool is_null_trp(const LSUP_TripleKey *trp) { return ( *trp[0] == NULL_KEY && *trp[1] == NULL_KEY && *trp[2] == NULL_KEY); } /* * * GRAPH * * */ int LSUP_graph_init( LSUP_Graph *gr, size_t capacity, char *uri_str, LSUP_store_type store_type) { if (uri_str == NULL) { gr->uri = LSUP_term_new( LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL); } else { gr->uri = LSUP_term_new(LSUP_TERM_URI, uri_str, NULL, NULL); } gr->keys = LSUP_htable_new( capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn, 0); switch (store_type ) { case LSUP_STORE_MEM: gr->idx = LSUP_htable_new( capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t), xx64_hash_fn, buffer_eq_fn, 0); break; case LSUP_STORE_MDB: // TODO default: return -1; } return LSUP_OK; } LSUP_Graph * LSUP_graph_new(size_t capacity, char *uri_str, LSUP_store_type store_type) { LSUP_Graph *gr; CRITICAL(gr = malloc(sizeof(LSUP_Graph))); LSUP_graph_init(gr, capacity, uri_str, store_type); return gr; } /** * Copy triples from a source graph into a destination one. * * The destination graph is not initialized, so the copy is cumulative. */ static int graph_copy_contents(LSUP_Graph *src, LSUP_Graph *dest) { LSUP_Triple trp; trp.s = NULL; trp.p = NULL; trp.o = NULL; return LSUP_graph_match_callback( src, dest, &trp, &match_add_fn, true, NULL); } int LSUP_graph_copy(LSUP_Graph *dest, LSUP_Graph *src) { LSUP_graph_init(dest, LSUP_graph_size(src), NULL, src->store_type); return graph_copy_contents(src, dest); } int LSUP_graph_resize(LSUP_Graph *gr, size_t size) { LSUP_htable_resize(gr->keys, size); LSUP_htable_resize(gr->idx, size * IDX_SIZE_RATIO); return LSUP_OK; } size_t LSUP_graph_capacity(const LSUP_Graph *gr) { return LSUP_htable_capacity(gr->keys); } LSUP_Term * LSUP_graph_uri(const LSUP_Graph *gr) { return gr->uri; } size_t LSUP_graph_size(const LSUP_Graph *gr) { return LSUP_htable_size(gr->keys); } int LSUP_graph_add_triple(LSUP_Graph *gr, const LSUP_Triple *spo) { LSUP_SerTerm sspo[3]; LSUP_term_serialize(spo->s, sspo); LSUP_term_serialize(spo->p, sspo + 1); LSUP_term_serialize(spo->o, sspo + 2); LSUP_TripleKey spok = NULL_TRP; // Add term to index. for (int i = 0; i < 3; i++) { spok[i] = LSUP_sterm_to_key(sspo + i); TRACE("Indexing term key %lu\n", spok[i]); // If term is already in the index, discard and free it. if (LSUP_htable_get(gr->idx, spok + i, NULL) == LSUP_OK) { //LSUP_SerTerm *sterm = sspo + i; //CRITICAL(sterm = malloc(sizeof(LSUP_Buffer))); LSUP_htable_put(gr->idx, spok + i, sspo + i); } else { TRACE("%s", "Term is already indexed."); LSUP_buffer_done(sspo + i); } } // Add triple. TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]); return LSUP_htable_put(gr->keys, spok, NULL); } int LSUP_graph_add(LSUP_Graph *gr, const LSUP_Triple data[], size_t data_size) { // TODO Decouple this and build interface for memory and MDB integration. // Resize all at once if needed. if (LSUP_graph_capacity(gr) < LSUP_graph_size(gr) + data_size) LSUP_graph_resize(gr, LSUP_graph_size(gr) + data_size); int rc = LSUP_NOACTION; for (size_t i = 0; i < data_size; i++) { TRACE("Inserting triple #%lu\n", i); if (LIKELY(LSUP_graph_add_triple(gr, data + i) == LSUP_OK)) rc = LSUP_OK; } return rc; } bool LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo) { LSUP_TripleKey spok = { LSUP_term_to_key(spo->s), LSUP_term_to_key(spo->p), LSUP_term_to_key(spo->o), }; return LSUP_htable_get(gr->keys, spok, NULL) == LSUP_OK; } int LSUP_graph_match_callback( LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo, keyset_match_fn_t callback_fn, bool match_cond, void *ctx) { if (LSUP_htable_size(gr->keys) == 0) return LSUP_NOACTION; htsize_t cur = 0; LSUP_Key k1, k2; LSUP_key_cmp_fn_t cmp_fn; LSUP_TripleKey i_spok; LSUP_TripleKey spok = { LSUP_term_to_key(spo->s), LSUP_term_to_key(spo->p), LSUP_term_to_key(spo->o), }; if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) { if (match_cond == true) { // Shortcut for 3-term match—only if match_cond is true. LSUP_graph_init(res, 1, NULL, LSUP_STORE_MEM); int rc = LSUP_htable_get(gr->keys, spok, NULL); if(rc == LSUP_OK) { callback_fn(gr, res, &spok, ctx); return LSUP_OK; } else { return LSUP_NOACTION; } } else { // For negative condition (i.e. "apply this function to all triples // except the matching one") int rc = LSUP_NOACTION; while (LSUP_htable_iter( gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) { if (LIKELY( i_spok[2] != spok[2] || i_spok[0] != spok[0] || i_spok[1] != spok[1] )) { rc = callback_fn(gr, res, &i_spok, ctx); } } return rc; } } else if (spok[0] != NULL_KEY) { k1 = spok[0]; if (spok[1] != NULL_KEY) { // s p ? k2 = spok[1]; cmp_fn = lookup_skpk_cmp_fn; } else if (spok[2] != NULL_KEY) { // s ? o k2 = spok[2]; cmp_fn = lookup_skok_cmp_fn; } else { // s ? ? cmp_fn = lookup_sk_cmp_fn; } } else if (spok[1] != NULL_KEY) { k1 = spok[1]; if (spok[2] != NULL_KEY) { // ? p o k2 = spok[2]; cmp_fn = lookup_pkok_cmp_fn; } else { // ? p ? cmp_fn = lookup_pk_cmp_fn; } } else if (spok[2] != NULL_KEY) { // ? ? o k1 = spok[2]; cmp_fn = lookup_ok_cmp_fn; } else { printf("WARNING: no bound terms, making a compact copy.\n"); return LSUP_graph_copy(res, gr); } while (LSUP_htable_iter(gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) { if (cmp_fn(&i_spok, k1, k2) == match_cond) callback_fn(gr, res, &i_spok, ctx); } return LSUP_OK; } int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo) { LSUP_graph_init(res, LOOKUP_GR_INIT_SIZE, NULL, LSUP_STORE_MEM); return LSUP_graph_match_callback(gr, res, spo, &match_add_fn, true, NULL); } int LSUP_graph_join(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res) { LSUP_graph_copy(res, gr1); return graph_copy_contents(gr2, res); } int LSUP_graph_subtract(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res) { if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res); LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM); if (LSUP_htable_size(gr1->keys) == 0) return LSUP_OK; htsize_t cur = 0; LSUP_TripleKey spok; while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) { if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT) match_add_fn(res, gr1, &spok, NULL); } return LSUP_OK; } int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res) { LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM); if (LSUP_htable_size(gr1->keys) == 0 || LSUP_htable_size(gr2->keys) == 0) return LSUP_OK; htsize_t cur = 0; LSUP_TripleKey spok; while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) { if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_OK) match_add_fn(res, gr1, &spok, NULL); } return LSUP_OK; } int LSUP_graph_xor(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res) { if (LSUP_htable_size(gr1->keys) == 0) return LSUP_graph_copy(gr2, res); if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res); LSUP_graph_init( res, min(LSUP_graph_capacity(gr1), LSUP_graph_capacity(gr2)), NULL, LSUP_STORE_MEM); htsize_t cur = 0; LSUP_TripleKey spok; while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) { if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT) match_add_fn(res, gr1, &spok, NULL); } cur = 0; while(LSUP_htable_iter(gr2->keys, &cur, (void**)&spok, NULL) == LSUP_OK) { if (LSUP_htable_get(gr1->keys, (void**)&spok, NULL) == LSUP_NORESULT) match_add_fn(res, gr2, &spok, NULL); } return LSUP_OK; } void LSUP_graph_free(LSUP_Graph *gr) { if (LIKELY(gr != NULL)) { LSUP_term_free(gr->uri); // Free up triples. LSUP_htable_free(gr->keys); // Free up index entries and index. htsize_t cur = 0; LSUP_TripleKey spok; LSUP_Buffer *sterm; while(LSUP_htable_iter( gr->idx, &cur, (void**)&spok, (void**)&sterm) == LSUP_OK) { TRACE("Freeing indexed term buffer #%d at %p", cur, sterm); LSUP_buffer_done(sterm); } LSUP_htable_free(gr->idx); free(gr); } } /* * CALLBACKS * */ /** * Callback for adding a matched triple. * * Adds the current triple in src to dest. No duplicate check. * * The source graph cursor must be set to the triple to be copied. */ int match_add_fn( LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok, void *ctx) { // Add term to index. for (int i = 0; i < 3; i++) { // Index terms if not yet presents in destination. void *src_val, *dest_val; if(LSUP_htable_get(src->idx, *spok + i, &src_val) == LSUP_OK) { CRITICAL(dest_val = malloc(sizeof(LSUP_Buffer))); LSUP_buffer_copy(dest_val, src_val); LSUP_htable_put(dest->idx, *spok + i, dest_val); } } // Add triple. return LSUP_htable_put(dest->keys, spok, NULL); } /** * Callback for removing a matched triple. */ int match_rm_fn( LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok, void *ctx) { return LSUP_htable_del(dest->keys, spok); }