123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545 |
- #include "htable.h"
- #include "graph.h"
- // Initial size of lookup graph. It will double each time capacity is reached.
- #define LOOKUP_GR_INIT_SIZE 64
- // Assume VERY coarsly that the number of unique terms will be in general
- // 1.7 times the number of triples. This is conservative to maintain load
- // factor low.
- #define IDX_SIZE_RATIO 1.7
- typedef enum KSetFlag {
- LSUP_KS_NONE = 0,
- LSUP_KS_CHECK_CAP = 1 << 0,
- LSUP_KS_CHECK_DUP = 1 << 1,
- } KSetFlag;
- /**
- * Static handles.
- */
- static const char *default_ctx_label = "urn:lsup:default";
- static LSUP_Buffer *default_ctx = NULL; // Default context URI for quad store.
- /**
- * Identity hashing function.
- *
- * Since the key is already a strong hash, reuse it for bucket allocation.
- */
- static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed)
- { return *(uint64_t*)key; }
- /**
- * General XX64 hash. Strong (non-crypto) and extremely fast.
- */
- static inline uint64_t xx64_hash_fn(
- const void *key, ksize_t size, uint64_t seed)
- { return XXH64(key, size, seed); }
- static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size)
- { return memcmp(a, b, size) == 0; }
- typedef struct Graph {
- LSUP_store_type store_type; // In-memory or MDB-backed
- LSUP_Term *uri; // Graph "name" (URI)
- LSUP_HTable *keys;
- LSUP_HTable *idx; // Dictionary of keys to serialized terms
- } Graph;
- /**
- * Extern inline functions.
- */
- size_t LSUP_graph_size(const LSUP_Graph *gr);
- size_t LSUP_graph_capacity(const LSUP_Graph *gr);
- /**
- * Callback type for key comparison.
- */
- typedef bool (*LSUP_key_cmp_fn_t)(
- const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2);
- /**
- * Dummy callback for queries with all parameters unbound. Returns true.
- static bool lookup_none_cmp_fn(
- const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
- { return true; }
- */
- /**
- * Keyset lookup for S key.
- */
- static bool lookup_sk_cmp_fn(
- const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
- { return spok[0][0] == k1; }
- /**
- * Keyset lookup for P key.
- */
- static bool lookup_pk_cmp_fn(
- const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
- { return spok[0][1] == k1; }
- /**
- * Keyset lookup for O key.
- */
- static bool lookup_ok_cmp_fn(
- const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
- { return spok[0][2] == k1; }
- /**
- * Keyset lookup for S and P keys.
- */
- static bool lookup_skpk_cmp_fn(
- const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
- { return spok[0][0] == k1 && spok[0][1] == k2; }
- /**
- * Keyset lookup for S and O keys.
- */
- static bool lookup_skok_cmp_fn(
- const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
- { return spok[0][0] == k1 && spok[0][2] == k2; }
- /**
- * Keyset lookup for P and O keys.
- */
- static bool lookup_pkok_cmp_fn(
- const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
- { return spok[0][1] == k1 && spok[0][2] == k2; }
- /* * * Post-lookup callback prototypes * * */
- int match_add_fn(
- LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
- void *ctx);
- int match_rm_fn(
- LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
- void *ctx);
- /* * * KEYSETS * * */
- static inline bool is_null_trp(const LSUP_TripleKey *trp)
- {
- return (
- *trp[0] == NULL_KEY &&
- *trp[1] == NULL_KEY &&
- *trp[2] == NULL_KEY);
- }
- /* * * GRAPH * * */
- int
- LSUP_graph_init(
- LSUP_Graph *gr, size_t capacity, char *uri_str,
- LSUP_store_type store_type)
- {
- if (uri_str == NULL) {
- gr->uri = LSUP_term_new(
- LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL);
- } else {
- gr->uri = LSUP_term_new(LSUP_TERM_URI, uri_str, NULL, NULL);
- }
- gr->keys = LSUP_htable_new(
- capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn, 0);
- switch (store_type ) {
- case LSUP_STORE_MEM:
- gr->idx = LSUP_htable_new(
- capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t),
- xx64_hash_fn, buffer_eq_fn, 0);
- break;
- case LSUP_STORE_MDB:
- // TODO
- default:
- return -1;
- }
- return LSUP_OK;
- }
- LSUP_Graph *
- LSUP_graph_new(size_t capacity, char *uri_str, LSUP_store_type store_type)
- {
- LSUP_Graph *gr;
- CRITICAL(gr = malloc(sizeof(LSUP_Graph)));
- LSUP_graph_init(gr, capacity, uri_str, store_type);
- return gr;
- }
- /**
- * Copy triples from a source graph into a destination one.
- *
- * The destination graph is not initialized, so the copy is cumulative.
- */
- static int graph_copy_contents(LSUP_Graph *src, LSUP_Graph *dest)
- {
- LSUP_Triple trp;
- trp.s = NULL;
- trp.p = NULL;
- trp.o = NULL;
- return LSUP_graph_match_callback(
- src, dest, &trp, &match_add_fn, true, NULL);
- }
- int
- LSUP_graph_copy(LSUP_Graph *dest, LSUP_Graph *src)
- {
- LSUP_graph_init(dest, LSUP_graph_size(src), NULL, src->store_type);
- return graph_copy_contents(src, dest);
- }
- int
- LSUP_graph_resize(LSUP_Graph *gr, size_t size)
- {
- LSUP_htable_resize(gr->keys, size);
- LSUP_htable_resize(gr->idx, size * IDX_SIZE_RATIO);
- return LSUP_OK;
- }
- size_t
- LSUP_graph_capacity(const LSUP_Graph *gr)
- { return LSUP_htable_capacity(gr->keys); }
- LSUP_Term *
- LSUP_graph_uri(const LSUP_Graph *gr) { return gr->uri; }
- size_t
- LSUP_graph_size(const LSUP_Graph *gr) { return LSUP_htable_size(gr->keys); }
- int
- LSUP_graph_add_triple(LSUP_Graph *gr, const LSUP_Triple *spo)
- {
- LSUP_SerTerm sspo[3];
- LSUP_term_serialize(spo->s, sspo);
- LSUP_term_serialize(spo->p, sspo + 1);
- LSUP_term_serialize(spo->o, sspo + 2);
- LSUP_TripleKey spok = NULL_TRP;
- // Add term to index.
- for (int i = 0; i < 3; i++) {
- spok[i] = LSUP_sterm_to_key(sspo + i);
- TRACE("Indexing term key %lu\n", spok[i]);
- // If term is already in the index, discard and free it.
- if (LSUP_htable_get(gr->idx, spok + i, NULL) == LSUP_OK) {
- //LSUP_SerTerm *sterm = sspo + i;
- //CRITICAL(sterm = malloc(sizeof(LSUP_Buffer)));
- LSUP_htable_put(gr->idx, spok + i, sspo + i);
- } else {
- TRACE("%s", "Term is already indexed.");
- LSUP_buffer_done(sspo + i);
- }
- }
- // Add triple.
- TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
- return LSUP_htable_put(gr->keys, spok, NULL);
- }
- int
- LSUP_graph_add(LSUP_Graph *gr, const LSUP_Triple data[], size_t data_size)
- {
- // TODO Decouple this and build interface for memory and MDB integration.
- // Resize all at once if needed.
- if (LSUP_graph_capacity(gr) < LSUP_graph_size(gr) + data_size)
- LSUP_graph_resize(gr, LSUP_graph_size(gr) + data_size);
- int rc = LSUP_NOACTION;
- for (size_t i = 0; i < data_size; i++) {
- TRACE("Inserting triple #%lu\n", i);
- if (LIKELY(LSUP_graph_add_triple(gr, data + i) == LSUP_OK))
- rc = LSUP_OK;
- }
- return rc;
- }
- bool
- LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo)
- {
- LSUP_TripleKey spok = {
- LSUP_term_to_key(spo->s),
- LSUP_term_to_key(spo->p),
- LSUP_term_to_key(spo->o),
- };
- return LSUP_htable_get(gr->keys, spok, NULL) == LSUP_OK;
- }
- int
- LSUP_graph_match_callback(
- LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo,
- keyset_match_fn_t callback_fn, bool match_cond, void *ctx)
- {
- if (LSUP_htable_size(gr->keys) == 0)
- return LSUP_NOACTION;
- htsize_t cur = 0;
- LSUP_Key k1, k2;
- LSUP_key_cmp_fn_t cmp_fn;
- LSUP_TripleKey i_spok;
- LSUP_TripleKey spok = {
- LSUP_term_to_key(spo->s),
- LSUP_term_to_key(spo->p),
- LSUP_term_to_key(spo->o),
- };
- if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
- if (match_cond == true) {
- // Shortcut for 3-term match—only if match_cond is true.
- LSUP_graph_init(res, 1, NULL, LSUP_STORE_MEM);
- int rc = LSUP_htable_get(gr->keys, spok, NULL);
- if(rc == LSUP_OK) {
- callback_fn(gr, res, &spok, ctx);
- return LSUP_OK;
- } else {
- return LSUP_NOACTION;
- }
- } else {
- // For negative condition (i.e. "apply this function to all triples
- // except the matching one")
- int rc = LSUP_NOACTION;
- while (LSUP_htable_iter(
- gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
- if (LIKELY(
- i_spok[2] != spok[2] ||
- i_spok[0] != spok[0] ||
- i_spok[1] != spok[1]
- )) {
- rc = callback_fn(gr, res, &i_spok, ctx);
- }
- }
- return rc;
- }
- } else if (spok[0] != NULL_KEY) {
- k1 = spok[0];
- if (spok[1] != NULL_KEY) { // s p ?
- k2 = spok[1];
- cmp_fn = lookup_skpk_cmp_fn;
- } else if (spok[2] != NULL_KEY) { // s ? o
- k2 = spok[2];
- cmp_fn = lookup_skok_cmp_fn;
- } else { // s ? ?
- cmp_fn = lookup_sk_cmp_fn;
- }
- } else if (spok[1] != NULL_KEY) {
- k1 = spok[1];
- if (spok[2] != NULL_KEY) { // ? p o
- k2 = spok[2];
- cmp_fn = lookup_pkok_cmp_fn;
- } else { // ? p ?
- cmp_fn = lookup_pk_cmp_fn;
- }
- } else if (spok[2] != NULL_KEY) { // ? ? o
- k1 = spok[2];
- cmp_fn = lookup_ok_cmp_fn;
- } else {
- printf("WARNING: no bound terms, making a compact copy.\n");
- return LSUP_graph_copy(res, gr);
- }
- while (LSUP_htable_iter(gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
- if (cmp_fn(&i_spok, k1, k2) == match_cond)
- callback_fn(gr, res, &i_spok, ctx);
- }
- return LSUP_OK;
- }
- int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo)
- {
- LSUP_graph_init(res, LOOKUP_GR_INIT_SIZE, NULL, LSUP_STORE_MEM);
- return LSUP_graph_match_callback(gr, res, spo, &match_add_fn, true, NULL);
- }
- int LSUP_graph_join(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
- {
- LSUP_graph_copy(res, gr1);
- return graph_copy_contents(gr2, res);
- }
- int LSUP_graph_subtract(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
- {
- if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
- LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM);
- if (LSUP_htable_size(gr1->keys) == 0) return LSUP_OK;
- htsize_t cur = 0;
- LSUP_TripleKey spok;
- while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
- if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT)
- match_add_fn(res, gr1, &spok, NULL);
- }
- return LSUP_OK;
- }
- int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
- {
- LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM);
- if (LSUP_htable_size(gr1->keys) == 0 || LSUP_htable_size(gr2->keys) == 0)
- return LSUP_OK;
- htsize_t cur = 0;
- LSUP_TripleKey spok;
- while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
- if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_OK)
- match_add_fn(res, gr1, &spok, NULL);
- }
- return LSUP_OK;
- }
- int LSUP_graph_xor(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
- {
- if (LSUP_htable_size(gr1->keys) == 0) return LSUP_graph_copy(gr2, res);
- if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
- LSUP_graph_init(
- res, min(LSUP_graph_capacity(gr1), LSUP_graph_capacity(gr2)),
- NULL, LSUP_STORE_MEM);
- htsize_t cur = 0;
- LSUP_TripleKey spok;
- while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
- if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT)
- match_add_fn(res, gr1, &spok, NULL);
- }
- cur = 0;
- while(LSUP_htable_iter(gr2->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
- if (LSUP_htable_get(gr1->keys, (void**)&spok, NULL) == LSUP_NORESULT)
- match_add_fn(res, gr2, &spok, NULL);
- }
- return LSUP_OK;
- }
- void
- LSUP_graph_free(LSUP_Graph *gr)
- {
- if (LIKELY(gr != NULL)) {
- LSUP_term_free(gr->uri);
- // Free up triples.
- LSUP_htable_free(gr->keys);
- // Free up index entries and index.
- htsize_t cur = 0;
- LSUP_TripleKey spok;
- LSUP_Buffer *sterm;
- while(LSUP_htable_iter(
- gr->idx, &cur, (void**)&spok, (void**)&sterm) == LSUP_OK) {
- TRACE("Freeing indexed term buffer #%d at %p", cur, sterm);
- LSUP_buffer_done(sterm);
- }
- LSUP_htable_free(gr->idx);
- free(gr);
- }
- }
- /* * CALLBACKS * */
- /**
- * Callback for adding a matched triple.
- *
- * Adds the current triple in src to dest. No duplicate check.
- *
- * The source graph cursor must be set to the triple to be copied.
- */
- int match_add_fn(
- LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
- void *ctx)
- {
- // Add term to index.
- for (int i = 0; i < 3; i++) {
- // Index terms if not yet presents in destination.
- void *src_val, *dest_val;
- if(LSUP_htable_get(src->idx, *spok + i, &src_val) == LSUP_OK) {
- CRITICAL(dest_val = malloc(sizeof(LSUP_Buffer)));
- LSUP_buffer_copy(dest_val, src_val);
- LSUP_htable_put(dest->idx, *spok + i, dest_val);
- }
- }
- // Add triple.
- return LSUP_htable_put(dest->keys, spok, NULL);
- }
- /**
- * Callback for removing a matched triple.
- */
- int match_rm_fn(
- LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
- void *ctx)
- { return LSUP_htable_del(dest->keys, spok); }
|