graph.pyx 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951
  1. import logging
  2. from functools import wraps
  3. from rdflib import Graph
  4. from rdflib.term import Node
  5. from lakesuperior import env
  6. from libc.stdint cimport uint32_t, uint64_t
  7. from libc.string cimport memcmp, memcpy
  8. from libc.stdlib cimport free
  9. from cymem.cymem cimport Pool
  10. from lakesuperior.cy_include cimport cylmdb as lmdb
  11. from lakesuperior.cy_include cimport collections as cc
  12. from lakesuperior.model.graph cimport term
  13. from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
  14. KLEN, DBL_KLEN, TRP_KLEN, TripleKey)
  15. from lakesuperior.model.structures.hash cimport term_hash_seed32
  16. from lakesuperior.model.structures.keyset cimport Keyset
  17. from lakesuperior.model.base cimport Buffer
  18. from lakesuperior.model.graph.triple cimport BufferTriple
  19. from lakesuperior.model.structures.hash cimport hash64
  20. cdef extern from 'spookyhash_api.h':
  21. uint64_t spookyhash_64(const void *input, size_t input_size, uint64_t seed)
  22. logger = logging.getLogger(__name__)
  23. def use_data(fn):
  24. """
  25. Decorator to indicate that a set operation between two SimpleGraph
  26. instances should use the ``data`` property of the second term. The second
  27. term can also be a simple set.
  28. """
  29. @wraps(fn)
  30. def _wrapper(self, other):
  31. if isinstance(other, SimpleGraph):
  32. other = other.data
  33. return _wrapper
  34. cdef int term_cmp_fn(const void* key1, const void* key2):
  35. """
  36. Compare function for two Buffer objects.
  37. :rtype: int
  38. :return: 0 if the byte streams are the same, another integer otherwise.
  39. """
  40. b1 = <Buffer *>key1
  41. b2 = <Buffer *>key2
  42. if b1.sz != b2.sz:
  43. logger.info(f'Sizes differ: {b1.sz} != {b2.sz}. Return 1.')
  44. return 1
  45. cdef int cmp = memcmp(b1.addr, b2.addr, b1.sz)
  46. logger.info(f'term memcmp: {cmp}')
  47. return cmp
  48. cdef int trp_cmp_fn(const void* key1, const void* key2):
  49. """
  50. Compare function for two triples in a set.
  51. Here, pointers to terms are compared for s, p, o. The pointers should be
  52. guaranteed to point to unique values (i.e. no two pointers have the same
  53. term value within a graph).
  54. :rtype: int
  55. :return: 0 if the addresses of all terms are the same, 1 otherwise.
  56. """
  57. t1 = <BufferTriple *>key1
  58. t2 = <BufferTriple *>key2
  59. print('Comparing: <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
  60. <unsigned long>t1.s, <unsigned long>t1.p, <unsigned long>t1.o))
  61. print('With: <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
  62. <unsigned long>t2.s, <unsigned long>t2.p, <unsigned long>t2.o))
  63. cdef int is_not_equal = (
  64. t1.s.addr != t2.s.addr or
  65. t1.p.addr != t2.p.addr or
  66. t1.o.addr != t2.o.addr
  67. )
  68. logger.info(f'Triples are NOT equal and will be added: {is_not_equal}')
  69. return is_not_equal
  70. cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
  71. """
  72. Hash function for serialized terms (:py:class:`Buffer` objects)
  73. """
  74. return <size_t>spookyhash_64((<Buffer*>key).addr, (<Buffer*>key).sz, seed)
  75. cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
  76. """
  77. Hash function for sets of (serialized) triples.
  78. This function computes the hash of the concatenated pointer values in the
  79. s, p, o members of the triple. The triple structure is treated as a byte
  80. string. This is safe in spite of byte-wise struct evaluation being a
  81. frowned-upon practice (due to padding issues), because it is assumed that
  82. the input value is always the same type of structure.
  83. """
  84. return <size_t>spookyhash_64(key, l, seed)
  85. cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
  86. """
  87. No-op function that takes a pointer and does *not* hash it.
  88. The pointer value is used as the "hash".
  89. """
  90. return <size_t>key
  91. cdef inline bint lookup_none_cmp_fn(
  92. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  93. ):
  94. """
  95. Dummy callback for queries with all parameters unbound.
  96. This function always returns ``True``
  97. """
  98. return True
  99. cdef inline bint lookup_s_cmp_fn(
  100. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  101. ):
  102. """
  103. Lookup callback compare function for a given s in a triple.
  104. The function returns ``True`` if ``t1`` matches the first term.
  105. ``t2`` is not used and is declared only for compatibility with the
  106. other interchangeable functions.
  107. """
  108. return term_cmp_fn(t1, trp[0].s)
  109. cdef inline bint lookup_p_cmp_fn(
  110. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  111. ):
  112. return term_cmp_fn(t1, trp[0].p)
  113. cdef inline bint lookup_o_cmp_fn(
  114. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  115. ):
  116. return term_cmp_fn(t1, trp[0].o)
  117. cdef inline bint lookup_sp_cmp_fn(
  118. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  119. ):
  120. return (
  121. term_cmp_fn(t1, trp[0].s)
  122. and term_cmp_fn(t2, trp[0].p))
  123. cdef inline bint lookup_so_cmp_fn(
  124. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  125. ):
  126. return (
  127. term_cmp_fn(t1, trp[0].s)
  128. and term_cmp_fn(t2, trp[0].o))
  129. cdef inline bint lookup_po_cmp_fn(
  130. const BufferTriple *trp, const Buffer *t1, const Buffer *t2
  131. ):
  132. return (
  133. term_cmp_fn(t1, trp[0].p)
  134. and term_cmp_fn(t2, trp[0].o))
  135. cdef class SimpleGraph:
  136. """
  137. Fast and simple implementation of a graph.
  138. Most functions should mimic RDFLib's graph with less overhead. It uses
  139. the same funny but functional slicing notation. No lookup functions within
  140. the graph are available at this time.
  141. Instances of this class hold a set of
  142. :py:class:`~lakesuperior.store.ldp_rs.term.Term` structures that stores
  143. unique terms within the graph, and a set of
  144. :py:class:`~lakesuperior.store.ldp_rs.triple.Triple` structures referencing
  145. those terms. Therefore, no data duplication occurs and the storage is quite
  146. sparse.
  147. A graph can be instantiated from a store lookup.
  148. A SimpleGraph can also be obtained from a
  149. :py:class:`lakesuperior.store.keyset.Keyset` which is convenient bacause
  150. a Keyset can be obtained very efficiently from querying a store, then also
  151. very efficiently filtered and eventually converted into a set of meaningful
  152. terms.
  153. An instance of this class can also be converted to and from a
  154. ``rdflib.Graph`` instance. TODO verify that this frees Cython pointers.
  155. """
  156. def __cinit__(
  157. self, Keyset keyset=None, store=None, set data=set(), *args, **kwargs):
  158. """
  159. Initialize the graph with pre-existing data or by looking up a store.
  160. One of ``keyset``, or ``data`` can be provided. If more than
  161. one of these is provided, precedence is given in the mentioned order.
  162. If none of them is specified, an empty graph is initialized.
  163. :param rdflib.URIRef uri: The graph URI.
  164. This will serve as the subject for some queries.
  165. :param Keyset keyset: Keyset to create the graph from. Keys will be
  166. converted to set elements.
  167. :param lakesuperior.store.ldp_rs.LmdbTripleStore store: store to
  168. look up the keyset. Only used if ``keyset`` is specified. If not
  169. set, the environment store is used.
  170. :param set data: Initial data as a set of 3-tuples of RDFLib terms.
  171. :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
  172. E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
  173. Any and all elements may be ``None``.
  174. :param lmdbStore store: the store to look data up.
  175. """
  176. cdef:
  177. cc.HashSetConf terms_conf, trp_conf
  178. cc.hashset_conf_init(&terms_conf)
  179. terms_conf.load_factor = 0.85
  180. terms_conf.hash = &term_hash_fn
  181. terms_conf.hash_seed = term_hash_seed32
  182. terms_conf.key_compare = &term_cmp_fn
  183. terms_conf.key_length = sizeof(Buffer*)
  184. cc.hashset_conf_init(&trp_conf)
  185. trp_conf.load_factor = 0.75
  186. trp_conf.hash = &trp_hash_fn
  187. trp_conf.hash_seed = term_hash_seed32
  188. trp_conf.key_compare = &trp_cmp_fn
  189. trp_conf.key_length = sizeof(BufferTriple)
  190. cc.hashset_new_conf(&terms_conf, &self._terms)
  191. cc.hashset_new_conf(&trp_conf, &self._triples)
  192. self.store = store or env.app_globals.rdf_store
  193. self._pool = Pool()
  194. # Initialize empty data set.
  195. if keyset:
  196. # Populate with triples extracted from provided key set.
  197. self._data_from_keyset(keyset)
  198. elif data is not None:
  199. # Populate with provided Python set.
  200. for s, p, o in data:
  201. self._add_from_rdflib(s, p, o)
  202. print(len(self))
  203. print('SimpleGraph cinit complete.')
  204. def __dealloc__(self):
  205. """
  206. Free the triple pointers. TODO use a Cymem pool
  207. """
  208. free(self._triples)
  209. free(self._terms)
  210. @property
  211. def data(self):
  212. """
  213. Triple data as a Python set.
  214. :rtype: set
  215. """
  216. return self._to_pyset()
  217. # # # BASIC SET OPERATIONS # # #
  218. cpdef SimpleGraph union(self, SimpleGraph other):
  219. """
  220. Perform set union resulting in a new SimpleGraph instance.
  221. TODO Allow union of multiple graphs at a time.
  222. :param SimpleGraph other: The other graph to merge.
  223. :rtype: SimpleGraph
  224. :return: A new SimpleGraph instance.
  225. """
  226. cdef:
  227. void *cur
  228. cc.HashSetIter it
  229. SimpleGraph new_gr = SimpleGraph()
  230. BufferTriple *trp
  231. new_gr.store = self.store
  232. for gr in (self, other):
  233. cc.hashset_iter_init(&it, gr._triples)
  234. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  235. bt = <BufferTriple*>cur
  236. new_gr._add_triple(bt.s, bt.p, bt.o)
  237. return new_gr
  238. cpdef void ip_union(self, SimpleGraph other):
  239. """
  240. Perform an in-place set union that adds triples to this instance
  241. TODO Allow union of multiple graphs at a time.
  242. :param SimpleGraph other: The other graph to merge.
  243. :rtype: void
  244. """
  245. cdef:
  246. void *cur
  247. cc.HashSetIter it
  248. BufferTriple *trp
  249. cc.hashset_iter_init(&it, other._triples)
  250. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  251. bt = <BufferTriple*>cur
  252. self._add_triple(bt.s, bt.p, bt.o)
  253. cdef void _data_from_lookup(self, tuple trp_ptn, ctx=None) except *:
  254. """
  255. Look up triples in the triplestore and load them into ``data``.
  256. :param tuple lookup: 3-tuple of RDFlib terms or ``None``.
  257. :param LmdbTriplestore store: Reference to a LMDB triplestore. This
  258. is normally set to ``lakesuperior.env.app_globals.rdf_store``.
  259. """
  260. cdef:
  261. size_t i
  262. unsigned char spok[TRP_KLEN]
  263. with self.store.txn_ctx():
  264. keyset = self.store.triple_keys(trp_ptn, ctx)
  265. self.data_from_keyset(keyset)
  266. cdef void _data_from_keyset(self, Keyset data) except *:
  267. """Populate a graph from a Keyset."""
  268. cdef TripleKey spok
  269. while data.next(spok):
  270. self._add_from_spok(spok)
  271. cdef inline void _add_from_spok(self, TripleKey spok) except *:
  272. """
  273. Add a triple from a TripleKey of term keys.
  274. """
  275. cdef:
  276. SPOBuffer s_spo
  277. BufferTriple trp
  278. s_spo = <SPOBuffer>self._pool.alloc(3, sizeof(Buffer))
  279. self.store.lookup_term(spok, s_spo)
  280. self.store.lookup_term(spok + KLEN, s_spo + 1)
  281. self.store.lookup_term(spok + DBL_KLEN, s_spo + 2)
  282. self._add_triple(s_spo, s_spo + 1, s_spo + 2)
  283. cdef inline void _add_triple(
  284. self, BufferPtr ss, BufferPtr sp, BufferPtr so
  285. ) except *:
  286. """
  287. Add a triple from 3 (TPL) serialized terms.
  288. Each of the terms is added to the term set if not existing. The triple
  289. also is only added if not existing.
  290. """
  291. trp = <BufferTriple *>self._pool.alloc(1, sizeof(BufferTriple))
  292. logger.info('ss: {}'.format((<unsigned char *>ss.addr)[:ss.sz]))
  293. logger.info('sp: {}'.format((<unsigned char *>sp.addr)[:sp.sz]))
  294. logger.info('so: {}'.format((<unsigned char *>so.addr)[:so.sz]))
  295. logger.info('Inserting terms.')
  296. logger.info(f'ss addr: {<unsigned long>ss.addr}')
  297. logger.info(f'ss sz: {ss.sz}')
  298. #logger.info('ss:')
  299. #logger.info((<unsigned char *>ss.addr)[:ss.sz])
  300. print('Insert ss: @0x{:02x}'.format(<unsigned long>ss))
  301. cc.hashset_add_or_get(self._terms, <void **>&ss)
  302. print('Now ss is: @0x{:02x}'.format(<unsigned long>ss))
  303. print('Insert sp: @0x{:02x}'.format(<unsigned long>sp))
  304. cc.hashset_add_or_get(self._terms, <void **>&sp)
  305. print('Now sp is: @0x{:02x}'.format(<unsigned long>sp))
  306. print('Insert so: @0x{:02x}'.format(<unsigned long>so))
  307. cc.hashset_add_or_get(self._terms, <void **>&so)
  308. print('Now so is: @0x{:02x}'.format(<unsigned long>so))
  309. logger.info('inserted terms.')
  310. cdef size_t terms_sz = cc.hashset_size(self._terms)
  311. logger.info(f'Terms set size: {terms_sz}')
  312. #cdef cc.HashSetIter ti
  313. #cdef Buffer *t
  314. #cc.hashset_iter_init(&ti, self._terms)
  315. #while calg.set_iter_has_more(&ti):
  316. # t = <Buffer *>calg.set_iter_next(&ti)
  317. # # # Test area
  318. #cdef:
  319. # cc.HashSet* testset
  320. # cc.HashSetConf testconf
  321. # int i = 24
  322. # size_t sz
  323. #cc.hashset_conf_init(&testconf)
  324. #testconf.hash = &hash_ptr_passthrough # spookyhash_64?
  325. #testconf.hash_seed = term_hash_seed32
  326. #testconf.key_length = sizeof(int*)
  327. #testconf.key_compare = &trp_cmp_fn
  328. #testconf.key_length = sizeof(BufferTriple*)
  329. #cc.hashset_new_conf(&testconf, &testset)
  330. #sz = cc.hashset_size(testset)
  331. #print(f'Test set size (start): {sz}')
  332. #cc.hashset_add(testset, &i)
  333. #sz = cc.hashset_size(testset)
  334. #print(f'Test set size (1st insert): {sz}')
  335. #cc.hashset_add(testset, &i)
  336. #sz = cc.hashset_size(testset)
  337. #print(f'Test set size (2nd insert): {sz}')
  338. # # # END test area
  339. trp.s = ss
  340. trp.p = sp
  341. trp.o = so
  342. cdef size_t trp_sz = cc.hashset_size(self._triples)
  343. logger.info(f'Triples set size before adding: {trp_sz}')
  344. r = cc.hashset_add(self._triples, trp)
  345. print('Insert triple result:')
  346. print(r)
  347. trp_sz = cc.hashset_size(self._triples)
  348. logger.info(f'Triples set size after adding: {trp_sz}')
  349. cdef:
  350. cc.HashSetIter ti
  351. BufferTriple *test_trp
  352. void *cur
  353. cc.hashset_iter_init(&ti, self._triples)
  354. while cc.hashset_iter_next(&ti, &cur) != cc.CC_ITER_END:
  355. test_trp = <BufferTriple *>cur
  356. print('Triple in set: 0x{:02x} 0x{:02x} 0x{:02x}'.format(
  357. <size_t>test_trp.s, <size_t>test_trp.p, <size_t>test_trp.o))
  358. #cdef BufferTriple *tt
  359. #calg.set_iterate(self._triples, &ti)
  360. #while calg.set_iter_has_more(&ti):
  361. # tt = <BufferTriple *>calg.set_iter_next(&ti)
  362. cdef set _to_pyset(self):
  363. """
  364. Convert triple data to a Python set.
  365. :rtype: set
  366. """
  367. cdef:
  368. void *void_p
  369. cc.HashSetIter ti
  370. BufferTriple *trp
  371. term.Term s, p, o
  372. graph_set = set()
  373. cc.hashset_iter_init(&ti, self._triples)
  374. while cc.hashset_iter_next(&ti, &void_p) == cc.CC_OK:
  375. if void_p == NULL:
  376. logger.warn('Triple is NULL!')
  377. break
  378. trp = <BufferTriple *>void_p
  379. graph_set.add((
  380. term.deserialize_to_rdflib(trp.s),
  381. term.deserialize_to_rdflib(trp.p),
  382. term.deserialize_to_rdflib(trp.o),
  383. ))
  384. return graph_set
  385. # Basic set operations.
  386. def add(self, trp):
  387. """
  388. Add triples to the graph.
  389. :param iterable triples: Set, list or tuple of 3-tuple triples.
  390. """
  391. cdef size_t cur = 0
  392. trp_ct = len(trp)
  393. trp_buf = <SPOBuffer>self._pool.alloc(3 * trp_ct, sizeof(Buffer))
  394. for s, p, o in trp:
  395. term.serialize_from_rdflib(s, trp_buf + cur, self._pool)
  396. term.serialize_from_rdflib(p, trp_buf + cur + 1, self._pool)
  397. term.serialize_from_rdflib(o, trp_buf + cur + 2, self._pool)
  398. self._add_triple(
  399. trp_buf + cur,
  400. trp_buf + cur + 1,
  401. trp_buf + cur + 2
  402. )
  403. cur += 3
  404. def len_terms(self):
  405. """ Number of triples in the graph. """
  406. return cc.hashset_size(self._terms)
  407. def remove(self, trp):
  408. """
  409. Remove one item from the graph.
  410. :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
  411. wildcards are not accepted.
  412. """
  413. self.data.remove(trp)
  414. def __len__(self):
  415. """ Number of triples in the graph. """
  416. return cc.hashset_size(self._triples)
  417. @use_data
  418. def __eq__(self, other):
  419. """ Equality operator between ``SimpleGraph`` instances. """
  420. return self.data == other
  421. def __repr__(self):
  422. """
  423. String representation of the graph.
  424. It provides the number of triples in the graph and memory address of
  425. the instance.
  426. """
  427. return (f'<{self.__class__.__name__} @{hex(id(self))} '
  428. f'length={len(self.data)}>')
  429. def __str__(self):
  430. """ String dump of the graph triples. """
  431. return str(self.data)
  432. @use_data
  433. def __sub__(self, other):
  434. """ Set subtraction. """
  435. return self.data - other
  436. @use_data
  437. def __isub__(self, other):
  438. """ In-place set subtraction. """
  439. self.data -= other
  440. return self
  441. @use_data
  442. def __and__(self, other):
  443. """ Set intersection. """
  444. return self.data & other
  445. @use_data
  446. def __iand__(self, other):
  447. """ In-place set intersection. """
  448. self.data &= other
  449. return self
  450. @use_data
  451. def __or__(self, other):
  452. """ Set union. """
  453. return self.data | other
  454. @use_data
  455. def __ior__(self, other):
  456. """ In-place set union. """
  457. self.data |= other
  458. return self
  459. @use_data
  460. def __xor__(self, other):
  461. """ Set exclusive intersection (XOR). """
  462. return self.data ^ other
  463. @use_data
  464. def __ixor__(self, other):
  465. """ In-place set exclusive intersection (XOR). """
  466. self.data ^= other
  467. return self
  468. def __contains__(self, item):
  469. """
  470. Whether the graph contains a triple.
  471. :rtype: boolean
  472. """
  473. return item in self.data
  474. def __iter__(self):
  475. """ Graph iterator. It iterates over the set triples. """
  476. return self.data.__iter__()
  477. # Slicing.
  478. def __getitem__(self, item):
  479. """
  480. Slicing function.
  481. It behaves similarly to `RDFLib graph slicing
  482. <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__
  483. """
  484. if isinstance(item, slice):
  485. s, p, o = item.start, item.stop, item.step
  486. return self._slice(s, p, o)
  487. else:
  488. raise TypeError(f'Wrong slice format: {item}.')
  489. cpdef void set(self, tuple trp) except *:
  490. """
  491. Set a single value for subject and predicate.
  492. Remove all triples matching ``s`` and ``p`` before adding ``s p o``.
  493. """
  494. if None in trp:
  495. raise ValueError(f'Invalid triple: {trp}')
  496. self.remove_triples((trp[0], trp[1], None))
  497. self.add((trp,))
  498. cpdef void remove_triples(self, pattern) except *:
  499. """
  500. Remove triples by pattern.
  501. The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
  502. """
  503. s, p, o = pattern
  504. for match in self.lookup(s, p, o):
  505. logger.debug(f'Removing from graph: {match}.')
  506. self.data.remove(match)
  507. cpdef object as_rdflib(self):
  508. """
  509. Return the data set as an RDFLib Graph.
  510. :rtype: rdflib.Graph
  511. """
  512. gr = Graph()
  513. for trp in self.data:
  514. gr.add(trp)
  515. return gr
  516. def _slice(self, s, p, o):
  517. """
  518. Return terms filtered by other terms.
  519. This behaves like the rdflib.Graph slicing policy.
  520. """
  521. _data = self.data
  522. logger.debug(f'Slicing graph by: {s}, {p}, {o}.')
  523. if s is None and p is None and o is None:
  524. return _data
  525. elif s is None and p is None:
  526. return {(r[0], r[1]) for r in _data if r[2] == o}
  527. elif s is None and o is None:
  528. return {(r[0], r[2]) for r in _data if r[1] == p}
  529. elif p is None and o is None:
  530. return {(r[1], r[2]) for r in _data if r[0] == s}
  531. elif s is None:
  532. return {r[0] for r in _data if r[1] == p and r[2] == o}
  533. elif p is None:
  534. return {r[1] for r in _data if r[0] == s and r[2] == o}
  535. elif o is None:
  536. return {r[2] for r in _data if r[0] == s and r[1] == p}
  537. else:
  538. # all given
  539. return (s,p,o) in _data
  540. def lookup(self, s, p, o):
  541. """
  542. Look up triples by a pattern.
  543. This function converts RDFLib terms into the serialized format stored
  544. in the graph's internal structure and compares them bytewise.
  545. Any and all of the lookup terms can be ``None``.
  546. """
  547. cdef:
  548. void *void_p
  549. BufferTriple trp
  550. BufferTriple *trp_p
  551. cc.HashSetIter ti
  552. Buffer t1
  553. Buffer t2
  554. lookup_fn_t fn
  555. res = set()
  556. # Decide comparison logic outside the loop.
  557. if s is not None and p is not None and o is not None:
  558. # Return immediately if 3-term match is requested.
  559. term.serialize_from_rdflib(s, trp.s, self._pool)
  560. term.serialize_from_rdflib(p, trp.p, self._pool)
  561. term.serialize_from_rdflib(o, trp.o, self._pool)
  562. if cc.hashset_contains(self._triples, &trp):
  563. res.add((s, p, o))
  564. return res
  565. elif s is not None:
  566. term.serialize_from_rdflib(s, &t1)
  567. if p is not None:
  568. fn = lookup_sp_cmp_fn
  569. term.serialize_from_rdflib(p, &t2)
  570. elif o is not None:
  571. fn = lookup_so_cmp_fn
  572. term.serialize_from_rdflib(o, &t2)
  573. else:
  574. fn = lookup_s_cmp_fn
  575. elif p is not None:
  576. term.serialize_from_rdflib(p, &t1)
  577. if o is not None:
  578. fn = lookup_po_cmp_fn
  579. term.serialize_from_rdflib(o, &t2)
  580. else:
  581. fn = lookup_p_cmp_fn
  582. elif o is not None:
  583. fn = lookup_o_cmp_fn
  584. term.serialize_from_rdflib(o, &t1)
  585. else:
  586. fn = lookup_none_cmp_fn
  587. # Iterate over serialized triples.
  588. cc.hashset_iter_init(&ti, self._triples)
  589. while cc.hashset_iter_next(&ti, &void_p) == cc.CC_OK:
  590. if void_p == NULL:
  591. trp_p = <BufferTriple *>void_p
  592. res.add((
  593. term.deserialize_to_rdflib(trp_p[0].s),
  594. term.deserialize_to_rdflib(trp_p[0].p),
  595. term.deserialize_to_rdflib(trp_p[0].o),
  596. ))
  597. return res
  598. cpdef set terms(self, str type):
  599. """
  600. Get all terms of a type: subject, predicate or object.
  601. :param str type: One of ``s``, ``p`` or ``o``.
  602. """
  603. i = 'spo'.index(type)
  604. return {r[i] for r in self.data}
  605. cdef class Imr(SimpleGraph):
  606. """
  607. In-memory resource data container.
  608. This is an extension of :py:class:`~SimpleGraph` that adds a subject URI to
  609. the data set and some convenience methods.
  610. An instance of this class can be converted to a ``rdflib.Resource``
  611. instance.
  612. Some set operations that produce a new object (``-``, ``|``, ``&``, ``^``)
  613. will create a new ``Imr`` instance with the same subject URI.
  614. """
  615. def __init__(self, uri, *args, **kwargs):
  616. """
  617. Initialize the graph with pre-existing data or by looking up a store.
  618. Either ``data``, or ``lookup`` *and* ``store``, can be provide.
  619. ``lookup`` and ``store`` have precedence. If none of them is specified,
  620. an empty graph is initialized.
  621. :param rdflib.URIRef uri: The graph URI.
  622. This will serve as the subject for some queries.
  623. :param set data: Initial data as a set of 3-tuples of RDFLib terms.
  624. :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
  625. E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
  626. Any and all elements may be ``None``.
  627. :param lmdbStore store: the store to look data up.
  628. """
  629. print(len(self))
  630. self.uri = str(uri)
  631. @property
  632. def identifier(self):
  633. """
  634. IMR URI. For compatibility with RDFLib Resource.
  635. :rtype: string
  636. """
  637. return self.uri
  638. @property
  639. def graph(self):
  640. """
  641. Return a SimpleGraph with the same data.
  642. :rtype: SimpleGraph
  643. """
  644. return SimpleGraph(self.data)
  645. def __repr__(self):
  646. """
  647. String representation of an Imr.
  648. This includes the subject URI, number of triples contained and the
  649. memory address of the instance.
  650. """
  651. return (f'<{self.__class__.__name__} @{hex(id(self))} uri={self.uri}, '
  652. f'length={len(self.data)}>')
  653. @use_data
  654. def __sub__(self, other):
  655. """
  656. Set difference. This creates a new Imr with the same subject URI.
  657. """
  658. return self.__class__(uri=self.uri, data=self.data - other)
  659. @use_data
  660. def __and__(self, other):
  661. """
  662. Set intersection. This creates a new Imr with the same subject URI.
  663. """
  664. return self.__class__(uri=self.uri, data=self.data & other)
  665. @use_data
  666. def __or__(self, other):
  667. """
  668. Set union. This creates a new Imr with the same subject URI.
  669. """
  670. return self.__class__(uri=self.uri, data=self.data | other)
  671. @use_data
  672. def __xor__(self, other):
  673. """
  674. Set exclusive OR (XOR). This creates a new Imr with the same subject
  675. URI.
  676. """
  677. return self.__class__(uri=self.uri, data=self.data ^ other)
  678. def __getitem__(self, item):
  679. """
  680. Supports slicing notation.
  681. """
  682. if isinstance(item, slice):
  683. s, p, o = item.start, item.stop, item.step
  684. return self._slice(s, p, o)
  685. elif isinstance(item, Node):
  686. # If a Node is given, return all values for that predicate.
  687. return {
  688. r[2] for r in self.data
  689. if r[0] == self.uri and r[1] == item}
  690. else:
  691. raise TypeError(f'Wrong slice format: {item}.')
  692. def value(self, p, strict=False):
  693. """
  694. Get an individual value.
  695. :param rdflib.termNode p: Predicate to search for.
  696. :param bool strict: If set to ``True`` the method raises an error if
  697. more than one value is found. If ``False`` (the default) only
  698. the first found result is returned.
  699. :rtype: rdflib.term.Node
  700. """
  701. values = self[p]
  702. if strict and len(values) > 1:
  703. raise RuntimeError('More than one value found for {}, {}.'.format(
  704. self.uri, p))
  705. for ret in values:
  706. return ret
  707. return None
  708. cpdef as_rdflib(self):
  709. """
  710. Return the IMR as a RDFLib Resource.
  711. :rtype: rdflib.Resource
  712. """
  713. gr = Graph()
  714. for trp in self.data:
  715. gr.add(trp)
  716. return gr.resource(identifier=self.uri)