graph.pyx 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850
  1. import logging
  2. from functools import wraps
  3. from rdflib import Graph
  4. from rdflib.term import Node
  5. from lakesuperior import env
  6. from libc.stdint cimport uint32_t, uint64_t
  7. from libc.string cimport memcmp, memcpy
  8. from libc.stdlib cimport free
  9. from cymem.cymem cimport Pool
  10. from lakesuperior.cy_include cimport cylmdb as lmdb
  11. from lakesuperior.cy_include cimport collections as cc
  12. from lakesuperior.cy_include.collections cimport (
  13. CC_OK,
  14. HashSet, HashSetConf, HashSetIter, TableEntry,
  15. hashset_add, hashset_conf_init, hashset_contains, hashset_iter_init,
  16. hashset_iter_next, hashset_new_conf, hashtable_hash_ptr, hashset_size,
  17. get_table_index,
  18. )
  19. from lakesuperior.model.graph cimport term
  20. from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
  21. KLEN, DBL_KLEN, TRP_KLEN, TripleKey)
  22. from lakesuperior.model.structures.hash cimport term_hash_seed32
  23. from lakesuperior.model.structures.keyset cimport Keyset
  24. from lakesuperior.model.base cimport Buffer
  25. from lakesuperior.model.graph.triple cimport BufferTriple
  26. from lakesuperior.model.structures.hash cimport hash64
  27. cdef extern from 'spookyhash_api.h':
  28. uint64_t spookyhash_64(const void *input, size_t input_size, uint64_t seed)
  29. logger = logging.getLogger(__name__)
  30. def use_data(fn):
  31. """
  32. Decorator to indicate that a set operation between two SimpleGraph
  33. instances should use the ``data`` property of the second term. The second
  34. term can also be a simple set.
  35. """
  36. @wraps(fn)
  37. def _wrapper(self, other):
  38. if isinstance(other, SimpleGraph):
  39. other = other.data
  40. return _wrapper
  41. cdef int term_cmp_fn(const void* key1, const void* key2):
  42. """
  43. Compare function for two Buffer objects.
  44. """
  45. b1 = <Buffer *>key1
  46. b2 = <Buffer *>key2
  47. if b1.sz != b2.sz:
  48. return False
  49. #print('Term A:')
  50. #print((<unsigned char *>b1.addr)[:b1.sz])
  51. #print('Term b:')
  52. #print((<unsigned char *>b2.addr)[:b2.sz])
  53. cdef int cmp = memcmp(b1.addr, b2.addr, b1.sz)
  54. logger.info(f'term memcmp: {cmp}')
  55. return cmp == 0
  56. cdef int triple_cmp_fn(const void* key1, const void* key2):
  57. """
  58. Compare function for two triples in a CAlg set.
  59. Here, pointers to terms are compared for s, p, o. The pointers should be
  60. guaranteed to point to unique values (i.e. no two pointers have the same
  61. term value within a graph).
  62. """
  63. t1 = <BufferTriple *>key1
  64. t2 = <BufferTriple *>key2
  65. return(
  66. t1.s.addr == t2.s.addr and
  67. t1.p.addr == t2.p.addr and
  68. t1.o.addr == t2.o.addr)
  69. cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
  70. """
  71. Hash function for sets of (serialized) triples.
  72. This function computes the hash of the concatenated pointer values in the
  73. s, p, o members of the triple. The triple structure is treated as a byte
  74. string. This is safe in spite of byte-wise struct evaluation being a
  75. frowned-upon practice (due to padding issues), because it is assumed that
  76. the input value is always the same type of structure.
  77. """
  78. return <size_t>spookyhash_64(key, l, seed)
  79. cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
  80. """
  81. No-op function that takes a pointer and does *not* hash it.
  82. The pointer value is used as the "hash".
  83. """
  84. return <size_t>key
  85. cdef inline bint lookup_none_cmp_fn(
  86. BufferTriple *trp, Buffer *t1, Buffer *t2):
  87. """
  88. Dummy callback for queries with all parameters unbound.
  89. This function always returns ``True``
  90. """
  91. return True
  92. cdef inline bint lookup_s_cmp_fn(BufferTriple *trp, Buffer *t1, Buffer *t2):
  93. """
  94. Lookup callback compare function for a given s in a triple.
  95. The function returns ``True`` if ``t1`` matches the first term.
  96. ``t2`` is not used and is declared only for compatibility with the
  97. other interchangeable functions.
  98. """
  99. return term_cmp_fn(t1, trp[0].s)
  100. cdef inline bint lookup_p_cmp_fn(BufferTriple *trp, Buffer *t1, Buffer *t2):
  101. return term_cmp_fn(t1, trp[0].p)
  102. cdef inline bint lookup_o_cmp_fn(BufferTriple *trp, Buffer *t1, Buffer *t2):
  103. return term_cmp_fn(t1, trp[0].o)
  104. cdef inline bint lookup_sp_cmp_fn(BufferTriple *trp, Buffer *t1, Buffer *t2):
  105. return (
  106. term_cmp_fn(t1, trp[0].s)
  107. and term_cmp_fn(t2, trp[0].p))
  108. cdef inline bint lookup_so_cmp_fn(BufferTriple *trp, Buffer *t1, Buffer *t2):
  109. return (
  110. term_cmp_fn(t1, trp[0].s)
  111. and term_cmp_fn(t2, trp[0].o))
  112. cdef inline bint lookup_po_cmp_fn(BufferTriple *trp, Buffer *t1, Buffer *t2):
  113. return (
  114. term_cmp_fn(t1, trp[0].p)
  115. and term_cmp_fn(t2, trp[0].o))
  116. cdef class SimpleGraph:
  117. """
  118. Fast and simple implementation of a graph.
  119. Most functions should mimic RDFLib's graph with less overhead. It uses
  120. the same funny but functional slicing notation. No lookup functions within
  121. the graph are available at this time.
  122. Instances of this class hold a set of
  123. :py:class:`~lakesuperior.store.ldp_rs.term.Term` structures that stores
  124. unique terms within the graph, and a set of
  125. :py:class:`~lakesuperior.store.ldp_rs.triple.Triple` structures referencing
  126. those terms. Therefore, no data duplication occurs and the storage is quite
  127. sparse.
  128. A graph can be instantiated from a store lookup.
  129. A SimpleGraph can also be obtained from a
  130. :py:class:`lakesuperior.store.keyset.Keyset` which is convenient bacause
  131. a Keyset can be obtained very efficiently from querying a store, then also
  132. very efficiently filtered and eventually converted into a set of meaningful
  133. terms.
  134. An instance of this class can also be converted to and from a
  135. ``rdflib.Graph`` instance. TODO verify that this frees Cython pointers.
  136. """
  137. def __cinit__(
  138. self, Keyset keyset=None, store=None, set data=set()):
  139. """
  140. Initialize the graph with pre-existing data or by looking up a store.
  141. One of ``keyset``, or ``data`` can be provided. If more than
  142. one of these is provided, precedence is given in the mentioned order.
  143. If none of them is specified, an empty graph is initialized.
  144. :param rdflib.URIRef uri: The graph URI.
  145. This will serve as the subject for some queries.
  146. :param Keyset keyset: Keyset to create the graph from. Keys will be
  147. converted to set elements.
  148. :param lakesuperior.store.ldp_rs.LmdbTripleStore store: store to
  149. look up the keyset. Only used if ``keyset`` is specified. If not
  150. set, the environment store is used.
  151. :param set data: Initial data as a set of 3-tuples of RDFLib terms.
  152. :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
  153. E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
  154. Any and all elements may be ``None``.
  155. :param lmdbStore store: the store to look data up.
  156. """
  157. cdef:
  158. HashSetConf terms_conf
  159. HashSetConf trp_conf
  160. hashset_conf_init(&terms_conf)
  161. terms_conf.load_factor = 0.85
  162. terms_conf.hash = &hash_ptr_passthrough # spookyhash_64?
  163. terms_conf.hash_seed = term_hash_seed32
  164. terms_conf.key_compare = &term_cmp_fn
  165. terms_conf.key_length = sizeof(void*)
  166. hashset_conf_init(&trp_conf)
  167. trp_conf.load_factor = 0.75
  168. trp_conf.hash = &hash_ptr_passthrough # spookyhash_64?
  169. trp_conf.hash_seed = term_hash_seed32
  170. trp_conf.key_compare = &triple_cmp_fn
  171. trp_conf.key_length = sizeof(void*)
  172. hashset_new_conf(&terms_conf, &self._terms)
  173. hashset_new_conf(&trp_conf, &self._triples)
  174. print(f'Terms member: {self._terms.dummy[0]}')
  175. print(f'Triples member: {self._triples.dummy[0]}')
  176. self.store = store or env.app_globals.rdf_store
  177. self._pool = Pool()
  178. cdef:
  179. size_t i = 0
  180. TripleKey spok
  181. term.Buffer pk_t
  182. # Initialize empty data set.
  183. if keyset:
  184. # Populate with triples extracted from provided key set.
  185. self._data_from_keyset(keyset)
  186. elif data is not None:
  187. # Populate with provided Python set.
  188. for s, p, o in data:
  189. self._add_from_rdflib(s, p, o)
  190. def __dealloc__(self):
  191. """
  192. Free the triple pointers. TODO use a Cymem pool
  193. """
  194. free(self._triples)
  195. free(self._terms)
  196. @property
  197. def data(self):
  198. """
  199. Triple data as a Python set.
  200. :rtype: set
  201. """
  202. return self._data_as_set()
  203. cdef void _data_from_lookup(self, tuple trp_ptn, ctx=None) except *:
  204. """
  205. Look up triples in the triplestore and load them into ``data``.
  206. :param tuple lookup: 3-tuple of RDFlib terms or ``None``.
  207. :param LmdbTriplestore store: Reference to a LMDB triplestore. This
  208. is normally set to ``lakesuperior.env.app_globals.rdf_store``.
  209. """
  210. cdef:
  211. size_t i
  212. unsigned char spok[TRP_KLEN]
  213. with self.store.txn_ctx():
  214. keyset = self.store.triple_keys(trp_ptn, ctx)
  215. self.data_from_keyset(keyset)
  216. cdef void _data_from_keyset(self, Keyset data) except *:
  217. """Populate a graph from a Keyset."""
  218. cdef TripleKey spok
  219. while data.next(spok):
  220. self._add_from_spok(spok)
  221. cdef inline void _add_from_spok(self, TripleKey spok) except *:
  222. """
  223. Add a triple from a TripleKey of term keys.
  224. """
  225. cdef:
  226. SPOBuffer s_spo
  227. BufferTriple trp
  228. s_spo = <SPOBuffer>self._pool.alloc(3, sizeof(Buffer))
  229. self.store.lookup_term(spok, s_spo)
  230. self.store.lookup_term(spok + KLEN, s_spo + 1)
  231. self.store.lookup_term(spok + DBL_KLEN, s_spo + 2)
  232. self._add_triple(s_spo, s_spo + 1, s_spo + 2)
  233. cdef inline void _add_triple(
  234. self, BufferPtr ss, BufferPtr sp, BufferPtr so
  235. ) except *:
  236. """
  237. Add a triple from 3 (TPL) serialized terms.
  238. Each of the terms is added to the term set if not existing. The triple
  239. also is only added if not existing.
  240. """
  241. trp = <BufferTriple *>self._pool.alloc(1, sizeof(BufferTriple))
  242. logger.info('Inserting terms.')
  243. logger.info(f'ss addr: {<unsigned long>ss.addr}')
  244. logger.info(f'ss sz: {ss.sz}')
  245. #logger.info('ss:')
  246. #logger.info((<unsigned char *>ss.addr)[:ss.sz])
  247. logger.info('Insert ss @:')
  248. print(<unsigned long>ss)
  249. self._add_or_get_term(&ss)
  250. logger.info('Now ss is @:')
  251. print(<unsigned long>ss)
  252. logger.info('Insert sp')
  253. self._add_or_get_term(&sp)
  254. logger.info('Insert so')
  255. self._add_or_get_term(&so)
  256. logger.info('inserted terms.')
  257. cdef size_t terms_sz = hashset_size(self._terms)
  258. logger.info('Terms set size: {terms_sz}')
  259. #cdef HashSetIter ti
  260. #cdef Buffer *t
  261. #hashset_iter_init(&ti, self._terms)
  262. #while calg.set_iter_has_more(&ti):
  263. # t = <Buffer *>calg.set_iter_next(&ti)
  264. trp.s = ss
  265. trp.p = sp
  266. trp.o = so
  267. r = hashset_add(self._triples, trp)
  268. print('Insert triple result:')
  269. print(r)
  270. #cdef BufferTriple *tt
  271. #calg.set_iterate(self._triples, &ti)
  272. #while calg.set_iter_has_more(&ti):
  273. # tt = <BufferTriple *>calg.set_iter_next(&ti)
  274. cdef int _add_or_get_term(self, Buffer **data) except -1:
  275. """
  276. Insert a term in the terms set, or get one that already exists.
  277. If the new term is inserted, its address is stored in the memory pool
  278. and persists with the :py:class:`SimpleGraph` instance carrying it.
  279. Otherwise, the overwritten term is garbage collected as soon as the
  280. calling function exits.
  281. The return value gives an indication of whether the term was added or
  282. not.
  283. """
  284. cdef TableEntry *entry
  285. table = self._terms.table
  286. entry = table.buckets[get_table_index(table, data[0].addr)]
  287. while entry:
  288. if table.key_cmp(data[0].addr, entry.key) == 0:
  289. # If the term is found, assign the address of entry.key
  290. # to the data parameter.
  291. data[0] = <Buffer *>entry.key
  292. return 1
  293. entry = entry.next
  294. # If the term is not found, add it.
  295. # TODO This is inefficient because it searches for the term again.
  296. # TODO It would be best to break down the hashset_add function and
  297. # TODO remove the check.
  298. return hashset_add(self._terms, data[0])
  299. cdef set _data_as_set(self):
  300. """
  301. Convert triple data to a Python set.
  302. :rtype: set
  303. """
  304. cdef:
  305. void *void_p
  306. HashSetIter ti
  307. BufferTriple *trp
  308. term.Term s, p, o
  309. graph_set = set()
  310. hashset_iter_init(&ti, self._triples)
  311. while hashset_iter_next(&ti, &void_p) == CC_OK:
  312. if void_p == NULL:
  313. logger.warn('Triple is NULL!')
  314. break
  315. trp = <BufferTriple *>void_p
  316. graph_set.add((
  317. term.deserialize_to_rdflib(trp.s),
  318. term.deserialize_to_rdflib(trp.p),
  319. term.deserialize_to_rdflib(trp.o),
  320. ))
  321. return graph_set
  322. # Basic set operations.
  323. def add(self, triple):
  324. """ Add one triple to the graph. """
  325. ss = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
  326. sp = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
  327. so = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
  328. s, p, o = triple
  329. term.serialize_from_rdflib(s, ss, self._pool)
  330. term.serialize_from_rdflib(p, sp, self._pool)
  331. term.serialize_from_rdflib(o, so, self._pool)
  332. self._add_triple(ss, sp, so)
  333. def remove(self, item):
  334. """
  335. Remove one item from the graph.
  336. :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
  337. wildcards are not accepted.
  338. """
  339. self.data.remove(item)
  340. def __len__(self):
  341. """ Number of triples in the graph. """
  342. #return calg.set_num_entries(self._triples)
  343. return len(self.data)
  344. @use_data
  345. def __eq__(self, other):
  346. """ Equality operator between ``SimpleGraph`` instances. """
  347. return self.data == other
  348. def __repr__(self):
  349. """
  350. String representation of the graph.
  351. It provides the number of triples in the graph and memory address of
  352. the instance.
  353. """
  354. return (f'<{self.__class__.__name__} @{hex(id(self))} '
  355. f'length={len(self.data)}>')
  356. def __str__(self):
  357. """ String dump of the graph triples. """
  358. return str(self.data)
  359. @use_data
  360. def __sub__(self, other):
  361. """ Set subtraction. """
  362. return self.data - other
  363. @use_data
  364. def __isub__(self, other):
  365. """ In-place set subtraction. """
  366. self.data -= other
  367. return self
  368. @use_data
  369. def __and__(self, other):
  370. """ Set intersection. """
  371. return self.data & other
  372. @use_data
  373. def __iand__(self, other):
  374. """ In-place set intersection. """
  375. self.data &= other
  376. return self
  377. @use_data
  378. def __or__(self, other):
  379. """ Set union. """
  380. return self.data | other
  381. @use_data
  382. def __ior__(self, other):
  383. """ In-place set union. """
  384. self.data |= other
  385. return self
  386. @use_data
  387. def __xor__(self, other):
  388. """ Set exclusive intersection (XOR). """
  389. return self.data ^ other
  390. @use_data
  391. def __ixor__(self, other):
  392. """ In-place set exclusive intersection (XOR). """
  393. self.data ^= other
  394. return self
  395. def __contains__(self, item):
  396. """
  397. Whether the graph contains a triple.
  398. :rtype: boolean
  399. """
  400. return item in self.data
  401. def __iter__(self):
  402. """ Graph iterator. It iterates over the set triples. """
  403. return self.data.__iter__()
  404. # Slicing.
  405. def __getitem__(self, item):
  406. """
  407. Slicing function.
  408. It behaves similarly to `RDFLib graph slicing
  409. <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__
  410. """
  411. if isinstance(item, slice):
  412. s, p, o = item.start, item.stop, item.step
  413. return self._slice(s, p, o)
  414. else:
  415. raise TypeError(f'Wrong slice format: {item}.')
  416. cpdef void set(self, tuple trp) except *:
  417. """
  418. Set a single value for subject and predicate.
  419. Remove all triples matching ``s`` and ``p`` before adding ``s p o``.
  420. """
  421. if None in trp:
  422. raise ValueError(f'Invalid triple: {trp}')
  423. self.remove_triples((trp[0], trp[1], None))
  424. self.add(trp)
  425. cpdef void remove_triples(self, pattern) except *:
  426. """
  427. Remove triples by pattern.
  428. The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
  429. """
  430. s, p, o = pattern
  431. for match in self.lookup(s, p, o):
  432. logger.debug(f'Removing from graph: {match}.')
  433. self.data.remove(match)
  434. cpdef object as_rdflib(self):
  435. """
  436. Return the data set as an RDFLib Graph.
  437. :rtype: rdflib.Graph
  438. """
  439. gr = Graph()
  440. for trp in self.data:
  441. gr.add(trp)
  442. return gr
  443. def _slice(self, s, p, o):
  444. """
  445. Return terms filtered by other terms.
  446. This behaves like the rdflib.Graph slicing policy.
  447. """
  448. _data = self.data
  449. logger.debug(f'Slicing graph by: {s}, {p}, {o}.')
  450. if s is None and p is None and o is None:
  451. return _data
  452. elif s is None and p is None:
  453. return {(r[0], r[1]) for r in _data if r[2] == o}
  454. elif s is None and o is None:
  455. return {(r[0], r[2]) for r in _data if r[1] == p}
  456. elif p is None and o is None:
  457. return {(r[1], r[2]) for r in _data if r[0] == s}
  458. elif s is None:
  459. return {r[0] for r in _data if r[1] == p and r[2] == o}
  460. elif p is None:
  461. return {r[1] for r in _data if r[0] == s and r[2] == o}
  462. elif o is None:
  463. return {r[2] for r in _data if r[0] == s and r[1] == p}
  464. else:
  465. # all given
  466. return (s,p,o) in _data
  467. def lookup(self, s, p, o):
  468. """
  469. Look up triples by a pattern.
  470. This function converts RDFLib terms into the serialized format stored
  471. in the graph's internal structure and compares them bytewise.
  472. Any and all of the lookup terms can be ``None``.
  473. """
  474. cdef:
  475. void *void_p
  476. BufferTriple trp
  477. BufferTriple *trp_p
  478. HashSetIter ti
  479. Buffer t1
  480. Buffer t2
  481. lookup_fn_t fn
  482. res = set()
  483. # Decide comparison logic outside the loop.
  484. if s is not None and p is not None and o is not None:
  485. # Return immediately if 3-term match is requested.
  486. term.serialize_from_rdflib(s, trp.s)
  487. term.serialize_from_rdflib(p, trp.p)
  488. term.serialize_from_rdflib(o, trp.o)
  489. if hashset_contains(self._triples, &trp):
  490. res.add((s, p, o))
  491. return res
  492. elif s is not None:
  493. term.serialize_from_rdflib(s, &t1)
  494. if p is not None:
  495. fn = lookup_sp_cmp_fn
  496. term.serialize_from_rdflib(p, &t2)
  497. elif o is not None:
  498. fn = lookup_so_cmp_fn
  499. term.serialize_from_rdflib(o, &t2)
  500. else:
  501. fn = lookup_s_cmp_fn
  502. elif p is not None:
  503. term.serialize_from_rdflib(p, &t1)
  504. if o is not None:
  505. fn = lookup_po_cmp_fn
  506. term.serialize_from_rdflib(o, &t2)
  507. else:
  508. fn = lookup_p_cmp_fn
  509. elif o is not None:
  510. fn = lookup_o_cmp_fn
  511. term.serialize_from_rdflib(o, &t1)
  512. else:
  513. fn = lookup_none_cmp_fn
  514. # Iterate over serialized triples.
  515. hashset_iter_init(&ti, self._triples)
  516. while hashset_iter_next(&ti, &void_p) == CC_OK:
  517. if void_p == NULL:
  518. trp_p = <BufferTriple *>void_p
  519. res.add((
  520. term.deserialize_to_rdflib(trp_p[0].s),
  521. term.deserialize_to_rdflib(trp_p[0].p),
  522. term.deserialize_to_rdflib(trp_p[0].o),
  523. ))
  524. return res
  525. cpdef set terms(self, str type):
  526. """
  527. Get all terms of a type: subject, predicate or object.
  528. :param str type: One of ``s``, ``p`` or ``o``.
  529. """
  530. i = 'spo'.index(type)
  531. return {r[i] for r in self.data}
  532. cdef class Imr(SimpleGraph):
  533. """
  534. In-memory resource data container.
  535. This is an extension of :py:class:`~SimpleGraph` that adds a subject URI to
  536. the data set and some convenience methods.
  537. An instance of this class can be converted to a ``rdflib.Resource``
  538. instance.
  539. Some set operations that produce a new object (``-``, ``|``, ``&``, ``^``)
  540. will create a new ``Imr`` instance with the same subject URI.
  541. """
  542. def __init__(self, str uri, *args, **kwargs):
  543. """
  544. Initialize the graph with pre-existing data or by looking up a store.
  545. Either ``data``, or ``lookup`` *and* ``store``, can be provide.
  546. ``lookup`` and ``store`` have precedence. If none of them is specified,
  547. an empty graph is initialized.
  548. :param rdflib.URIRef uri: The graph URI.
  549. This will serve as the subject for some queries.
  550. :param set data: Initial data as a set of 3-tuples of RDFLib terms.
  551. :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
  552. E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
  553. Any and all elements may be ``None``.
  554. :param lmdbStore store: the store to look data up.
  555. """
  556. super().__init__(*args, **kwargs)
  557. self.uri = uri
  558. @property
  559. def identifier(self):
  560. """
  561. IMR URI. For compatibility with RDFLib Resource.
  562. :rtype: string
  563. """
  564. return self.uri
  565. @property
  566. def graph(self):
  567. """
  568. Return a SimpleGraph with the same data.
  569. :rtype: SimpleGraph
  570. """
  571. return SimpleGraph(self.data)
  572. def __repr__(self):
  573. """
  574. String representation of an Imr.
  575. This includes the subject URI, number of triples contained and the
  576. memory address of the instance.
  577. """
  578. return (f'<{self.__class__.__name__} @{hex(id(self))} uri={self.uri}, '
  579. f'length={len(self.data)}>')
  580. @use_data
  581. def __sub__(self, other):
  582. """
  583. Set difference. This creates a new Imr with the same subject URI.
  584. """
  585. return self.__class__(uri=self.uri, data=self.data - other)
  586. @use_data
  587. def __and__(self, other):
  588. """
  589. Set intersection. This creates a new Imr with the same subject URI.
  590. """
  591. return self.__class__(uri=self.uri, data=self.data & other)
  592. @use_data
  593. def __or__(self, other):
  594. """
  595. Set union. This creates a new Imr with the same subject URI.
  596. """
  597. return self.__class__(uri=self.uri, data=self.data | other)
  598. @use_data
  599. def __xor__(self, other):
  600. """
  601. Set exclusive OR (XOR). This creates a new Imr with the same subject
  602. URI.
  603. """
  604. return self.__class__(uri=self.uri, data=self.data ^ other)
  605. def __getitem__(self, item):
  606. """
  607. Supports slicing notation.
  608. """
  609. if isinstance(item, slice):
  610. s, p, o = item.start, item.stop, item.step
  611. return self._slice(s, p, o)
  612. elif isinstance(item, Node):
  613. # If a Node is given, return all values for that predicate.
  614. return {
  615. r[2] for r in self.data
  616. if r[0] == self.uri and r[1] == item}
  617. else:
  618. raise TypeError(f'Wrong slice format: {item}.')
  619. def value(self, p, strict=False):
  620. """
  621. Get an individual value.
  622. :param rdflib.termNode p: Predicate to search for.
  623. :param bool strict: If set to ``True`` the method raises an error if
  624. more than one value is found. If ``False`` (the default) only
  625. the first found result is returned.
  626. :rtype: rdflib.term.Node
  627. """
  628. values = self[p]
  629. if strict and len(values) > 1:
  630. raise RuntimeError('More than one value found for {}, {}.'.format(
  631. self.uri, p))
  632. for ret in values:
  633. return ret
  634. return None
  635. cpdef as_rdflib(self):
  636. """
  637. Return the IMR as a RDFLib Resource.
  638. :rtype: rdflib.Resource
  639. """
  640. gr = Graph()
  641. for trp in self.data:
  642. gr.add(trp)
  643. return gr.resource(identifier=self.uri)