graph.pyx 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833
  1. import logging
  2. from functools import wraps
  3. from rdflib import Graph
  4. from rdflib.term import Node
  5. from lakesuperior import env
  6. from libc.string cimport memcmp, memcpy
  7. from libc.stdlib cimport free
  8. from cymem.cymem cimport Pool
  9. from lakesuperior.cy_include cimport calg
  10. from lakesuperior.cy_include cimport cylmdb as lmdb
  11. from lakesuperior.model.graph cimport term
  12. from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
  13. KLEN, DBL_KLEN, TRP_KLEN, TripleKey)
  14. from lakesuperior.model.structures.keyset cimport Keyset
  15. from lakesuperior.model.base cimport Buffer
  16. from lakesuperior.model.graph.triple cimport BufferTriple
  17. from lakesuperior.model.structures.hash cimport Hash32, hash32
  18. #BUF_PTR_SZ = sizeof(Buffer *)
  19. logger = logging.getLogger(__name__)
  20. def use_data(fn):
  21. """
  22. Decorator to indicate that a set operation between two SimpleGraph
  23. instances should use the ``data`` property of the second term. The second
  24. term can also be a simple set.
  25. """
  26. @wraps(fn)
  27. def _wrapper(self, other):
  28. if isinstance(other, SimpleGraph):
  29. other = other.data
  30. return _wrapper
  31. cdef unsigned int term_hash_fn(const calg.SetValue data):
  32. """
  33. Hash function for sets of terms.
  34. https://fragglet.github.io/c-algorithms/doc/set_8h.html#6c7986a2a80d7a3cb7b9d74e1c6fef97
  35. :param SetValue *data: Pointer to a Buffer structure.
  36. """
  37. cdef:
  38. Hash32 hash
  39. hash32(<const Buffer *>&data, &hash)
  40. return hash
  41. cdef unsigned int trp_hash_fn(calg.SetValue btrp):
  42. """
  43. Hash function for sets of (serialized) triples.
  44. https://fragglet.github.io/c-algorithms/doc/set_8h.html#6c7986a2a80d7a3cb7b9d74e1c6fef97
  45. This function computes the hash of the concatenated pointer values in the
  46. s, p, o members of the triple. The triple structure is treated as a byte
  47. string. This is safe in spite of byte-wise struct evaluation being a
  48. frowned-upon practice (due to padding issues), because it is assumed that
  49. the input value is always the same type of structure.
  50. :param SetItem *data: Pointer to a BufferTriple structure.
  51. """
  52. cdef:
  53. Buffer data
  54. Hash32 hash
  55. data.addr = &btrp
  56. data.sz = sizeof(btrp)
  57. hash32(&data, &hash)
  58. return hash
  59. cdef bint buffer_cmp_fn(const calg.SetValue v1, const calg.SetValue v2):
  60. """
  61. Compare function for two Buffer objects.
  62. https://fragglet.github.io/c-algorithms/doc/set_8h.html#40fa2c86d5b003c1b0b0e8dd1e4df9f4
  63. """
  64. # No-cast option.
  65. #if v1[0].sz != v2[0].sz:
  66. # return False
  67. #return memcmp(v1[0].addr, v2[0].addr, v1[0].sz) == 0
  68. cdef:
  69. Buffer b1 = (<Buffer *>v1)[0]
  70. Buffer b2 = (<Buffer *>v2)[0]
  71. if b1.sz != b2.sz:
  72. return False
  73. return memcmp(b1.addr, b2.addr, b1.sz) == 0
  74. cdef bint triple_cmp_fn(const calg.SetValue v1, const calg.SetValue v2):
  75. """
  76. Compare function for two triples in a CAlg set.
  77. Here, pointers to terms are compared for s, p, o. The pointers should be
  78. guaranteed to point to unique values (i.e. no two pointers have the same
  79. term value within a graph).
  80. https://fragglet.github.io/c-algorithms/doc/set_8h.html#40fa2c86d5b003c1b0b0e8dd1e4df9f4
  81. """
  82. cdef:
  83. BufferTriple t1 = (<BufferTriple *>v1)[0]
  84. BufferTriple t2 = (<BufferTriple *>v2)[0]
  85. return(
  86. t1.s == t2.s and
  87. t1.p == t2.p and
  88. t1.o == t2.o)
  89. cdef inline bint lookup_none_cmp_fn(
  90. const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
  91. return True
  92. cdef inline bint lookup_s_cmp_fn(
  93. const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
  94. """
  95. Lookup callback compare function for a given s in a triple.
  96. The function returns ``True`` if ``t1`` matches the first term.
  97. ``t2`` is not used and is declared only for compatibility with the
  98. other interchangeable functions.
  99. """
  100. return buffer_cmp_fn(t1, trp[0].s)
  101. cdef inline bint lookup_p_cmp_fn(
  102. const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
  103. return buffer_cmp_fn(t1, trp[0].p)
  104. cdef inline bint lookup_o_cmp_fn(
  105. const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
  106. return buffer_cmp_fn(t1, trp[0].o)
  107. cdef inline bint lookup_sp_cmp_fn(
  108. const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
  109. return (
  110. buffer_cmp_fn(t1, trp[0].s)
  111. and buffer_cmp_fn(t2, trp[0].p))
  112. cdef inline bint lookup_so_cmp_fn(
  113. const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
  114. return (
  115. buffer_cmp_fn(t1, trp[0].s)
  116. and buffer_cmp_fn(t2, trp[0].o))
  117. cdef inline bint lookup_po_cmp_fn(
  118. const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
  119. return (
  120. buffer_cmp_fn(t1, trp[0].p)
  121. and buffer_cmp_fn(t2, trp[0].o))
  122. cdef class SimpleGraph:
  123. """
  124. Fast and simple implementation of a graph.
  125. Most functions should mimic RDFLib's graph with less overhead. It uses
  126. the same funny but functional slicing notation. No lookup functions within
  127. the graph are available at this time.
  128. Instances of this class hold a set of
  129. :py:class:`~lakesuperior.store.ldp_rs.term.Term` structures that stores
  130. unique terms within the graph, and a set of
  131. :py:class:`~lakesuperior.store.ldp_rs.triple.Triple` structures referencing
  132. those terms. Therefore, no data duplication occurs and the storage is quite
  133. sparse.
  134. A graph can be instantiated from a store lookup.
  135. A SimpleGraph can also be obtained from a
  136. :py:class:`lakesuperior.store.keyset.Keyset` which is convenient bacause
  137. a Keyset can be obtained very efficiently from querying a store, then also
  138. very efficiently filtered and eventually converted into a set of meaningful
  139. terms.
  140. An instance of this class can also be converted to and from a
  141. ``rdflib.Graph`` instance. TODO verify that this frees Cython pointers.
  142. """
  143. def __cinit__(
  144. self, Keyset keyset=None, store=None, set data=set()):
  145. """
  146. Initialize the graph with pre-existing data or by looking up a store.
  147. One of ``keyset``, or ``data`` can be provided. If more than
  148. one of these is provided, precedence is given in the mentioned order.
  149. If none of them is specified, an empty graph is initialized.
  150. :param rdflib.URIRef uri: The graph URI.
  151. This will serve as the subject for some queries.
  152. :param Keyset keyset: Keyset to create the graph from. Keys will be
  153. converted to set elements.
  154. :param lakesuperior.store.ldp_rs.LmdbTripleStore store: store to
  155. look up the keyset. Only used if ``keyset`` is specified. If not
  156. set, the environment store is used.
  157. :param set data: Initial data as a set of 3-tuples of RDFLib terms.
  158. :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
  159. E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
  160. Any and all elements may be ``None``.
  161. :param lmdbStore store: the store to look data up.
  162. """
  163. self.store = store or env.app_globals.rdf_store
  164. self._terms = calg.set_new(term_hash_fn, buffer_cmp_fn)
  165. self._triples = calg.set_new(trp_hash_fn, triple_cmp_fn)
  166. self._pool = Pool()
  167. cdef:
  168. size_t i = 0
  169. TripleKey spok
  170. term.Buffer pk_t
  171. # Initialize empty data set.
  172. if keyset:
  173. # Populate with triples extracted from provided key set.
  174. self._data_from_keyset(keyset)
  175. elif data is not None:
  176. # Populate with provided Python set.
  177. for s, p, o in data:
  178. self._add_from_rdflib(s, p, o)
  179. def __dealloc__(self):
  180. """
  181. Free the triple pointers. TODO use a Cymem pool
  182. """
  183. free(self._triples)
  184. free(self._terms)
  185. @property
  186. def data(self):
  187. """
  188. Triple data as a Python set.
  189. :rtype: set
  190. """
  191. return self._data_as_set()
  192. cdef void _data_from_lookup(self, tuple trp_ptn, ctx=None) except *:
  193. """
  194. Look up triples in the triplestore and load them into ``data``.
  195. :param tuple lookup: 3-tuple of RDFlib terms or ``None``.
  196. :param LmdbTriplestore store: Reference to a LMDB triplestore. This
  197. is normally set to ``lakesuperior.env.app_globals.rdf_store``.
  198. """
  199. cdef:
  200. size_t i
  201. unsigned char spok[TRP_KLEN]
  202. with self.store.txn_ctx():
  203. keyset = self.store.triple_keys(trp_ptn, ctx)
  204. self.data_from_keyset(keyset)
  205. cdef void _data_from_keyset(self, Keyset data) except *:
  206. """Populate a graph from a Keyset."""
  207. cdef TripleKey spok
  208. while data.next(spok):
  209. self._add_from_spok(spok)
  210. cdef inline void _add_from_spok(self, const TripleKey spok) except *:
  211. """
  212. Add a triple from a TripleKey of term keys.
  213. """
  214. cdef:
  215. SPOBuffer s_spo
  216. BufferTriple trp
  217. s_spo = <SPOBuffer>self._pool.alloc(3, sizeof(Buffer))
  218. self.store.lookup_term(spok, s_spo)
  219. self.store.lookup_term(spok + KLEN, s_spo + 1)
  220. self.store.lookup_term(spok + DBL_KLEN, s_spo + 2)
  221. self._add_triple(s_spo, s_spo + 1, s_spo + 2)
  222. cdef inline void _add_triple(
  223. self, const Buffer *ss, const Buffer *sp, const Buffer *so
  224. ) except *:
  225. """
  226. Add a triple from 3 (TPL) serialized terms.
  227. Each of the terms is added to the term set if not existing. The triple
  228. also is only added if not existing.
  229. """
  230. trp = <BufferTriple *>self._pool.alloc(1, sizeof(BufferTriple))
  231. print('Adding terms.')
  232. print('ss: ')
  233. print((<unsigned char *>ss[0].addr)[:ss[0].sz])
  234. calg.set_insert(self._terms, ss)
  235. print('sp: ')
  236. print((<unsigned char *>sp[0].addr)[:sp[0].sz])
  237. calg.set_insert(self._terms, sp)
  238. print('so: ')
  239. print((<unsigned char *>so[0].addr)[:so[0].sz])
  240. calg.set_insert(self._terms, so)
  241. print('Added terms.')
  242. cdef calg.SetIterator ti
  243. cdef Buffer *t
  244. calg.set_iterate(self._terms, &ti)
  245. while calg.set_iter_has_more(&ti):
  246. t = <Buffer *>calg.set_iter_next(&ti)
  247. print('term @{}: '.format(<size_t>t.addr))
  248. print((<unsigned char *>t.addr)[:t.sz])
  249. trp.s = ss
  250. trp.p = sp
  251. trp.o = so
  252. print('Adding triple.')
  253. calg.set_insert(self._triples, trp)
  254. print('Added triple.')
  255. cdef BufferTriple *tt
  256. calg.set_iterate(self._triples, &ti)
  257. while calg.set_iter_has_more(&ti):
  258. tt = <BufferTriple *>calg.set_iter_next(&ti)
  259. print('Triple pointer address: {}'.format(<unsigned long>tt))
  260. print('Triple s address: {}'.format(<unsigned long>tt[0].s))
  261. print(f'Triple s size: {tt.s.sz}')
  262. print('triple s: ')
  263. print((<unsigned char *>tt[0].s.addr)[:tt[0].s.sz])
  264. print('Triple p address: {}'.format(<unsigned long>tt[0].p))
  265. print(f'Triple p size: {tt.p.sz}')
  266. print('triple p: ')
  267. print((<unsigned char *>tt[0].p.addr)[:tt[0].o.sz])
  268. print('Triple o address: {}'.format(<unsigned long>tt[0].o))
  269. print(f'Triple o size: {tt.o.sz}')
  270. print('triple o: ')
  271. print((<unsigned char *>tt[0].o.addr)[:tt[0].o.sz])
  272. cdef set _data_as_set(self):
  273. """
  274. Convert triple data to a Python set.
  275. :rtype: set
  276. """
  277. cdef:
  278. calg.SetIterator ti
  279. BufferTriple *trp
  280. term.Term s, p, o
  281. graph_set = set()
  282. print('Initialize iterator.')
  283. calg.set_iterate(self._triples, &ti)
  284. print('start loop.')
  285. while calg.set_iter_has_more(&ti):
  286. print('Set up triple.')
  287. trp = <BufferTriple *>calg.set_iter_next(&ti)
  288. if trp == NULL:
  289. print('Triple is NULL!')
  290. return graph_set
  291. print('Triple pointer address: {}'.format(<unsigned long>trp))
  292. print('Triple s address: {}'.format(<unsigned long>trp[0].s))
  293. print(f'Triple s size: {trp[0].s.sz}')
  294. print('Triple s:')
  295. print((<unsigned char *>trp[0].s.addr)[:trp[0].s.sz])
  296. print('Triple p address: {}'.format(<unsigned long>trp[0].p))
  297. print(f'Triple p size: {trp[0].p.sz}')
  298. print('Triple p:')
  299. print((<unsigned char *>trp[0].p.addr)[:trp[0].p.sz])
  300. print('Triple o address: {}'.format(<unsigned long>trp[0].o))
  301. print(f'Triple o size: {trp[0].o.sz}')
  302. print('Triple o:')
  303. print((<unsigned char *>trp[0].o.addr)[:trp[0].o.sz])
  304. print('Add triple.')
  305. graph_set.add((
  306. term.deserialize_to_rdflib(trp.s),
  307. term.deserialize_to_rdflib(trp.p),
  308. term.deserialize_to_rdflib(trp.o),
  309. ))
  310. return graph_set
  311. # Basic set operations.
  312. def add(self, triple):
  313. """ Add one triple to the graph. """
  314. cdef:
  315. Buffer *ss = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
  316. Buffer *sp = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
  317. Buffer *so = <Buffer *>self._pool.alloc(1, sizeof(Buffer))
  318. s, p, o = triple
  319. #print('Serializing s.')
  320. term.serialize_from_rdflib(s, ss, self._pool)
  321. #print('Serializing p.')
  322. term.serialize_from_rdflib(p, sp, self._pool)
  323. #print('Serializing o.')
  324. term.serialize_from_rdflib(o, so, self._pool)
  325. print('Adding triple from rdflib.')
  326. self._add_triple(ss, sp, so)
  327. print('Added triple from rdflib.')
  328. def remove(self, item):
  329. """
  330. Remove one item from the graph.
  331. :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
  332. wildcards are not accepted.
  333. """
  334. self.data.remove(item)
  335. def __len__(self):
  336. """ Number of triples in the graph. """
  337. return len(self.data)
  338. @use_data
  339. def __eq__(self, other):
  340. """ Equality operator between ``SimpleGraph`` instances. """
  341. return self.data == other
  342. def __repr__(self):
  343. """
  344. String representation of the graph.
  345. It provides the number of triples in the graph and memory address of
  346. the instance.
  347. """
  348. return (f'<{self.__class__.__name__} @{hex(id(self))} '
  349. f'length={len(self.data)}>')
  350. def __str__(self):
  351. """ String dump of the graph triples. """
  352. return str(self.data)
  353. @use_data
  354. def __sub__(self, other):
  355. """ Set subtraction. """
  356. return self.data - other
  357. @use_data
  358. def __isub__(self, other):
  359. """ In-place set subtraction. """
  360. self.data -= other
  361. return self
  362. @use_data
  363. def __and__(self, other):
  364. """ Set intersection. """
  365. return self.data & other
  366. @use_data
  367. def __iand__(self, other):
  368. """ In-place set intersection. """
  369. self.data &= other
  370. return self
  371. @use_data
  372. def __or__(self, other):
  373. """ Set union. """
  374. return self.data | other
  375. @use_data
  376. def __ior__(self, other):
  377. """ In-place set union. """
  378. self.data |= other
  379. return self
  380. @use_data
  381. def __xor__(self, other):
  382. """ Set exclusive intersection (XOR). """
  383. return self.data ^ other
  384. @use_data
  385. def __ixor__(self, other):
  386. """ In-place set exclusive intersection (XOR). """
  387. self.data ^= other
  388. return self
  389. def __contains__(self, item):
  390. """
  391. Whether the graph contains a triple.
  392. :rtype: boolean
  393. """
  394. return item in self.data
  395. def __iter__(self):
  396. """ Graph iterator. It iterates over the set triples. """
  397. return self.data.__iter__()
  398. # Slicing.
  399. def __getitem__(self, item):
  400. """
  401. Slicing function.
  402. It behaves similarly to `RDFLib graph slicing
  403. <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__
  404. """
  405. if isinstance(item, slice):
  406. s, p, o = item.start, item.stop, item.step
  407. return self._slice(s, p, o)
  408. else:
  409. raise TypeError(f'Wrong slice format: {item}.')
  410. cpdef void set(self, tuple trp) except *:
  411. """
  412. Set a single value for subject and predicate.
  413. Remove all triples matching ``s`` and ``p`` before adding ``s p o``.
  414. """
  415. if None in trp:
  416. raise ValueError(f'Invalid triple: {trp}')
  417. self.remove_triples((trp[0], trp[1], None))
  418. self.add(trp)
  419. cpdef void remove_triples(self, pattern) except *:
  420. """
  421. Remove triples by pattern.
  422. The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
  423. """
  424. s, p, o = pattern
  425. for match in self.lookup(s, p, o):
  426. logger.debug(f'Removing from graph: {match}.')
  427. self.data.remove(match)
  428. cpdef object as_rdflib(self):
  429. """
  430. Return the data set as an RDFLib Graph.
  431. :rtype: rdflib.Graph
  432. """
  433. gr = Graph()
  434. for trp in self.data:
  435. gr.add(trp)
  436. return gr
  437. def _slice(self, s, p, o):
  438. """
  439. Return terms filtered by other terms.
  440. This behaves like the rdflib.Graph slicing policy.
  441. """
  442. _data = self.data
  443. logger.debug(f'Slicing graph by: {s}, {p}, {o}.')
  444. if s is None and p is None and o is None:
  445. return _data
  446. elif s is None and p is None:
  447. return {(r[0], r[1]) for r in _data if r[2] == o}
  448. elif s is None and o is None:
  449. return {(r[0], r[2]) for r in _data if r[1] == p}
  450. elif p is None and o is None:
  451. return {(r[1], r[2]) for r in _data if r[0] == s}
  452. elif s is None:
  453. return {r[0] for r in _data if r[1] == p and r[2] == o}
  454. elif p is None:
  455. return {r[1] for r in _data if r[0] == s and r[2] == o}
  456. elif o is None:
  457. return {r[2] for r in _data if r[0] == s and r[1] == p}
  458. else:
  459. # all given
  460. return (s,p,o) in _data
  461. def lookup(self, s, p, o):
  462. """
  463. Look up triples by a pattern.
  464. This function converts RDFLib terms into the serialized format stored
  465. in the graph's internal structure and compares them bytewise.
  466. Any and all of the lookup terms can be ``None``.
  467. """
  468. cdef:
  469. BufferTriple trp
  470. BufferTriple *trp_p
  471. calg.SetIterator ti
  472. const Buffer t1
  473. const Buffer t2
  474. lookup_fn_t fn
  475. res = set()
  476. # Decide comparison logic outside the loop.
  477. if s is not None and p is not None and o is not None:
  478. # Return immediately if 3-term match is requested.
  479. term.serialize_from_rdflib(s, trp.s)
  480. term.serialize_from_rdflib(p, trp.p)
  481. term.serialize_from_rdflib(o, trp.o)
  482. if calg.set_query(self._triples, &trp):
  483. res.add((s, p, o))
  484. return res
  485. elif s is not None:
  486. term.serialize_from_rdflib(s, &t1)
  487. if p is not None:
  488. fn = lookup_sp_cmp_fn
  489. term.serialize_from_rdflib(p, &t2)
  490. elif o is not None:
  491. fn = lookup_so_cmp_fn
  492. term.serialize_from_rdflib(o, &t2)
  493. else:
  494. fn = lookup_s_cmp_fn
  495. elif p is not None:
  496. term.serialize_from_rdflib(p, &t1)
  497. if o is not None:
  498. fn = lookup_po_cmp_fn
  499. term.serialize_from_rdflib(o, &t2)
  500. else:
  501. fn = lookup_p_cmp_fn
  502. elif o is not None:
  503. fn = lookup_o_cmp_fn
  504. term.serialize_from_rdflib(o, &t1)
  505. else:
  506. fn = lookup_none_cmp_fn
  507. # Iterate over serialized triples.
  508. calg.set_iterate(self._triples, &ti)
  509. while calg.set_iter_has_more(&ti):
  510. trp_p = <BufferTriple *>calg.set_iter_next(&ti)
  511. if fn(trp_p, &t1, &t2):
  512. res.add((
  513. term.deserialize_to_rdflib(trp_p[0].s),
  514. term.deserialize_to_rdflib(trp_p[0].p),
  515. term.deserialize_to_rdflib(trp_p[0].o),
  516. ))
  517. return res
  518. cpdef set terms(self, str type):
  519. """
  520. Get all terms of a type: subject, predicate or object.
  521. :param str type: One of ``s``, ``p`` or ``o``.
  522. """
  523. i = 'spo'.index(type)
  524. return {r[i] for r in self.data}
  525. cdef class Imr(SimpleGraph):
  526. """
  527. In-memory resource data container.
  528. This is an extension of :py:class:`~SimpleGraph` that adds a subject URI to
  529. the data set and some convenience methods.
  530. An instance of this class can be converted to a ``rdflib.Resource``
  531. instance.
  532. Some set operations that produce a new object (``-``, ``|``, ``&``, ``^``)
  533. will create a new ``Imr`` instance with the same subject URI.
  534. """
  535. def __init__(self, str uri, *args, **kwargs):
  536. """
  537. Initialize the graph with pre-existing data or by looking up a store.
  538. Either ``data``, or ``lookup`` *and* ``store``, can be provide.
  539. ``lookup`` and ``store`` have precedence. If none of them is specified,
  540. an empty graph is initialized.
  541. :param rdflib.URIRef uri: The graph URI.
  542. This will serve as the subject for some queries.
  543. :param set data: Initial data as a set of 3-tuples of RDFLib terms.
  544. :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
  545. E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
  546. Any and all elements may be ``None``.
  547. :param lmdbStore store: the store to look data up.
  548. """
  549. super().__init__(*args, **kwargs)
  550. self.uri = uri
  551. @property
  552. def identifier(self):
  553. """
  554. IMR URI. For compatibility with RDFLib Resource.
  555. :rtype: string
  556. """
  557. return self.uri
  558. @property
  559. def graph(self):
  560. """
  561. Return a SimpleGraph with the same data.
  562. :rtype: SimpleGraph
  563. """
  564. return SimpleGraph(self.data)
  565. def __repr__(self):
  566. """
  567. String representation of an Imr.
  568. This includes the subject URI, number of triples contained and the
  569. memory address of the instance.
  570. """
  571. return (f'<{self.__class__.__name__} @{hex(id(self))} uri={self.uri}, '
  572. f'length={len(self.data)}>')
  573. @use_data
  574. def __sub__(self, other):
  575. """
  576. Set difference. This creates a new Imr with the same subject URI.
  577. """
  578. return self.__class__(uri=self.uri, data=self.data - other)
  579. @use_data
  580. def __and__(self, other):
  581. """
  582. Set intersection. This creates a new Imr with the same subject URI.
  583. """
  584. return self.__class__(uri=self.uri, data=self.data & other)
  585. @use_data
  586. def __or__(self, other):
  587. """
  588. Set union. This creates a new Imr with the same subject URI.
  589. """
  590. return self.__class__(uri=self.uri, data=self.data | other)
  591. @use_data
  592. def __xor__(self, other):
  593. """
  594. Set exclusive OR (XOR). This creates a new Imr with the same subject
  595. URI.
  596. """
  597. return self.__class__(uri=self.uri, data=self.data ^ other)
  598. def __getitem__(self, item):
  599. """
  600. Supports slicing notation.
  601. """
  602. if isinstance(item, slice):
  603. s, p, o = item.start, item.stop, item.step
  604. return self._slice(s, p, o)
  605. elif isinstance(item, Node):
  606. # If a Node is given, return all values for that predicate.
  607. return {
  608. r[2] for r in self.data
  609. if r[0] == self.uri and r[1] == item}
  610. else:
  611. raise TypeError(f'Wrong slice format: {item}.')
  612. def value(self, p, strict=False):
  613. """
  614. Get an individual value.
  615. :param rdflib.termNode p: Predicate to search for.
  616. :param bool strict: If set to ``True`` the method raises an error if
  617. more than one value is found. If ``False`` (the default) only
  618. the first found result is returned.
  619. :rtype: rdflib.term.Node
  620. """
  621. values = self[p]
  622. if strict and len(values) > 1:
  623. raise RuntimeError('More than one value found for {}, {}.'.format(
  624. self.uri, p))
  625. for ret in values:
  626. return ret
  627. return None
  628. cpdef as_rdflib(self):
  629. """
  630. Return the IMR as a RDFLib Resource.
  631. :rtype: rdflib.Resource
  632. """
  633. gr = Graph()
  634. for trp in self.data:
  635. gr.add(trp)
  636. return gr.resource(identifier=self.uri)