graph.pyx 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. import logging
  2. from functools import wraps
  3. from rdflib import Graph, URIRef
  4. from rdflib.term import Node
  5. from lakesuperior import env
  6. from libc.string cimport memcpy
  7. from libc.stdlib cimport free
  8. from cymem.cymem cimport Pool
  9. from lakesuperior.cy_include cimport collections as cc
  10. from lakesuperior.model.base cimport Buffer, buffer_dump
  11. from lakesuperior.model.graph cimport callbacks as cb
  12. from lakesuperior.model.graph cimport term
  13. from lakesuperior.model.graph.triple cimport BufferTriple
  14. from lakesuperior.model.structures.hash cimport term_hash_seed32
  15. logger = logging.getLogger(__name__)
cdef class SimpleGraph:
    """
    Fast and simple implementation of a graph.

    Most functions should mimic RDFLib's graph with less overhead. It uses
    the same funny but functional slicing notation.

    A SimpleGraph can be instantiated from a store lookup or obtained from a
    :py:class:`lakesuperior.store.keyset.Keyset`. This makes it possible to use
    a Keyset to perform initial filtering via identity by key, then the
    filtered Keyset can be converted into a set of meaningful terms.

    An instance of this class can also be converted to and from a
    ``rdflib.Graph`` instance.

    Internally the graph keeps two C hash sets, one holding serialized terms
    and one holding serialized triples, plus a memory pool from which term
    and triple buffers are allocated.
    """
  28. def __cinit__(self, set data=set(), *args, **kwargs):
  29. """
  30. Initialize the graph, optionally with Python data.
  31. :param set data: Initial data as a set of 3-tuples of RDFLib terms.
  32. """
  33. cdef:
  34. cc.HashSetConf terms_conf, trp_conf
  35. self.term_cmp_fn = cb.term_cmp_fn
  36. self.trp_cmp_fn = cb.trp_cmp_fn
  37. cc.hashset_conf_init(&terms_conf)
  38. terms_conf.load_factor = 0.85
  39. terms_conf.hash = cb.term_hash_fn
  40. terms_conf.hash_seed = term_hash_seed32
  41. terms_conf.key_compare = self.term_cmp_fn
  42. terms_conf.key_length = sizeof(Buffer*)
  43. cc.hashset_conf_init(&trp_conf)
  44. trp_conf.load_factor = 0.75
  45. trp_conf.hash = cb.trp_hash_fn
  46. trp_conf.hash_seed = term_hash_seed32
  47. trp_conf.key_compare = self.trp_cmp_fn
  48. trp_conf.key_length = sizeof(BufferTriple)
  49. cc.hashset_new_conf(&terms_conf, &self._terms)
  50. cc.hashset_new_conf(&trp_conf, &self._triples)
  51. self.pool = Pool()
  52. # Initialize empty data set.
  53. if data:
  54. # Populate with provided Python set.
  55. self.add(data)
  56. def __dealloc__(self):
  57. """
  58. Free the triple pointers.
  59. """
  60. free(self._triples)
  61. free(self._terms)
  62. ## PROPERTIES ##
  63. @property
  64. def data(self):
  65. """
  66. Triple data as a Python generator.
  67. :rtype: generator
  68. """
  69. cdef:
  70. void *void_p
  71. cc.HashSetIter ti
  72. Buffer* ss
  73. Buffer* sp
  74. Buffer* so
  75. cc.hashset_iter_init(&ti, self._triples)
  76. while cc.hashset_iter_next(&ti, &void_p) != cc.CC_ITER_END:
  77. logger.info(f'Data loop.')
  78. if void_p == NULL:
  79. logger.warn('Triple is NULL!')
  80. break
  81. trp = <BufferTriple *>void_p
  82. #print(f'trp.s: {buffer_dump(trp.s)}')
  83. #print(f'trp.p: {buffer_dump(trp.p)}')
  84. #print(f'trp.o: {buffer_dump(trp.o)}')
  85. yield (
  86. term.deserialize_to_rdflib(trp.s),
  87. term.deserialize_to_rdflib(trp.p),
  88. term.deserialize_to_rdflib(trp.o),
  89. )
  90. @property
  91. def stored_terms(self):
  92. """
  93. All terms in the graph with their memory address.
  94. For debugging purposes.
  95. """
  96. cdef:
  97. cc.HashSetIter it
  98. void *cur
  99. terms = set()
  100. cc.hashset_iter_init(&it, self._terms)
  101. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  102. s_term = <Buffer*>cur
  103. terms.add((f'0x{<size_t>cur:02x}', term.deserialize_to_rdflib(s_term)))
  104. return terms
  105. ## MAGIC METHODS ##
  106. def __len__(self):
  107. """ Number of triples in the graph. """
  108. return cc.hashset_size(self._triples)
  109. def __eq__(self, other):
  110. """ Equality operator between ``SimpleGraph`` instances. """
  111. return len(self ^ other) == 0
  112. def __repr__(self):
  113. """
  114. String representation of the graph.
  115. It provides the number of triples in the graph and memory address of
  116. the instance.
  117. """
  118. return (
  119. f'<{self.__class__.__name__} @{hex(id(self))} '
  120. f'length={len(self)}>'
  121. )
  122. def __str__(self):
  123. """ String dump of the graph triples. """
  124. return str(self.data)
  125. def __add__(self, other):
  126. """ Alias for set-theoretical union. """
  127. return self.union_(other)
  128. def __iadd__(self, other):
  129. """ Alias for in-place set-theoretical union. """
  130. self.ip_union(other)
  131. return self
  132. def __sub__(self, other):
  133. """ Set-theoretical subtraction. """
  134. return self.subtraction(other)
  135. def __isub__(self, other):
  136. """ In-place set-theoretical subtraction. """
  137. self.ip_subtraction(other)
  138. return self
  139. def __and__(self, other):
  140. """ Set-theoretical intersection. """
  141. return self.intersection(other)
  142. def __iand__(self, other):
  143. """ In-place set-theoretical intersection. """
  144. self.ip_intersection(other)
  145. return self
  146. def __or__(self, other):
  147. """ Set-theoretical union. """
  148. return self.union_(other)
  149. def __ior__(self, other):
  150. """ In-place set-theoretical union. """
  151. self.ip_union(other)
  152. return self
  153. def __xor__(self, other):
  154. """ Set-theoretical exclusive disjunction (XOR). """
  155. return self.xor(other)
  156. def __ixor__(self, other):
  157. """ In-place set-theoretical exclusive disjunction (XOR). """
  158. self.ip_xor(other)
  159. return self
  160. def __contains__(self, trp):
  161. """
  162. Whether the graph contains a triple.
  163. :rtype: boolean
  164. """
  165. cdef:
  166. Buffer ss, sp, so
  167. BufferTriple btrp
  168. btrp.s = &ss
  169. btrp.p = &sp
  170. btrp.o = &so
  171. s, p, o = trp
  172. term.serialize_from_rdflib(s, &ss)
  173. term.serialize_from_rdflib(p, &sp)
  174. term.serialize_from_rdflib(o, &so)
  175. return self.trp_contains(&btrp)
  176. def __iter__(self):
  177. """ Graph iterator. It iterates over the set triples. """
  178. yield from self.data
  179. #def __next__(self):
  180. # """ Graph iterator. It iterates over the set triples. """
  181. # return self.data.__next__()
  182. # Slicing.
  183. def __getitem__(self, item):
  184. """
  185. Slicing function.
  186. It behaves similarly to `RDFLib graph slicing
  187. <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__
  188. """
  189. if isinstance(item, slice):
  190. s, p, o = item.start, item.stop, item.step
  191. return self._slice(s, p, o)
  192. else:
  193. raise TypeError(f'Wrong slice format: {item}.')
  194. def __hash__(self):
  195. return 23465
  196. ## BASIC PYTHON-ACCESSIBLE SET OPERATIONS ##
  197. def terms_by_type(self, type):
  198. """
  199. Get all terms of a type: subject, predicate or object.
  200. :param str type: One of ``s``, ``p`` or ``o``.
  201. """
  202. i = 'spo'.index(type)
  203. return {r[i] for r in self.data}
  204. def add(self, trp):
  205. """
  206. Add triples to the graph.
  207. :param iterable triples: iterable of 3-tuple triples.
  208. """
  209. cdef size_t cur = 0, trp_cur = 0
  210. trp_ct = len(trp)
  211. term_buf = <Buffer*>self.pool.alloc(3 * trp_ct, sizeof(Buffer))
  212. trp_buf = <BufferTriple*>self.pool.alloc(trp_ct, sizeof(BufferTriple))
  213. for s, p, o in trp:
  214. term.serialize_from_rdflib(s, term_buf + cur, self.pool)
  215. term.serialize_from_rdflib(p, term_buf + cur + 1, self.pool)
  216. term.serialize_from_rdflib(o, term_buf + cur + 2, self.pool)
  217. (trp_buf + trp_cur).s = term_buf + cur
  218. (trp_buf + trp_cur).p = term_buf + cur + 1
  219. (trp_buf + trp_cur).o = term_buf + cur + 2
  220. self.add_triple(trp_buf + trp_cur)
  221. trp_cur += 1
  222. cur += 3
  223. def len_terms(self):
  224. """ Number of terms in the graph. """
  225. return cc.hashset_size(self._terms)
  226. def remove(self, pattern):
  227. """
  228. Remove triples by pattern.
  229. The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
  230. """
  231. self._match_ptn_callback(
  232. pattern, self, cb.del_trp_callback, NULL
  233. )
  234. ## CYTHON-ACCESSIBLE BASIC METHODS ##
  235. cdef SimpleGraph empty_copy(self):
  236. """
  237. Create an empty copy carrying over some key properties.
  238. Override in subclasses to accommodate for different init properties.
  239. """
  240. return self.__class__()
  241. cpdef union_(self, SimpleGraph other):
  242. """
  243. Perform set union resulting in a new SimpleGraph instance.
  244. TODO Allow union of multiple graphs at a time.
  245. :param SimpleGraph other: The other graph to merge.
  246. :rtype: SimpleGraph
  247. :return: A new SimpleGraph instance.
  248. """
  249. cdef:
  250. void *cur
  251. cc.HashSetIter it
  252. BufferTriple *trp
  253. new_gr = self.empty_copy()
  254. for gr in (self, other):
  255. cc.hashset_iter_init(&it, gr._triples)
  256. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  257. bt = <BufferTriple*>cur
  258. new_gr.add_triple(bt, True)
  259. return new_gr
  260. cdef void ip_union(self, SimpleGraph other) except *:
  261. """
  262. Perform an in-place set union that adds triples to this instance
  263. TODO Allow union of multiple graphs at a time.
  264. :param SimpleGraph other: The other graph to merge.
  265. :rtype: void
  266. """
  267. cdef:
  268. void *cur
  269. cc.HashSetIter it
  270. cc.hashset_iter_init(&it, other._triples)
  271. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  272. bt = <BufferTriple*>cur
  273. self.add_triple(bt, True)
  274. cpdef intersection(self, SimpleGraph other):
  275. """
  276. Graph intersection.
  277. :param SimpleGraph other: The other graph to intersect.
  278. :rtype: SimpleGraph
  279. :return: A new SimpleGraph instance.
  280. """
  281. cdef:
  282. void *cur
  283. cc.HashSetIter it
  284. new_gr = self.empty_copy()
  285. cc.hashset_iter_init(&it, self._triples)
  286. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  287. bt = <BufferTriple*>cur
  288. #print('Checking: <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
  289. # <size_t>bt.s, <size_t>bt.p, <size_t>bt.o))
  290. if other.trp_contains(bt):
  291. #print('Adding.')
  292. new_gr.add_triple(bt, True)
  293. return new_gr
  294. cdef void ip_intersection(self, SimpleGraph other) except *:
  295. """
  296. In-place graph intersection.
  297. Triples not in common with another graph are removed from the current
  298. one.
  299. :param SimpleGraph other: The other graph to intersect.
  300. :rtype: void
  301. """
  302. cdef:
  303. void *cur
  304. cc.HashSetIter it
  305. cc.hashset_iter_init(&it, self._triples)
  306. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  307. bt = <BufferTriple*>cur
  308. if not other.trp_contains(bt):
  309. self.remove_triple(bt)
  310. cpdef subtraction(self, SimpleGraph other):
  311. """
  312. Graph set-theoretical subtraction.
  313. Create a new graph with the triples of this graph minus the ones in
  314. common with the other graph.
  315. :param SimpleGraph other: The other graph to subtract to this.
  316. :rtype: SimpleGraph
  317. :return: A new SimpleGraph instance.
  318. """
  319. cdef:
  320. void *cur
  321. cc.HashSetIter it
  322. new_gr = self.empty_copy()
  323. cc.hashset_iter_init(&it, self._triples)
  324. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  325. bt = <BufferTriple*>cur
  326. #print('Checking: <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
  327. # <size_t>bt.s, <size_t>bt.p, <size_t>bt.o))
  328. if not other.trp_contains(bt):
  329. #print('Adding.')
  330. new_gr.add_triple(bt, True)
  331. return new_gr
  332. cdef void ip_subtraction(self, SimpleGraph other) except *:
  333. """
  334. In-place graph subtraction.
  335. Triples in common with another graph are removed from the current one.
  336. :param SimpleGraph other: The other graph to intersect.
  337. :rtype: void
  338. """
  339. cdef:
  340. void *cur
  341. cc.HashSetIter it
  342. cc.hashset_iter_init(&it, self._triples)
  343. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  344. bt = <BufferTriple*>cur
  345. if other.trp_contains(bt):
  346. self.remove_triple(bt)
  347. cpdef xor(self, SimpleGraph other):
  348. """
  349. Graph Exclusive disjunction (XOR).
  350. :param SimpleGraph other: The other graph to perform XOR with.
  351. :rtype: SimpleGraph
  352. :return: A new SimpleGraph instance.
  353. """
  354. cdef:
  355. void *cur
  356. cc.HashSetIter it
  357. BufferTriple* bt
  358. new_gr = self.empty_copy()
  359. # Add triples in this and not in other.
  360. cc.hashset_iter_init(&it, self._triples)
  361. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  362. bt = <BufferTriple*>cur
  363. if not other.trp_contains(bt):
  364. new_gr.add_triple(bt, True)
  365. # Other way around.
  366. cc.hashset_iter_init(&it, other._triples)
  367. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  368. bt = <BufferTriple*>cur
  369. if not self.trp_contains(bt):
  370. new_gr.add_triple(bt, True)
  371. return new_gr
  372. cdef void ip_xor(self, SimpleGraph other) except *:
  373. """
  374. In-place graph XOR.
  375. Triples in common with another graph are removed from the current one,
  376. and triples not in common will be added from the other one.
  377. :param SimpleGraph other: The other graph to perform XOR with.
  378. :rtype: void
  379. """
  380. cdef:
  381. void *cur
  382. cc.HashSetIter it
  383. # TODO This could be more efficient to stash values in a simple
  384. # array, but how urgent is it to improve an in-place XOR?
  385. SimpleGraph tmp = SimpleGraph()
  386. # Add *to the tmp graph* triples in other graph and not in this graph.
  387. cc.hashset_iter_init(&it, other._triples)
  388. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  389. bt = <BufferTriple*>cur
  390. if not self.trp_contains(bt):
  391. tmp.add_triple(bt)
  392. # Remove triples in common.
  393. cc.hashset_iter_init(&it, self._triples)
  394. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  395. bt = <BufferTriple*>cur
  396. if other.trp_contains(bt):
  397. self.remove_triple(bt)
  398. self |= tmp
  399. cdef inline BufferTriple* store_triple(self, const BufferTriple* strp):
  400. """
  401. Store triple data in the graph.
  402. Normally, raw data underlying the triple and terms are only referenced
  403. by pointers. If the destination data are garbage collected before the
  404. graph is, segfaults are bound to happen.
  405. This method copies the data to the graph's memory pool, so they are
  406. managed with the lifecycle of the graph.
  407. Note that this method stores items regardless of whether thwy are
  408. duplicate or not, so there may be some duplication.
  409. """
  410. cdef:
  411. BufferTriple* dtrp = <BufferTriple*>self.pool.alloc(
  412. 1, sizeof(BufferTriple)
  413. )
  414. Buffer* spo = <Buffer*>self.pool.alloc(3, sizeof(Buffer))
  415. if not dtrp:
  416. raise MemoryError()
  417. if not spo:
  418. raise MemoryError()
  419. dtrp.s = spo
  420. dtrp.p = spo + 1
  421. dtrp.o = spo + 2
  422. spo[0].addr = self.pool.alloc(strp.s.sz, 1)
  423. spo[0].sz = strp.s.sz
  424. spo[1].addr = self.pool.alloc(strp.p.sz, 1)
  425. spo[1].sz = strp.p.sz
  426. spo[2].addr = self.pool.alloc(strp.o.sz, 1)
  427. spo[2].sz = strp.o.sz
  428. if not spo[0].addr or not spo[1].addr or not spo[2].addr:
  429. raise MemoryError()
  430. memcpy(dtrp.s.addr, strp.s.addr, strp.s.sz)
  431. memcpy(dtrp.p.addr, strp.p.addr, strp.p.sz)
  432. memcpy(dtrp.o.addr, strp.o.addr, strp.o.sz)
  433. return dtrp
  434. cdef inline void add_triple(
  435. self, const BufferTriple* trp, bint add=False
  436. ) except *:
  437. """
  438. Add a triple from 3 (TPL) serialized terms.
  439. Each of the terms is added to the term set if not existing. The triple
  440. also is only added if not existing.
  441. :param BufferTriple* trp: The triple to add.
  442. :param bint add: if ``True``, the triple and term data will be
  443. allocated and copied into the graph memory pool.
  444. """
  445. if add:
  446. trp = self.store_triple(trp)
  447. logger.info('Inserting terms.')
  448. cc.hashset_add(self._terms, trp.s)
  449. cc.hashset_add(self._terms, trp.p)
  450. cc.hashset_add(self._terms, trp.o)
  451. logger.info('inserted terms.')
  452. logger.info(f'Terms set size: {cc.hashset_size(self._terms)}')
  453. cdef size_t trp_sz = cc.hashset_size(self._triples)
  454. logger.info(f'Triples set size before adding: {trp_sz}')
  455. r = cc.hashset_add(self._triples, trp)
  456. trp_sz = cc.hashset_size(self._triples)
  457. logger.info(f'Triples set size after adding: {trp_sz}')
  458. cdef:
  459. cc.HashSetIter ti
  460. void *cur
  461. cdef int remove_triple(self, const BufferTriple* btrp) except -1:
  462. """
  463. Remove one triple from the graph.
  464. """
  465. return cc.hashset_remove(self._triples, btrp, NULL)
  466. cdef bint trp_contains(self, const BufferTriple* btrp):
  467. cdef:
  468. cc.HashSetIter it
  469. void* cur
  470. cc.hashset_iter_init(&it, self._triples)
  471. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  472. if self.trp_cmp_fn(cur, btrp) == 0:
  473. return True
  474. return False
  475. cpdef void set(self, tuple trp) except *:
  476. """
  477. Set a single value for subject and predicate.
  478. Remove all triples matching ``s`` and ``p`` before adding ``s p o``.
  479. """
  480. if None in trp:
  481. raise ValueError(f'Invalid triple: {trp}')
  482. self.remove((trp[0], trp[1], None))
  483. self.add((trp,))
  484. def as_rdflib(self):
  485. """
  486. Return the data set as an RDFLib Graph.
  487. :rtype: rdflib.Graph
  488. """
  489. gr = Graph()
  490. for trp in self.data:
  491. gr.add(trp)
  492. return gr
  493. def _slice(self, s, p, o):
  494. """
  495. Return terms filtered by other terms.
  496. This behaves like the rdflib.Graph slicing policy.
  497. """
  498. _data = self.data
  499. logger.debug(f'Slicing graph by: {s}, {p}, {o}.')
  500. if s is None and p is None and o is None:
  501. return _data
  502. elif s is None and p is None:
  503. return {(r[0], r[1]) for r in _data if r[2] == o}
  504. elif s is None and o is None:
  505. return {(r[0], r[2]) for r in _data if r[1] == p}
  506. elif p is None and o is None:
  507. return {(r[1], r[2]) for r in _data if r[0] == s}
  508. elif s is None:
  509. return {r[0] for r in _data if r[1] == p and r[2] == o}
  510. elif p is None:
  511. return {r[1] for r in _data if r[0] == s and r[2] == o}
  512. elif o is None:
  513. return {r[2] for r in _data if r[0] == s and r[1] == p}
  514. else:
  515. # all given
  516. return (s,p,o) in _data
  517. def lookup(self, pattern):
  518. """
  519. Look up triples by a pattern.
  520. This function converts RDFLib terms into the serialized format stored
  521. in the graph's internal structure and compares them bytewise.
  522. Any and all of the lookup terms msy be ``None``.
  523. :rtype: SimpleGraph
  524. "return: New SimpleGraph instance with matching triples.
  525. """
  526. cdef:
  527. void* cur
  528. BufferTriple trp
  529. SimpleGraph res_gr = SimpleGraph()
  530. self._match_ptn_callback(pattern, res_gr, cb.add_trp_callback, NULL)
  531. return res_gr
  532. cdef void _match_ptn_callback(
  533. self, pattern, SimpleGraph gr,
  534. lookup_callback_fn_t callback_fn, void* ctx=NULL
  535. ) except *:
  536. """
  537. Execute an arbitrary function on a list of triples matching a pattern.
  538. The arbitrary function is appied to each triple found in the current
  539. graph, and to a discrete graph that can be the current graph itself
  540. or a different one.
  541. """
  542. cdef:
  543. void* cur
  544. Buffer t1, t2
  545. Buffer ss, sp, so
  546. BufferTriple trp
  547. BufferTriple* trp_p
  548. lookup_fn_t cmp_fn
  549. cc.HashSetIter it
  550. s, p, o = pattern
  551. # Decide comparison logic outside the loop.
  552. if s is not None and p is not None and o is not None:
  553. logger.info('Looping over one triple only.')
  554. # Shortcut for 3-term match.
  555. trp.s = &ss
  556. trp.p = &sp
  557. trp.o = &so
  558. term.serialize_from_rdflib(s, trp.s, self.pool)
  559. term.serialize_from_rdflib(p, trp.p, self.pool)
  560. term.serialize_from_rdflib(o, trp.o, self.pool)
  561. if cc.hashset_contains(self._triples, &trp):
  562. callback_fn(gr, &trp, ctx)
  563. return
  564. if s is not None:
  565. term.serialize_from_rdflib(s, &t1)
  566. if p is not None:
  567. cmp_fn = cb.lookup_sp_cmp_fn
  568. term.serialize_from_rdflib(p, &t2)
  569. elif o is not None:
  570. cmp_fn = cb.lookup_so_cmp_fn
  571. term.serialize_from_rdflib(o, &t2)
  572. else:
  573. cmp_fn = cb.lookup_s_cmp_fn
  574. elif p is not None:
  575. term.serialize_from_rdflib(p, &t1)
  576. if o is not None:
  577. cmp_fn = cb.lookup_po_cmp_fn
  578. term.serialize_from_rdflib(o, &t2)
  579. else:
  580. cmp_fn = cb.lookup_p_cmp_fn
  581. elif o is not None:
  582. cmp_fn = cb.lookup_o_cmp_fn
  583. term.serialize_from_rdflib(o, &t1)
  584. else:
  585. cmp_fn = cb.lookup_none_cmp_fn
  586. # Iterate over serialized triples.
  587. cc.hashset_iter_init(&it, self._triples)
  588. while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
  589. trp_p = <BufferTriple*>cur
  590. if cmp_fn(trp_p, &t1, &t2):
  591. callback_fn(gr, trp_p, ctx)
cdef class Imr(SimpleGraph):
    """
    In-memory resource data container.

    This is an extension of :py:class:`~SimpleGraph` that adds a subject URI to
    the data set and some convenience methods.

    An instance of this class can be converted to a ``rdflib.Resource``
    instance.

    Some set operations that produce a new object (``-``, ``|``, ``&``, ``^``)
    will create a new ``Imr`` instance with the same subject URI.
    """
  602. def __init__(self, uri, *args, **kwargs):
  603. """
  604. Initialize the graph with pre-existing data or by looking up a store.
  605. Either ``data``, or ``lookup`` *and* ``store``, can be provide.
  606. ``lookup`` and ``store`` have precedence. If none of them is specified,
  607. an empty graph is initialized.
  608. :param rdflib.URIRef uri: The graph URI.
  609. This will serve as the subject for some queries.
  610. :param args: Positional arguments inherited from
  611. ``SimpleGraph.__init__``.
  612. :param kwargs: Keyword arguments inherited from
  613. ``SimpleGraph.__init__``.
  614. """
  615. self.id = str(uri)
  616. #super().__init(*args, **kwargs)
  617. def __repr__(self):
  618. """
  619. String representation of an Imr.
  620. This includes the subject URI, number of triples contained and the
  621. memory address of the instance.
  622. """
  623. return (f'<{self.__class__.__name__} @{hex(id(self))} id={self.id}, '
  624. f'length={len(self)}>')
  625. def __getitem__(self, item):
  626. """
  627. Supports slicing notation.
  628. """
  629. if isinstance(item, slice):
  630. s, p, o = item.start, item.stop, item.step
  631. return self._slice(s, p, o)
  632. elif isinstance(item, Node):
  633. # If a Node is given, return all values for that predicate.
  634. return {
  635. r[2] for r in self.data
  636. if r[0] == self.id and r[1] == item}
  637. else:
  638. raise TypeError(f'Wrong slice format: {item}.')
  639. @property
  640. def uri(self):
  641. """
  642. Get resource identifier as a RDFLib URIRef.
  643. :rtype: rdflib.URIRef.
  644. """
  645. return URIRef(self.id)
  646. cdef Imr empty_copy(self):
  647. """
  648. Create an empty instance carrying over some key properties.
  649. """
  650. return self.__class__(uri=self.id)
  651. def value(self, p, strict=False):
  652. """
  653. Get an individual value.
  654. :param rdflib.termNode p: Predicate to search for.
  655. :param bool strict: If set to ``True`` the method raises an error if
  656. more than one value is found. If ``False`` (the default) only
  657. the first found result is returned.
  658. :rtype: rdflib.term.Node
  659. """
  660. values = self[p]
  661. if strict and len(values) > 1:
  662. raise RuntimeError('More than one value found for {}, {}.'.format(
  663. self.id, p))
  664. for ret in values:
  665. return ret
  666. return None
  667. cpdef as_rdflib(self):
  668. """
  669. Return the IMR as a RDFLib Resource.
  670. :rtype: rdflib.Resource
  671. """
  672. gr = Graph()
  673. for trp in self.data:
  674. gr.add(trp)
  675. return gr.resource(identifier=self.uri)