graph.pyx 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. import logging
  2. from functools import wraps
  3. from rdflib import Graph
  4. from rdflib.term import Node
  5. from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
  6. TRP_KLEN, LmdbTriplestore)
  7. logger = logging.getLogger(__name__)
  def use_data(fn):
      """
      Decorator for binary set operations whose second term may be a graph.

      If the second operand is a :py:class:`SimpleGraph` instance, its ``data``
      set is substituted for it before ``fn`` is invoked. Any other operand
      (e.g. a plain ``set``) is passed through unchanged.

      :param callable fn: Method with the signature ``fn(self, other)``.

      :rtype: callable
      :return: Wrapper with the same signature, returning whatever ``fn``
          returns.
      """
      @wraps(fn)
      def _wrapper(self, other):
          if isinstance(other, SimpleGraph):
              other = other.data
          # Delegate to the decorated method. Without this call the wrapper
          # falls off the end and every decorated operator yields ``None``.
          return fn(self, other)

      return _wrapper
  cdef class SimpleGraph:
      """
      Fast and simple implementation of a graph.

      Most functions should mimic RDFLib's graph with less overhead. It uses
      the same funny but functional slicing notation.

      Triples are held in the read-only ``data`` attribute, a ``set`` of
      3-tuples.

      An instance of this class can be converted to a ``rdflib.Graph``
      instance.
      """

      cdef:
          readonly set data

      def __init__(self, set data=None, tuple lookup=(), store=None):
          """
          Initialize the graph with pre-existing data or by looking up a store.

          Either ``data``, or both ``lookup`` and ``store``, can be provided.
          A non-empty ``data`` is used as is and ``lookup`` is then ignored.
          If neither ``data`` nor ``lookup`` is given, an empty graph is
          initialized.

          :param set data: Initial data as a set of 3-tuples of RDFLib terms.
              The set is copied.
          :param tuple lookup: tuple of a 3-tuple of lookup terms, and a
              context. E.g.
              ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
              Any and all elements may be ``None``. The context may be left
              out altogether.
          :param lmdbStore store: the store to look data up.

          :raise ValueError: if ``lookup`` is given without a ``store``.
          """
          if data:
              self.data = set(data)
          elif lookup:
              if store is None:
                  raise ValueError('Store not specified for triple lookup.')
              trp_ptn = lookup[0]
              ctx = lookup[1] if len(lookup) > 1 else None
              self._data_from_lookup(store, trp_ptn, ctx)
          else:
              self.data = set()

      cdef void _data_from_lookup(
              self, LmdbTriplestore store, tuple trp_ptn, ctx=None) except *:
          """
          Look up triples in the triplestore and load them into ``data``.

          Any previous content of ``data`` is replaced.

          :param LmdbTriplestore store: Reference to a LMDB triplestore. This
              is normally set to ``lakesuperior.env.app_globals.rdf_store``.
          :param tuple trp_ptn: 3-tuple of RDFlib terms or ``None``.
          :param ctx: Context passed on to the store lookup, or ``None``.
          """
          cdef:
              size_t i
              unsigned char spok[TRP_KLEN]

          self.data = set()
          # The whole lookup and key conversion run inside one store
          # transaction context.
          with store.txn_ctx():
              keyset = store.triple_keys(trp_ptn, ctx)
              for i in range(keyset.ct):
                  # i-th key; the stride suggests keys are packed back to back
                  # in TRP_KLEN-sized slots (confirm against the keyset type).
                  spok = keyset.data + i * TRP_KLEN
                  self.data.add(store.from_trp_key(spok[: TRP_KLEN]))

      # Basic set operations.

      def add(self, dataset):
          """
          Add triples to the graph.

          :param dataset: Either one triple (a 3-tuple of RDFLib terms), which
              is added as a single element, or a ``set`` of triples, which is
              merged into the graph (set union).
          """
          if isinstance(dataset, set):
              # ``set.add`` would reject a set (unhashable); merge it instead.
              self.data.update(dataset)
          else:
              self.data.add(dataset)

      def remove(self, item):
          """
          Remove one item from the graph.

          :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
              wildcards are not accepted.

          :raise KeyError: if the item is not in the graph.
          """
          self.data.remove(item)

      def __len__(self):
          """ Number of triples in the graph. """
          return len(self.data)

      @use_data
      def __eq__(self, other):
          """ Equality operator between ``SimpleGraph`` instances. """
          return self.data == other

      def __repr__(self):
          """
          String representation of the graph.

          It provides the number of triples in the graph and memory address of
          the instance.
          """
          return (f'<{self.__class__.__name__} @{hex(id(self))} '
              f'length={len(self.data)}>')

      def __str__(self):
          """ String dump of the graph triples. """
          return str(self.data)

      @use_data
      def __sub__(self, other):
          """ Set subtraction. Returns a plain ``set``. """
          return self.data - other

      @use_data
      def __isub__(self, other):
          """ In-place set subtraction. """
          self.data -= other
          return self

      @use_data
      def __and__(self, other):
          """ Set intersection. Returns a plain ``set``. """
          return self.data & other

      @use_data
      def __iand__(self, other):
          """ In-place set intersection. """
          self.data &= other
          return self

      @use_data
      def __or__(self, other):
          """ Set union. Returns a plain ``set``. """
          return self.data | other

      @use_data
      def __ior__(self, other):
          """ In-place set union. """
          self.data |= other
          return self

      @use_data
      def __xor__(self, other):
          """ Set exclusive intersection (XOR). Returns a plain ``set``. """
          return self.data ^ other

      @use_data
      def __ixor__(self, other):
          """ In-place set exclusive intersection (XOR). """
          self.data ^= other
          return self

      def __contains__(self, item):
          """
          Whether the graph contains a triple.

          :rtype: boolean
          """
          return item in self.data

      def __iter__(self):
          """ Graph iterator. It iterates over the set triples. """
          return self.data.__iter__()

      # Slicing.

      def __getitem__(self, item):
          """
          Slicing function.

          It behaves similarly to `RDFLib graph slicing
          <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__

          :param slice item: ``s:p:o`` slice; unbound terms are ``None``.

          :raise TypeError: if ``item`` is not a slice.
          """
          if isinstance(item, slice):
              s, p, o = item.start, item.stop, item.step
              return self._slice(s, p, o)
          else:
              raise TypeError(f'Wrong slice format: {item}.')

      cpdef void set(self, tuple trp) except *:
          """
          Set a single value for subject and predicate.

          Remove all triples matching ``s`` and ``p`` before adding ``s p o``.

          :param tuple trp: 3-tuple of RDFLib terms. No term may be ``None``.

          :raise ValueError: if any term is ``None``. The graph is left
              untouched in that case.
          """
          # Validate first: a ``None`` subject or predicate would otherwise be
          # used as a wildcard and wipe unrelated triples before the error.
          if None in trp:
              raise ValueError(f'Invalid triple: {trp}')
          self.remove_triples((trp[0], trp[1], None))
          self.data.add(trp)

      cpdef void remove_triples(self, pattern) except *:
          """
          Remove triples by pattern.

          The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.

          :param tuple pattern: 3-tuple of RDFLib terms; ``None`` acts as a
              wildcard.
          """
          s, p, o = pattern
          # Iterate over a snapshot: for an all-wildcard pattern ``lookup``
          # hands back ``self.data`` itself, and a set cannot be resized while
          # it is being iterated.
          for match in tuple(self.lookup(s, p, o)):
              # Lazy %-args: no formatting cost unless debug is enabled.
              logger.debug('Removing from graph: %s.', match)
              self.data.remove(match)

      cpdef object as_rdflib(self):
          """
          Return the data set as an RDFLib Graph.

          :rtype: rdflib.Graph
          """
          gr = Graph()
          for trp in self.data:
              gr.add(trp)

          return gr

      cdef _slice(self, s, p, o):
          """
          Return terms filtered by other terms.

          This behaves like the rdflib.Graph slicing policy: bound terms act
          as filters and only the unbound (``None``) positions are returned.

          :return: ``data`` itself if no term is bound; a set of 2-tuples if
              one term is bound; a set of single terms if two are bound; a
              boolean telling whether the triple exists if all are bound.
          """
          if s is None and p is None and o is None:
              return self.data
          elif s is None and p is None:
              return {(r[0], r[1]) for r in self.data if r[2] == o}
          elif s is None and o is None:
              return {(r[0], r[2]) for r in self.data if r[1] == p}
          elif p is None and o is None:
              return {(r[1], r[2]) for r in self.data if r[0] == s}
          elif s is None:
              return {r[0] for r in self.data if r[1] == p and r[2] == o}
          elif p is None:
              return {r[1] for r in self.data if r[0] == s and r[2] == o}
          elif o is None:
              return {r[2] for r in self.data if r[0] == s and r[1] == p}
          else:
              # all given
              return (s,p,o) in self.data

      cpdef lookup(self, s, p, o):
          """
          Look up triples by a pattern.

          :param s: Subject to match, or ``None`` for any.
          :param p: Predicate to match, or ``None`` for any.
          :param o: Object to match, or ``None`` for any.

          :rtype: set
          :return: Set of matching triples. With no term bound this is the
              graph's own ``data`` set, not a copy.
          """
          # Lazy %-args: no formatting cost unless debug is enabled.
          logger.debug('Looking up in graph: %s, %s, %s.', s, p, o)
          if s is None and p is None and o is None:
              return self.data
          elif s is None and p is None:
              return {r for r in self.data if r[2] == o}
          elif s is None and o is None:
              return {r for r in self.data if r[1] == p}
          elif p is None and o is None:
              return {r for r in self.data if r[0] == s}
          elif s is None:
              return {r for r in self.data if r[1] == p and r[2] == o}
          elif p is None:
              return {r for r in self.data if r[0] == s and r[2] == o}
          elif o is None:
              return {r for r in self.data if r[0] == s and r[1] == p}
          else:
              # all given: wrap the triple in a set so that every branch
              # returns a collection of triples.
              return {(s, p, o)} if (s, p, o) in self.data else set()

      cpdef set terms(self, str type):
          """
          Get all terms of a type: subject, predicate or object.

          :param str type: One of ``s``, ``p`` or ``o``.

          :rtype: set

          :raise ValueError: if ``type`` is not one of the accepted values.
          """
          # ``str.index`` alone would also accept '' and substrings like 'sp'.
          if type not in ('s', 'p', 'o'):
              raise ValueError(
                  f"Invalid term type: {type!r}. "
                  "Must be one of 's', 'p' or 'o'.")
          i = 'spo'.index(type)
          return {r[i] for r in self.data}
  cdef class Imr(SimpleGraph):
      """
      In-memory resource data container.

      A :py:class:`~SimpleGraph` that additionally carries a subject URI and
      offers a few convenience accessors built around it.

      An instance of this class can be converted to a ``rdflib.Resource``
      instance.

      The set operators that build a new object (``-``, ``|``, ``&``, ``^``)
      return a new ``Imr`` carrying the same subject URI.
      """

      cdef:
          readonly object uri

      def __init__(self, uri, *args, **kwargs):
          """
          Initialize the resource graph and bind it to a subject URI.

          All arguments other than ``uri`` are handed over unchanged to
          :py:meth:`SimpleGraph.__init__`, which accepts ``data``, ``lookup``
          and ``store``. A non-empty ``data`` is used as is; otherwise
          ``lookup`` and ``store`` are used; with none of them an empty graph
          is initialized.

          :param rdflib.URIRef uri: The graph URI.
              This will serve as the subject for some queries.
          :param set data: Initial data as a set of 3-tuples of RDFLib terms.
          :param tuple lookup: tuple of a 3-tuple of lookup terms, and a
              context. E.g.
              ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
              Any and all elements may be ``None``.
          :param lmdbStore store: the store to look data up.
          """
          super().__init__(*args, **kwargs)
          self.uri = uri

      @property
      def identifier(self):
          """
          Subject URI of this IMR, under the name RDFLib Resource uses.

          :return: The same object as the ``uri`` attribute.
          """
          return self.uri

      @property
      def graph(self):
          """
          A plain SimpleGraph built from this resource's data.

          :rtype: SimpleGraph
          """
          return SimpleGraph(self.data)

      def __repr__(self):
          """
          String representation of an Imr.

          Shows the class name, the memory address of the instance, the
          subject URI and the number of triples held.
          """
          return (f'<{self.__class__.__name__} @{hex(id(self))} uri={self.uri}, '
              f'length={len(self.data)}>')

      @use_data
      def __sub__(self, other):
          """
          Set difference, returned as a new Imr with the same subject URI.
          """
          remaining = self.data - other
          return self.__class__(uri=self.uri, data=remaining)

      @use_data
      def __and__(self, other):
          """
          Set intersection, returned as a new Imr with the same subject URI.
          """
          common = self.data & other
          return self.__class__(uri=self.uri, data=common)

      @use_data
      def __or__(self, other):
          """
          Set union, returned as a new Imr with the same subject URI.
          """
          merged = self.data | other
          return self.__class__(uri=self.uri, data=merged)

      @use_data
      def __xor__(self, other):
          """
          Set exclusive OR (XOR), returned as a new Imr with the same subject
          URI.
          """
          exclusive = self.data ^ other
          return self.__class__(uri=self.uri, data=exclusive)

      def __getitem__(self, item):
          """
          Supports slicing notation, plus lookup by predicate.

          :param item: Either an ``s:p:o`` slice, handled like in
              :py:class:`SimpleGraph`, or an RDFLib ``Node`` used as a
              predicate: in that case the objects of all triples having the
              IMR URI as subject and that predicate are returned as a set.

          :raise TypeError: if ``item`` is neither a slice nor a ``Node``.
          """
          if isinstance(item, slice):
              return self._slice(item.start, item.stop, item.step)

          if isinstance(item, Node):
              # A bare Node is read as a predicate of this resource.
              return {
                  trp[2] for trp in self.data
                  if trp[0] == self.uri and trp[1] == item}

          raise TypeError(f'Wrong slice format: {item}.')

      def value(self, p, strict=False):
          """
          Get an individual value.

          :param rdflib.termNode p: Predicate to search for.
          :param bool strict: If set to ``True`` the method raises an error if
              more than one value is found. If ``False`` (the default) only
              the first found result is returned.

          :rtype: rdflib.term.Node
          :return: One of the values found, or ``None`` if there is none.

          :raise RuntimeError: in strict mode, if more than one value is found.
          """
          found = self[p]

          if strict and len(found) > 1:
              raise RuntimeError('More than one value found for {}, {}.'.format(
                      self.uri, p))

          # First element in iteration order, if any.
          return next(iter(found), None)

      cpdef as_rdflib(self):
          """
          Return the IMR as a RDFLib Resource.

          All triples are copied into a new ``rdflib.Graph``, from which the
          resource identified by the IMR URI is obtained.

          :rtype: rdflib.Resource
          """
          rdf_gr = Graph()
          for triple in self.data:
              rdf_gr.add(triple)

          return rdf_gr.resource(identifier=self.uri)