lmdb_triplestore.pyx 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119
  1. import logging
  2. from functools import wraps
  3. from rdflib import Graph
  4. from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
  5. from rdflib.term import Node
  6. from lakesuperior.store.base_lmdb_store import (
  7. KeyExistsError, KeyNotFoundError, LmdbError)
  8. from lakesuperior.store.base_lmdb_store cimport _check
  9. from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
  10. from cython.parallel import prange
  11. from libc.stdlib cimport free
  12. from libc.string cimport memcpy
  13. cimport lakesuperior.cy_include.cylmdb as lmdb
  14. from lakesuperior.store.base_lmdb_store cimport (
  15. BaseLmdbStore, data_v, dbi, key_v)
  16. from lakesuperior.store.ldp_rs.term cimport serialize, deserialize
  17. DEF KLEN = 5
  18. """
  19. Fixed length for term keys.
  20. 4 or 5 is a safe range. 4 allows for ~4 billion (256 ** 4) unique terms
  21. in the store. 5 allows ~1 trillion terms. While these numbers may seem
  22. huge (the total number of Internet pages indexed by Google as of 2018 is 45
  23. billions), it must be reminded that the keys cannot be reused, so a
  24. repository that deletes a lot of triples may burn through a lot of terms.
  25. If a repository runs ot of keys it can no longer store new terms and must
  26. be migrated to a new database, which will regenerate and compact the keys.
  27. For smaller repositories it should be safe to set this value to 4, which
  28. could improve performance since keys make up the vast majority of record
  29. exchange between the store and the application. However it is sensible not
  30. to expose this value as a configuration option.
  31. TODO: Explore the option to use size_t (8 bits, or in some architectures,
  32. 4 bits). If the overhead of handling 8
  33. vs. 5 bytes is not huge (and maybe counterbalanced by x86_64 arch optimizations
  34. for 8-byte words) it may be worth using those instead of char[5] to simplify
  35. the code significantly.
  36. """
  37. DEF DBL_KLEN = KLEN * 2
  38. DEF TRP_KLEN = KLEN * 3
  39. DEF QUAD_KLEN = KLEN * 4
  40. DEF TERM_HASH_ALGO = 'sha1'
  41. """ Term hashing algorithm. SHA1 is the default. """
  42. DEF HLEN = 20
  43. """ Hash byte length. For SHA1 this is 20. """
  44. DEF KEY_START = b'\x01'
  45. """
  46. Lexical sequence start. ``\\x01`` is fine since no special characters are
  47. used, but it's good to leave a spare for potential future use.
  48. """
  49. DEF FIRST_KEY = KEY_START * KLEN
  50. """First key of a sequence."""
  51. DEF IDX_OP_ADD = '_idx_add'
  52. DEF IDX_OP_REMOVE = '_idx_remove'
# SHA1 digest function from OpenSSL; used by ``_hash`` to hash serialized
# terms.
cdef extern from '<openssl/sha.h>':
    unsigned char *SHA1(const unsigned char *d, size_t n, unsigned char *md)

# Fixed-size byte arrays for single, double, triple and quadruple term keys,
# and for term hashes. Their sizes come from the compile-time constants.
ctypedef unsigned char Key[KLEN]
ctypedef unsigned char DoubleKey[DBL_KLEN]
ctypedef unsigned char TripleKey[TRP_KLEN]
ctypedef unsigned char QuadKey[QUAD_KLEN]
ctypedef unsigned char Hash[HLEN]

# C-level copy of ``FIRST_KEY`` (``KLEN`` bytes).
cdef unsigned char first_key[KLEN]
memcpy(first_key, FIRST_KEY, KLEN)

cdef unsigned char lookup_rank[3]
lookup_rank = [0, 2, 1]
"""
Order in which keys are looked up if two terms are bound.
The indices with the smallest average number of values per key should be
looked up first.
0 = s:po
1 = p:so
2 = o:sp
If we want to get fancy, this can be rebalanced from time to time by
looking up the number of keys in (s:po, p:so, o:sp).
"""

# One row per single-term index, in the same order as the legend of
# ``lookup_rank``. NOTE(review): each row presumably lists the triple
# positions (0 = s, 1 = p, 2 = o) in the order they appear in that index's
# key + value layout — confirm against the lookup code.
cdef unsigned char lookup_ordering[3][3]
lookup_ordering = [
    [0, 1, 2], # spo
    [1, 0, 2], # pso
    [2, 0, 1], # osp
]

# Same idea for the two-term indices (po:s, so:p, sp:o); the row comments
# name the index each row refers to.
cdef unsigned char lookup_ordering_2bound[3][3]
lookup_ordering_2bound = [
    [1, 2, 0], # po:s
    [0, 2, 1], # so:p
    [0, 1, 2], # sp:o
]
cdef inline void _hash(
        const unsigned char *message, size_t message_size, Hash digest):
    """
    Compute the SHA1 hash of a serialized object.

    :param message: Pointer to the bytes to hash.
    :param size_t message_size: Number of bytes to read from ``message``.
    :param Hash digest: Output buffer (``HLEN`` bytes) that receives the
        digest.
    """
    SHA1(message, message_size, digest)


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
  91. cdef class ResultSet:
  92. """
  93. Pre-allocated result set.
  94. Data in the set are stored as a 1D contiguous array of characters.
  95. Access to elements at an arbitrary index position is achieved by using the
  96. ``itemsize`` property multiplied by the index number.
  97. Key properties:
  98. ``ct``: number of elements in the set.
  99. ``itemsize``: size of each element, in bytes. All elements have the same
  100. size.
  101. ``size``: Total size, in bytes, of the data set. This is the product of
  102. ``itemsize`` and ``ct``.
  103. """
  104. cdef:
  105. readonly unsigned char *data
  106. readonly unsigned char itemsize
  107. readonly size_t ct, size
  108. def __cinit__(self, size_t ct, unsigned char itemsize):
  109. """
  110. Initialize and allocate memory for the data set.
  111. :param size_t ct: Number of elements to be accounted for.
  112. :param unsigned char itemsize: Size of an individual item.
  113. Note that the ``itemsize`` is an unsigned char,
  114. i.e. an item can be at most 255 bytes. This is for economy reasons,
  115. since many multiplications are done between ``itemsize`` and other
  116. char variables.
  117. """
  118. self.ct = ct
  119. self.itemsize = itemsize
  120. self.size = self.itemsize * self.ct
  121. #logger.debug('Got malloc sizes: {}, {}'.format(ct, itemsize))
  122. #logger.debug(
  123. # 'Allocating {0} ({1}x{2}) bytes of ResultSet data...'.format(
  124. # self.size, self.ct, self.itemsize))
  125. self.data = <unsigned char *>PyMem_Malloc(ct * itemsize)
  126. if not self.data:
  127. raise MemoryError()
  128. #logger.debug('...done allocating @ {0:x}.'.format(
  129. # <unsigned long>self.data))
  130. def __dealloc__(self):
  131. """
  132. Free the memory.
  133. This is called when the Python instance is garbage collected, which
  134. makes it handy to safely pass a ResultSet instance across functions.
  135. """
  136. #logger.debug(
  137. # 'Releasing {0} ({1}x{2}) bytes of ResultSet @ {3:x}...'.format(
  138. # self.size, self.ct, self.itemsize,
  139. # <unsigned long>self.data))
  140. PyMem_Free(self.data)
  141. #logger.debug('...done releasing.')
  142. cdef void resize(self, size_t ct) except *:
  143. """
  144. Resize the result set. Uses ``PyMem_Realloc``.
  145. Note that resizing to a smaller size does not copy or reallocate the
  146. data, resizing to a larger size does.
  147. Also, note that only the number of items can be changed, the item size
  148. cannot.
  149. :param size_t ct: Number of items in the result set.
  150. """
  151. cdef unsigned char *tmp
  152. self.ct = ct
  153. self.size = self.itemsize * self.ct
  154. #logger.debug(
  155. # 'Resizing ResultSet to {0} ({1}x{2}) bytes @ {3:x}...'.format(
  156. # self.itemsize * ct, ct, self.itemsize,
  157. # <unsigned long>self.data))
  158. tmp = <unsigned char *>PyMem_Realloc(self.data, ct * self.itemsize)
  159. if not tmp:
  160. raise MemoryError()
  161. #logger.debug('...done resizing.')
  162. self.data = tmp
  163. # Access methods.
  164. def to_tuple(self):
  165. """
  166. Return the data set as a Python tuple.
  167. :rtype: tuple
  168. """
  169. return tuple(
  170. self.data[i: i + self.itemsize]
  171. for i in range(0, self.size, self.itemsize))
  172. def get_item_obj(self, i):
  173. """
  174. Get an item at a given index position.
  175. :rtype: bytes
  176. """
  177. return self.get_item(i)[: self.itemsize]
  178. cdef unsigned char *get_item(self, i):
  179. """
  180. Get an item at a given index position. Cython-level method.
  181. The item size is known by the ``itemsize`` property of the object.
  182. :rtype: unsigned char*
  183. """
  184. return self.data + self.itemsize * i
  185. def use_data(fn):
  186. """
  187. Decorator to indicate that a set operation between two SimpleGraph
  188. instances should use the ``data`` property of the second term. The second
  189. term can also be a simple set.
  190. """
  191. @wraps(fn)
  192. def _wrapper(self, other):
  193. if isinstance(other, SimpleGraph):
  194. other = other.data
  195. return _wrapper
  196. cdef class SimpleGraph:
  197. """
  198. Fast and simple implementation of a graph.
  199. Most functions should mimic RDFLib's graph with less overhead. It uses
  200. the same funny but functional slicing notation.
  201. An instance of this class can be converted to a ``rdflib.Graph`` instance.
  202. """
  203. cdef:
  204. readonly set data
  205. def __init__(
  206. self, set data=set(), tuple lookup=(), store=None):
  207. """
  208. Initialize the graph with pre-existing data or by looking up a store.
  209. Either ``data``, or both ``lookup`` and ``store``, can be provided.
  210. ``lookup`` and ``store`` have precedence. If none of them is specified,
  211. an empty graph is initialized.
  212. :param rdflib.URIRef uri: The graph URI.
  213. This will serve as the subject for some queries.
  214. :param set data: Initial data as a set of 3-tuples of RDFLib terms.
  215. :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
  216. E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
  217. Any and all elements may be ``None``.
  218. :param lmdbStore store: the store to look data up.
  219. """
  220. if data:
  221. self.data = set(data)
  222. else:
  223. if not lookup:
  224. self.data = set()
  225. else:
  226. if store is None:
  227. raise ValueError('Store not specified for triple lookup.')
  228. self._data_from_lookup(lookup, store)
  229. cdef void _data_from_lookup(
  230. self, tuple lookup, LmdbTriplestore store) except *:
  231. """
  232. Look up triples in the triplestore and load them into ``data``.
  233. :param tuple lookup: 3-tuple of RDFlib terms or ``None``.
  234. :param LmdbTriplestore store: Reference to a LMDB triplestore. This
  235. is normally set to ``lakesuperior.env.app_globals.rdf_store``.
  236. """
  237. cdef:
  238. size_t i
  239. unsigned char spok[TRP_KLEN]
  240. self.data = set()
  241. with store.txn_ctx():
  242. keyset = store.triple_keys(*lookup)
  243. for i in range(keyset.ct):
  244. spok = keyset.data + i * TRP_KLEN
  245. self.data.add(store.from_trp_key(spok[: TRP_KLEN]))
  246. # Basic set operations.
  247. def add(self, dataset):
  248. """ Set union. """
  249. self.data.add(dataset)
  250. def remove(self, item):
  251. """
  252. Remove one item from the graph.
  253. :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
  254. wildcards are not accepted.
  255. """
  256. self.data.remove(item)
  257. def __len__(self):
  258. """ Number of triples in the graph. """
  259. return len(self.data)
  260. @use_data
  261. def __eq__(self, other):
  262. """ Equality operator between ``SimpleGraph`` instances. """
  263. return self.data == other
  264. def __repr__(self):
  265. """
  266. String representation of the graph.
  267. It provides the number of triples in the graph and memory address of
  268. the instance.
  269. """
  270. return (f'<{self.__class__.__name__} @{hex(id(self))} '
  271. f'length={len(self.data)}>')
  272. def __str__(self):
  273. """ String dump of the graph triples. """
  274. return str(self.data)
  275. @use_data
  276. def __sub__(self, other):
  277. """ Set subtraction. """
  278. return self.data - other
  279. @use_data
  280. def __isub__(self, other):
  281. """ In-place set subtraction. """
  282. self.data -= other
  283. return self
  284. @use_data
  285. def __and__(self, other):
  286. """ Set intersection. """
  287. return self.data & other
  288. @use_data
  289. def __iand__(self, other):
  290. """ In-place set intersection. """
  291. self.data &= other
  292. return self
  293. @use_data
  294. def __or__(self, other):
  295. """ Set union. """
  296. return self.data | other
  297. @use_data
  298. def __ior__(self, other):
  299. """ In-place set union. """
  300. self.data |= other
  301. return self
  302. @use_data
  303. def __xor__(self, other):
  304. """ Set exclusive intersection (XOR). """
  305. return self.data ^ other
  306. @use_data
  307. def __ixor__(self, other):
  308. """ In-place set exclusive intersection (XOR). """
  309. self.data ^= other
  310. return self
  311. def __contains__(self, item):
  312. """
  313. Whether the graph contains a triple.
  314. :rtype: boolean
  315. """
  316. return item in self.data
  317. def __iter__(self):
  318. """ Graph iterator. It iterates over the set triples. """
  319. return self.data.__iter__()
  320. # Slicing.
  321. def __getitem__(self, item):
  322. """
  323. Slicing function.
  324. It behaves similarly to `RDFLib graph slicing
  325. <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__
  326. """
  327. if isinstance(item, slice):
  328. s, p, o = item.start, item.stop, item.step
  329. return self._slice(s, p, o)
  330. else:
  331. raise TypeError(f'Wrong slice format: {item}.')
  332. cpdef void set(self, tuple trp) except *:
  333. """
  334. Set a single value for subject and predicate.
  335. Remove all triples matching ``s`` and ``p`` before adding ``s p o``.
  336. """
  337. self.remove_triples((trp[0], trp[1], None))
  338. if None in trp:
  339. raise ValueError(f'Invalid triple: {trp}')
  340. self.data.add(trp)
  341. cpdef void remove_triples(self, pattern) except *:
  342. """
  343. Remove triples by pattern.
  344. The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
  345. """
  346. s, p, o = pattern
  347. for match in self.lookup(s, p, o):
  348. logger.debug(f'Removing from graph: {match}.')
  349. self.data.remove(match)
  350. cpdef object as_rdflib(self):
  351. """
  352. Return the data set as an RDFLib Graph.
  353. :rtype: rdflib.Graph
  354. """
  355. gr = Graph()
  356. for trp in self.data:
  357. gr.add(trp)
  358. return gr
  359. cdef _slice(self, s, p, o):
  360. """
  361. Return terms filtered by other terms.
  362. This behaves like the rdflib.Graph slicing policy.
  363. """
  364. if s is None and p is None and o is None:
  365. return self.data
  366. elif s is None and p is None:
  367. return {(r[0], r[1]) for r in self.data if r[2] == o}
  368. elif s is None and o is None:
  369. return {(r[0], r[2]) for r in self.data if r[1] == p}
  370. elif p is None and o is None:
  371. return {(r[1], r[2]) for r in self.data if r[0] == s}
  372. elif s is None:
  373. return {r[0] for r in self.data if r[1] == p and r[2] == o}
  374. elif p is None:
  375. return {r[1] for r in self.data if r[0] == s and r[2] == o}
  376. elif o is None:
  377. return {r[2] for r in self.data if r[0] == s and r[1] == p}
  378. else:
  379. # all given
  380. return (s,p,o) in self.data
  381. cpdef lookup(self, s, p, o):
  382. """
  383. Look up triples by a pattern.
  384. """
  385. logger.debug(f'Looking up in graph: {s}, {p}, {o}.')
  386. if s is None and p is None and o is None:
  387. return self.data
  388. elif s is None and p is None:
  389. return {r for r in self.data if r[2] == o}
  390. elif s is None and o is None:
  391. return {r for r in self.data if r[1] == p}
  392. elif p is None and o is None:
  393. return {r for r in self.data if r[0] == s}
  394. elif s is None:
  395. return {r for r in self.data if r[1] == p and r[2] == o}
  396. elif p is None:
  397. return {r for r in self.data if r[0] == s and r[2] == o}
  398. elif o is None:
  399. return {r for r in self.data if r[0] == s and r[1] == p}
  400. else:
  401. # all given
  402. return (s,p,o) if (s, p, o) in self.data else set()
  403. cpdef set terms(self, str type):
  404. """
  405. Get all terms of a type: subject, predicate or object.
  406. :param str type: One of ``s``, ``p`` or ``o``.
  407. """
  408. i = 'spo'.index(type)
  409. return {r[i] for r in self.data}
  410. cdef class Imr(SimpleGraph):
  411. """
  412. In-memory resource data container.
  413. This is an extension of :py:class:`~SimpleGraph` that adds a subject URI to
  414. the data set and some convenience methods.
  415. An instance of this class can be converted to a ``rdflib.Resource``
  416. instance.
  417. Some set operations that produce a new object (``-``, ``|``, ``&``, ``^``)
  418. will create a new ``Imr`` instance with the same subject URI.
  419. """
  420. cdef:
  421. readonly object uri
  422. def __init__(self, uri, *args, **kwargs):
  423. """
  424. Initialize the graph with pre-existing data or by looking up a store.
  425. Either ``data``, or ``lookup`` *and* ``store``, can be provide.
  426. ``lookup`` and ``store`` have precedence. If none of them is specified,
  427. an empty graph is initialized.
  428. :param rdflib.URIRef uri: The graph URI.
  429. This will serve as the subject for some queries.
  430. :param set data: Initial data as a set of 3-tuples of RDFLib terms.
  431. :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
  432. E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
  433. Any and all elements may be ``None``.
  434. :param lmdbStore store: the store to look data up.
  435. """
  436. super().__init__(*args, **kwargs)
  437. self.uri = uri
  438. @property
  439. def identifier(self):
  440. """
  441. IMR URI. For compatibility with RDFLib Resource.
  442. :rtype: string
  443. """
  444. return self.uri
  445. @property
  446. def graph(self):
  447. """
  448. Return a SimpleGraph with the same data.
  449. :rtype: SimpleGraph
  450. """
  451. return SimpleGraph(self.data)
  452. def __repr__(self):
  453. """
  454. String representation of an Imr.
  455. This includes the subject URI, number of triples contained and the
  456. memory address of the instance.
  457. """
  458. return (f'<{self.__class__.__name__} @{hex(id(self))} uri={self.uri}, '
  459. f'length={len(self.data)}>')
  460. @use_data
  461. def __sub__(self, other):
  462. """
  463. Set difference. This creates a new Imr with the same subject URI.
  464. """
  465. return self.__class__(uri=self.uri, data=self.data - other)
  466. @use_data
  467. def __and__(self, other):
  468. """
  469. Set intersection. This creates a new Imr with the same subject URI.
  470. """
  471. return self.__class__(uri=self.uri, data=self.data & other)
  472. @use_data
  473. def __or__(self, other):
  474. """
  475. Set union. This creates a new Imr with the same subject URI.
  476. """
  477. return self.__class__(uri=self.uri, data=self.data | other)
  478. @use_data
  479. def __xor__(self, other):
  480. """
  481. Set exclusive OR (XOR). This creates a new Imr with the same subject
  482. URI.
  483. """
  484. return self.__class__(uri=self.uri, data=self.data ^ other)
  485. def __getitem__(self, item):
  486. """
  487. Supports slicing notation.
  488. """
  489. if isinstance(item, slice):
  490. s, p, o = item.start, item.stop, item.step
  491. return self._slice(s, p, o)
  492. elif isinstance(item, Node):
  493. # If a Node is given, return all values for that predicate.
  494. return {
  495. r[2] for r in self.data
  496. if r[0] == self.uri and r[1] == item}
  497. else:
  498. raise TypeError(f'Wrong slice format: {item}.')
  499. def value(self, p, strict=False):
  500. """
  501. Get an individual value.
  502. :param rdflib.termNode p: Predicate to search for.
  503. :param bool strict: If set to ``True`` the method raises an error if
  504. more than one value is found. If ``False`` (the default) only
  505. the first found result is returned.
  506. :rtype: rdflib.term.Node
  507. """
  508. values = self[p]
  509. if strict and len(values) > 1:
  510. raise RuntimeError('More than one value found for {}, {}.'.format(
  511. self.uri, p))
  512. for ret in values:
  513. return ret
  514. return None
  515. cpdef as_rdflib(self):
  516. """
  517. Return the IMR as a RDFLib Resource.
  518. :rtype: rdflib.Resource
  519. """
  520. gr = Graph()
  521. for trp in self.data:
  522. gr.add(trp)
  523. return gr.resource(identifier=self.uri)
  524. cdef class LmdbTriplestore(BaseLmdbStore):
  525. """
  526. Low-level storage layer.
  527. This class extends the RDFLib-compatible :py:class:`BaseLmdbStore` and maps
  528. triples and contexts to key-value records in LMDB.
  529. This class uses the original LMDB C API rather than the Python bindings,
  530. because several data manipulations happen after retrieval from the store,
  531. which are more efficiently performed at the C level.
  532. """
    # Labels of all the databases used by this store.
    dbi_labels = [
        # Main data
        # Term key to serialized term content
        't:st',
        # Joined triple keys to context key
        'spo:c',
        # This has empty values and is used to keep track of empty contexts.
        'c:',
        # Prefix to namespace
        'pfx:ns',

        # Indices
        # Namespace to prefix
        'ns:pfx',
        # Term hash to term key
        'th:t',
        # Lookups
        's:po',
        'p:so',
        'o:sp',
        'po:s',
        'so:p',
        'sp:o',
        'c:spo',
    ]

    # Labels of the triple lookup indices. Note that these are byte strings,
    # unlike the labels in ``dbi_labels``.
    lookup_indices = [
        b's:po',
        b'p:so',
        b'o:sp',
        b'po:s',
        b'so:p',
        b'sp:o',
    ]

    # LMDB flags by database label. All the databases listed here are opened
    # with sorted, fixed-size duplicate values; labels that are not listed
    # have no entry here.
    dbi_flags = {
        's:po': lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED,
        'p:so': lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED,
        'o:sp': lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED,
        'po:s': lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED,
        'so:p': lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED,
        'sp:o': lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED,
        'c:spo': lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED,
        'spo:c': lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED,
    }

    # NOTE(review): presumably the environment-level flags used by the base
    # class; 0 means none — confirm against BaseLmdbStore.
    flags = 0

    options = {
        'map_size': 1024 ** 4 # 1 TiB, assuming the unit is bytes.
    }
  579. # DB management methods.
  580. cpdef dict stats(self):
  581. """
  582. Gather statistics about the database."""
  583. st = self._stats()
  584. st['num_triples'] = st['db_stats']['spo:c']['ms_entries']
  585. return st
    cpdef size_t _len(self, context=None) except -1:
        """
        Return the length of the dataset.

        The RDFLib interface defines `__len__` in a nonstandard way that
        causes a Cython compilation error, so this method is called by the
        `__len__` method of its Python counterpart.

        :param context: Context to count the entries of. If ``None`` (the
            default) the ``num_triples`` figure from :py:meth:`stats` is
            returned instead.

        :return: Number of values stored under the context key in the
            ``c:spo`` database, or 0 if the key is not found there.
        :rtype: size_t
        """
        cdef:
            size_t ct

        if context is not None:
            # The context key is written directly into the buffer that
            # ``key_v.mv_data`` points to.
            # NOTE(review): this assumes ``key_v.mv_data`` already points to
            # at least KLEN writable bytes — confirm where it is set up.
            self._to_key(context, <Key *>key_v.mv_data)
            key_v.mv_size = KLEN

            cur = self._cur_open('c:spo')
            try:
                # Position the cursor on the context key, then count its
                # duplicate values.
                _check(lmdb.mdb_cursor_get(
                        cur, &key_v, NULL, lmdb.MDB_SET))
                _check(lmdb.mdb_cursor_count(cur, &ct))
            except KeyNotFoundError:
                return 0
            else:
                return ct
            finally:
                # The cursor is closed on every exit path.
                self._cur_close(cur)
        else:
            return self.stats()['num_triples']
  612. ## PRIVATE METHODS ##
  613. # Triple and graph methods.
  cpdef add(self, triple, context=None, quoted=False):
      """
      Add a triple and start indexing.

      Each of the four terms (subject, predicate, object, context) is
      serialized and hashed. If the hash is already present in ``th:t`` the
      stored term key is reused; otherwise the serialized term is appended
      to ``t:st`` and its hash is indexed in ``th:t``. The resulting keys
      are then written to ``c:``, ``spo:c`` and ``c:spo`` and the lookup
      indices are updated.

      :param tuple(rdflib.Identifier) triple: Tuple of three identifiers.
      :param context: Context identifier. ``None`` inserts in the default
          graph.
      :type context: rdflib.Identifier or None
      :param bool quoted: Not used.
      """
      cdef:
          lmdb.MDB_cursor *icur
          lmdb.MDB_val spo_v, c_v, null_v
          unsigned char i
          unsigned char *pk_t
          unsigned char thash[HLEN]
          # Using Key or TripleKey here breaks Cython. This might be a bug.
          # See https://github.com/cython/cython/issues/2517
          unsigned char spock[QUAD_KLEN]
          unsigned char nkey[KLEN]
          size_t term_size

      c = self._normalize_context(context)
      if c is None:
          c = RDFLIB_DEFAULT_GRAPH_URI

      # TODO: figure out how the RDFLib dispatcher is inherited
      # (and if there is a use for it in a first place)
      #Store.add(self, triple, context)

      s, p, o = triple

      icur = self._cur_open('th:t')
      try:
          for i, term in enumerate((s, p, o, c)):
              serialize(term, &pk_t, &term_size)
              # The serialized buffer is released on every path (term
              # already stored, term newly stored, or an error raised
              # half-way), not only after a successful insert.
              try:
                  _hash(pk_t, term_size, thash)
                  try:
                      key_v.mv_data = &thash
                      key_v.mv_size = HLEN
                      _check(lmdb.mdb_get(
                              self.txn, self.get_dbi('th:t'), &key_v, &data_v))
                      # Term hash found: reuse the stored key.
                      memcpy(spock + (i * KLEN), data_v.mv_data, KLEN)
                  except KeyNotFoundError:
                      # If term is not found, add it...
                      self._append(pk_t, term_size, &nkey, dblabel=b't:st')
                      memcpy(spock + (i * KLEN), nkey, KLEN)

                      # ...and index it.
                      key_v.mv_data = thash
                      key_v.mv_size = HLEN
                      data_v.mv_data = nkey
                      data_v.mv_size = KLEN
                      _check(
                          lmdb.mdb_cursor_put(icur, &key_v, &data_v, 0),
                          'Error setting key {}.'.format(thash))
              finally:
                  free(pk_t)
      finally:
          self._cur_close(icur)

      # spock holds the s, p, o keys followed by the context key.
      spo_v.mv_data = spock
      spo_v.mv_size = TRP_KLEN
      c_v.mv_data = spock + TRP_KLEN
      c_v.mv_size = KLEN
      null_v.mv_data = b''
      null_v.mv_size = 0

      # Register the context; an already registered context is not an error.
      try:
          _check(lmdb.mdb_put(
              self.txn, self.get_dbi('c:'), &c_v, &null_v,
              lmdb.MDB_NOOVERWRITE))
      except KeyExistsError:
          pass

      try:
          # Add triple:context association.
          _check(lmdb.mdb_put(
              self.txn, self.get_dbi('spo:c'), &spo_v, &c_v,
              lmdb.MDB_NODUPDATA))
      except KeyExistsError:
          pass

      try:
          # Index context:triple association.
          _check(lmdb.mdb_put(
              self.txn, self.get_dbi('c:spo'), &c_v, &spo_v,
              lmdb.MDB_NODUPDATA))
      except KeyExistsError:
          pass

      # All main data entered. Update the lookup indices.
      self._index_triple(IDX_OP_ADD, spock[: TRP_KLEN])
  cpdef add_graph(self, graph):
      """
      Add a graph to the database.

      The graph identifier is serialized and handed to :meth:`_add_graph`,
      which stores the term and registers its key in the ``c:`` index with
      an empty value. This keeps the graph from being removed when all its
      triples are removed.

      This may be called by read-only operations:
      https://github.com/RDFLib/rdflib/blob/master/rdflib/graph.py#L1623
      In which case it needs to open a write transaction. This is not ideal
      but the only way to handle datasets in RDFLib.

      :param rdflib.URIRef graph: URI of the named graph to add. A
          ``Graph`` instance is also accepted; its ``identifier`` is used.
      """
      cdef:
          unsigned char *pk_c
          size_t pk_size

      if isinstance(graph, Graph):
          graph = graph.identifier

      serialize(graph, &pk_c, &pk_size)
      # Release the serialized buffer even if the insert raises.
      try:
          self._add_graph(pk_c, pk_size)
      finally:
          free(pk_c)
  cpdef void _add_graph(
          self, unsigned char *pk_c, size_t pk_size) except *:
      """
      Add a graph.

      Nothing is done if the hash of the serialized term is already in the
      ``th:t`` index. Otherwise the term is appended to ``t:st``, indexed
      in ``th:t`` and registered in ``c:``, either in the current
      transaction if it is read-write, or in a temporary write transaction
      that is committed before returning.

      :param pk_c: Pickled context URIRef object.
      :type pk_c: unsigned char*
      :param pk_size: Size of pickled string.
      :type pk_size: size_t
      """
      cdef:
          unsigned char c_hash[HLEN]
          unsigned char ck[KLEN]
          lmdb.MDB_txn *tmp_txn

      _hash(pk_c, pk_size, c_hash)
      # NOTE(review): a term that already exists in th:t (e.g. because it
      # was used in a triple) is not registered in c: here -- confirm this
      # is intended.
      if self._key_exists(c_hash, HLEN, b'th:t'):
          return

      # Insert context term if not existing.
      if self.is_txn_rw:
          # Use existing R/W transaction.
          # Main entry.
          self._append(pk_c, pk_size, &ck, b't:st')
          # Index.
          self._put(c_hash, HLEN, ck, KLEN, b'th:t')
          # Add to list of contexts.
          self._put(ck, KLEN, b'', 0, b'c:')
      else:
          # Open new R/W transaction.
          _check(lmdb.mdb_txn_begin(self.dbenv, NULL, 0, &tmp_txn))
          try:
              self._append(
                      pk_c, pk_size, &ck, dblabel=b't:st', txn=tmp_txn)
              # Index.
              self._put(c_hash, HLEN, ck, KLEN, b'th:t', txn=tmp_txn)
              # Add to list of contexts.
              self._put(ck, KLEN, b'', 0, b'c:', txn=tmp_txn)
          except:
              lmdb.mdb_txn_abort(tmp_txn)
              raise
          else:
              # The commit stays outside the aborting handler: LMDB frees
              # the transaction handle in mdb_txn_commit even when the
              # commit fails, so it must not be aborted afterwards.
              _check(lmdb.mdb_txn_commit(tmp_txn))
  cpdef void _remove(self, tuple triple_pattern, context=None) except *:
      """
      Remove all triples matching a pattern.

      If a context is given, only the associations between the matching
      triples and that context are removed, and a triple is dropped from
      the lookup indices only when no other context references it. If no
      context is given, the matching triples are removed from every
      context and from the lookup indices.

      :param tuple triple_pattern: 3 RDFLib terms; ``None`` is a wildcard.
      :param context: Context to remove from, or ``None`` for all contexts.
      """
      cdef:
          unsigned char spok[TRP_KLEN]
          size_t i = 0
          Key ck
          lmdb.MDB_val spok_v, ck_v

      if context is not None:
          try:
              self._to_key(context, &ck)
          except KeyNotFoundError:
              # If context is specified but not found, return to avoid
              # deleting the wrong triples.
              return

      # Get the matching pattern.
      match_set = self.triple_keys(triple_pattern, context)

      dcur = self._cur_open('spo:c')
      icur = self._cur_open('c:spo')

      try:
          spok_v.mv_size = TRP_KLEN
          # If context was specified, remove only associations with that
          # context.
          if context is not None:
              ck_v.mv_data = ck
              ck_v.mv_size = KLEN
              while i < match_set.ct:
                  memcpy(
                          spok, match_set.data + match_set.itemsize * i,
                          TRP_KLEN)
                  spok_v.mv_data = spok
                  # Delete spo:c entry.
                  try:
                      _check(lmdb.mdb_cursor_get(
                              dcur, &spok_v, &ck_v, lmdb.MDB_GET_BOTH))
                  except KeyNotFoundError:
                      pass
                  else:
                      _check(lmdb.mdb_cursor_del(dcur, 0))

                      # Restore ck after delete.
                      ck_v.mv_data = ck

                      # Delete c:spo entry.
                      try:
                          _check(lmdb.mdb_cursor_get(
                                  icur, &ck_v, &spok_v, lmdb.MDB_GET_BOTH))
                      except KeyNotFoundError:
                          pass
                      else:
                          _check(lmdb.mdb_cursor_del(icur, 0))

                      # Delete lookup indices, only if no other context
                      # association is present.
                      spok_v.mv_data = spok
                      try:
                          _check(lmdb.mdb_cursor_get(
                              dcur, &spok_v, NULL, lmdb.MDB_SET))
                      except KeyNotFoundError:
                          self._index_triple(IDX_OP_REMOVE, spok)
                  i += 1

          # If no context is specified, remove all associations.
          else:
              # Loop over all SPO matching the triple pattern.
              while i < match_set.ct:
                  spok = match_set.data + match_set.itemsize * i
                  spok_v.mv_data = spok
                  # Loop over all context associations for this SPO.
                  try:
                      _check(lmdb.mdb_cursor_get(
                          dcur, &spok_v, &ck_v, lmdb.MDB_SET_KEY))
                  except KeyNotFoundError:
                      # Move on to the next SPO. The ``finally`` clause
                      # below still advances the counter.
                      continue
                  else:
                      ck = <Key>ck_v.mv_data
                      logger.debug(f'Removing {spok[: TRP_KLEN]} from main.')
                      while True:
                          # Delete c:spo association.
                          try:
                              _check(lmdb.mdb_cursor_get(
                                  icur, &ck_v, &spok_v, lmdb.MDB_GET_BOTH))
                          except KeyNotFoundError:
                              pass
                          else:
                              # Check the return code, as the
                              # context-specific branch above does.
                              _check(lmdb.mdb_cursor_del(icur, 0))
                              # Restore the pointer to the deleted SPO.
                              spok_v.mv_data = spok
                          # Move on to next associated context.
                          try:
                              _check(lmdb.mdb_cursor_get(
                                  dcur, &spok_v, &ck_v, lmdb.MDB_NEXT_DUP))
                          except KeyNotFoundError:
                              break
                      # Then delete the spo:c association.
                      try:
                          _check(lmdb.mdb_cursor_get(
                              dcur, &spok_v, &ck_v, lmdb.MDB_SET))
                      except KeyNotFoundError:
                          pass
                      else:
                          # MDB_NODUPDATA removes every context entry of
                          # this triple at once.
                          _check(lmdb.mdb_cursor_del(
                              dcur, lmdb.MDB_NODUPDATA))
                          self._index_triple(IDX_OP_REMOVE, spok)
                  finally:
                      i += 1

      finally:
          self._cur_close(dcur)
          self._cur_close(icur)
  cdef void _index_triple(self, str op, TripleKey spok) except *:
      """
      Update index for a triple and context (add or remove).

      For each of the three positions, one single-term key (s, p, o) and
      the complementary two-term key (po, so, sp) are written to, or
      deleted from, a pair of lookup indices: ``lookup_indices[i]`` keyed
      by the single term and ``lookup_indices[i + 3]`` keyed by the pair.

      :param str op: Operation to perform. It is compared against
          ``IDX_OP_ADD`` and ``IDX_OP_REMOVE``.
      :param TripleKey spok: Triple key.

      :raise ValueError: If ``op`` is neither of the supported operations.
      """
      cdef:
          unsigned char keys[3][KLEN]
          unsigned char dbl_keys[3][DBL_KLEN]
          size_t i = 0
          lmdb.MDB_val key_v, dbl_key_v

      # Split the triple key into single-term and two-term keys.
      keys[0] = spok # sk
      keys[1] = spok + KLEN # pk
      keys[2] = spok + DBL_KLEN # ok

      dbl_keys[0] = spok + KLEN # pok
      # s and o are not adjacent in spok, so sok is assembled in two steps.
      memcpy(&dbl_keys[1], spok, KLEN) # sok, 1st part
      memcpy(&dbl_keys[1][KLEN], spok + DBL_KLEN, KLEN) # sok, 2nd part
      dbl_keys[2] = spok # spk
      #logger.debug('''Indices:
      #spok: {}
      #sk: {}
      #pk: {}
      #ok: {}
      #pok: {}
      #sok: {}
      #spk: {}
      #'''.format(
      #        spok[:TRP_KLEN],
      #        keys[0][:KLEN], keys[1][:KLEN], keys[2][:KLEN],
      #        dbl_keys[0][:DBL_KLEN], dbl_keys[1][:DBL_KLEN], dbl_keys[2][:DBL_KLEN]))
      key_v.mv_size = KLEN
      dbl_key_v.mv_size = DBL_KLEN

      #logger.debug('Start indexing: {}.'.format(spok[: TRP_KLEN]))
      if op == IDX_OP_REMOVE:
          logger.debug(f'Remove {spok[ : TRP_KLEN]} from indices.')
      else:
          logger.debug(f'Add {spok[ : TRP_KLEN]} to indices.')

      # One pass per term position; each pass opens and closes its own pair
      # of cursors.
      while i < 3:
          cur1 = self._cur_open(self.lookup_indices[i]) # s:po, p:so, o:sp
          cur2 = self._cur_open(self.lookup_indices[i + 3])# po:s, so:p, sp:o
          try:
              key_v.mv_data = keys[i]
              dbl_key_v.mv_data = dbl_keys[i]

              # Removal op indexing.
              if op == IDX_OP_REMOVE:
                  # NOTE(review): the 'Removed' message is logged once the
                  # entry is found, before mdb_cursor_del actually runs.
                  try:
                      _check(lmdb.mdb_cursor_get(
                              cur1, &key_v, &dbl_key_v, lmdb.MDB_GET_BOTH))
                      logger.debug(f'Removed: {keys[i][: KLEN]}, '
                              f'{dbl_keys[i][: DBL_KLEN]}')
                  except KeyNotFoundError:
                      logger.debug(f'Not found in index: {keys[i][: KLEN]}, '
                              f'{dbl_keys[i][: DBL_KLEN]}')
                      pass
                  else:
                      _check(lmdb.mdb_cursor_del(cur1, 0))

                  # Restore pointers after delete.
                  key_v.mv_data = keys[i]
                  dbl_key_v.mv_data = dbl_keys[i]
                  try:
                      _check(lmdb.mdb_cursor_get(
                              cur2, &dbl_key_v, &key_v, lmdb.MDB_GET_BOTH))
                      logger.debug(f'Removed: {dbl_keys[i][: DBL_KLEN]}, '
                              f'{keys[i][: KLEN]}')
                  except KeyNotFoundError:
                      logger.debug(f'Not found in index: '
                              f'{dbl_keys[i][: DBL_KLEN]}, '
                              f'{keys[i][: KLEN]}')
                      pass
                  else:
                      _check(lmdb.mdb_cursor_del(cur2, 0))

              # Addition op indexing.
              elif op == IDX_OP_ADD:
                  logger.debug('Adding to index `{}`: {}, {}'.format(
                      self.lookup_indices[i],
                      (<unsigned char *>key_v.mv_data)[ : key_v.mv_size],
                      (<unsigned char *>dbl_key_v.mv_data)[ : dbl_key_v.mv_size]))

                  # An entry that is already indexed is not an error.
                  try:
                      _check(lmdb.mdb_cursor_put(
                              cur1, &key_v, &dbl_key_v, lmdb.MDB_NODUPDATA))
                  except KeyExistsError:
                      logger.debug(f'Key {keys[i][: KLEN]} exists already.')
                      pass

                  logger.debug('Adding to index `{}`: {}, {}'.format(
                      self.lookup_indices[i + 3],
                      (<unsigned char *>dbl_key_v.mv_data)[ : dbl_key_v.mv_size],
                      (<unsigned char *>key_v.mv_data)[ : key_v.mv_size]))

                  try:
                      _check(lmdb.mdb_cursor_put(
                              cur2, &dbl_key_v, &key_v, lmdb.MDB_NODUPDATA))
                  except KeyExistsError:
                      logger.debug(f'Double key {dbl_keys[i][: DBL_KLEN]} exists already.')
                      pass

              else:
                  raise ValueError(
                      'Index operation \'{}\' is not supported.'.format(op))

              i += 1

          finally:
              # Cursors are closed whether or not the pass succeeded.
              self._cur_close(cur1)
              self._cur_close(cur2)
  cpdef void _remove_graph(self, object gr_uri) except *:
      """
      Delete a context.

      Removes every triple in the graph, every triple (in any context)
      having the graph URI as subject or object, and finally the graph's
      own entries in ``c:``, ``t:st`` and ``th:t``. Does nothing if the
      graph URI has no key in the store.

      :param gr_uri: Identifier of the graph to remove.
      """
      cdef:
          unsigned char chash[HLEN]
          unsigned char ck[KLEN]
          unsigned char *pk_c
          size_t c_size
          lmdb.MDB_val ck_v, chash_v

      # Gather information on the graph prior to deletion.
      try:
          self._to_key(gr_uri, &ck)
      except KeyNotFoundError:
          return

      removal_plan = (
          # All triples and indices associated with the graph.
          ((None, None, None), gr_uri),
          # The graph itself, where it appears in triples.
          ((gr_uri, None, None), None),
          ((None, None, gr_uri), None),
      )
      for pattern, ctx in removal_plan:
          self._remove(pattern, ctx)

      # Clean up all terms related to the graph.
      serialize(gr_uri, &pk_c, &c_size)
      _hash(pk_c, c_size, chash)
      free(pk_c)

      ck_v.mv_size = KLEN
      ck_v.mv_data = ck
      chash_v.mv_size = HLEN
      chash_v.mv_data = chash
      try:
          _check(lmdb.mdb_del(self.txn, self.get_dbi(b'c:'), &ck_v, NULL))
          ck_v.mv_data = ck
          _check(lmdb.mdb_del(self.txn, self.get_dbi(b't:st'), &ck_v, NULL))
          _check(lmdb.mdb_del(self.txn, self.get_dbi(b'th:t'), &chash_v, NULL))
      except KeyNotFoundError:
          # The first missing entry ends the cleanup silently.
          pass
  1019. # Lookup methods.
  1020. # TODO Deprecate RDFLib API?
  def contexts(self, triple=None):
      """
      Iterate over contexts as RDFLib graphs bound to this store.

      :param triple: Passed through to :meth:`all_contexts`.

      :rtype: Iterator(rdflib.Graph)
      """
      for ctx_key in self.all_contexts(triple):
          identifier = self.from_key(ctx_key)
          yield Graph(identifier=identifier, store=self)
  def triples(self, triple_pattern, context=None):
      """
      Generator over matching triples.

      The matching triple keys are resolved first with
      :meth:`triple_keys`; then, for each of them, all associated contexts
      are read from the ``spo:c`` index.

      :param tuple triple_pattern: 3 RDFLib terms
      :param context: Context graph, if available.
      :type context: rdflib.Graph or None

      :rtype: Iterator
      :return: Generator over triples and contexts in which each result has
          the following format::

              (s, p, o), tuple(contexts)

      Where the contexts tuple lists, as ``Graph`` objects, all contexts
      that the triple appears in.
      """
      cdef:
          size_t i = 0, j = 0
          unsigned char spok[TRP_KLEN]
          unsigned char ck[KLEN]
          lmdb.MDB_val key_v, data_v

      # This sounds strange, RDFLib should be passing None at this point,
      # but anyway...
      context = self._normalize_context(context)

      logger.debug(
              'Getting triples for: {}, {}'.format(triple_pattern, context))
      rset = self.triple_keys(triple_pattern, context)

      #logger.debug('Triple keys found: {}'.format(rset.data[:rset.size]))

      # The cursor stays open across yields and is closed when the
      # generator finishes or is closed.
      cur = self._cur_open('spo:c')
      try:
          key_v.mv_size = TRP_KLEN
          for i in range(rset.ct):
              #logger.debug('Checking contexts for triples: {}'.format(
              #    (rset.data + i * TRP_KLEN)[:TRP_KLEN]))
              key_v.mv_data = rset.data + i * TRP_KLEN
              # Get contexts associated with each triple.
              contexts = []
              # This shall never be MDB_NOTFOUND.
              _check(lmdb.mdb_cursor_get(cur, &key_v, &data_v, lmdb.MDB_SET))
              # Walk all duplicate values (contexts) of the current triple.
              while True:
                  c_uri = self.from_key(<Key>data_v.mv_data)
                  contexts.append(Graph(identifier=c_uri, store=self))
                  try:
                      _check(lmdb.mdb_cursor_get(
                          cur, &key_v, &data_v, lmdb.MDB_NEXT_DUP))
                  except KeyNotFoundError:
                      break

              #logger.debug('Triple keys before yield: {}: {}.'.format(
              #    (<TripleKey>key_v.mv_data)[:TRP_KLEN], tuple(contexts)))
              yield self.from_trp_key(
                  (<TripleKey>key_v.mv_data)[: TRP_KLEN]), tuple(contexts)
              #logger.debug('After yield.')
      finally:
          self._cur_close(cur)
  cpdef ResultSet triple_keys(self, tuple triple_pattern, context=None):
      """
      Top-level lookup method.

      This method is used by `triples` which returns native Python tuples,
      as well as by other methods that need to iterate and filter triple
      keys without incurring in the overhead of converting them to triples.

      A term of the pattern is a wildcard only if it is ``None``. Terms
      that are merely falsy (e.g. an empty literal) are bound terms, in
      agreement with :meth:`_lookup`.

      :param tuple triple_pattern: 3 RDFLib terms
      :param context: Context graph or URI, or None.
      :type context: rdflib.term.Identifier or None

      :rtype: ResultSet
      :return: Matching triple keys. The set is empty if the context or
          any bound term is not found.
      """
      # TODO: Improve performance by allowing passing contexts as a tuple.
      cdef:
          unsigned char tk[KLEN]
          unsigned char ck[KLEN]
          unsigned char spok[TRP_KLEN]
          size_t ct = 0, flt_j = 0, i = 0, j = 0, pg_offset = 0
          lmdb.MDB_cursor *icur
          lmdb.MDB_val key_v, data_v
          ResultSet flt_res, ret

      if context is not None:
          try:
              self._to_key(context, &ck)
          except KeyNotFoundError:
              # Context not found.
              return ResultSet(0, TRP_KLEN)

          s, p, o = triple_pattern
          all_bound = s is not None and p is not None and o is not None
          none_bound = s is None and p is None and o is None

          icur = self._cur_open('c:spo')

          try:
              key_v.mv_data = ck
              key_v.mv_size = KLEN

              # s p o c
              if all_bound:
                  for i, term in enumerate((s, p, o)):
                      try:
                          self._to_key(term, &tk)
                      except KeyNotFoundError:
                          # A term in the triple is not found.
                          return ResultSet(0, TRP_KLEN)
                      memcpy(spok + (KLEN * i), tk, KLEN)
                  data_v.mv_data = spok
                  data_v.mv_size = TRP_KLEN
                  try:
                      _check(lmdb.mdb_cursor_get(
                              icur, &key_v, &data_v, lmdb.MDB_GET_BOTH))
                  except KeyNotFoundError:
                      # Triple not found in this context.
                      return ResultSet(0, TRP_KLEN)
                  ret = ResultSet(1, TRP_KLEN)
                  memcpy(ret.data, spok, TRP_KLEN)

                  return ret

              # ? ? ? c
              elif none_bound:
                  # Get all triples from the context
                  try:
                      _check(lmdb.mdb_cursor_get(
                          icur, &key_v, &data_v, lmdb.MDB_SET))
                  except KeyNotFoundError:
                      # No triples in this context.
                      return ResultSet(0, TRP_KLEN)

                  _check(lmdb.mdb_cursor_count(icur, &ct))
                  ret = ResultSet(ct, TRP_KLEN)
                  logger.debug(f'Entries in c:spo: {ct}')
                  logger.debug(f'Allocated {ret.size} bytes.')

                  logger.debug('Looking in key: {}'.format(
                      (<unsigned char *>key_v.mv_data)[:key_v.mv_size]))
                  _check(lmdb.mdb_cursor_get(
                      icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
                  # Copy the results one page of values at a time.
                  while True:
                      memcpy(ret.data + pg_offset, data_v.mv_data, data_v.mv_size)
                      pg_offset += data_v.mv_size

                      try:
                          _check(lmdb.mdb_cursor_get(
                              icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
                      except KeyNotFoundError:
                          return ret

              # Regular lookup. Filter _lookup() results by context.
              else:
                  try:
                      res = self._lookup(triple_pattern)
                  except KeyNotFoundError:
                      return ResultSet(0, TRP_KLEN)

                  key_v.mv_data = ck
                  key_v.mv_size = KLEN
                  data_v.mv_size = TRP_KLEN

                  flt_res = ResultSet(res.ct, res.itemsize)
                  while j < res.ct:
                      data_v.mv_data = res.data + j * res.itemsize
                      try:
                          # Verify that the triple is associated with the
                          # context being searched.
                          _check(lmdb.mdb_cursor_get(
                              icur, &key_v, &data_v, lmdb.MDB_GET_BOTH))
                      except KeyNotFoundError:
                          # Discard this row; ``finally`` still advances j.
                          continue
                      else:
                          memcpy(
                                  flt_res.data + res.itemsize * flt_j,
                                  res.data + res.itemsize * j, res.itemsize)

                          flt_j += 1
                      finally:
                          j += 1

                  # Resize result set to the size of context matches.
                  # This crops the memory block without copying it.
                  flt_res.resize(flt_j)
                  return flt_res
          finally:
              self._cur_close(icur)

      # Unfiltered lookup. No context checked.
      else:
          try:
              res = self._lookup(triple_pattern)
          except KeyNotFoundError:
              return ResultSet(0, TRP_KLEN)
          return res
  cdef ResultSet _lookup(self, tuple triple_pattern):
      """
      Resolve a triple pattern to triple keys, ignoring contexts.

      The pattern is dispatched on which of its terms are ``None``: a
      fully bound pattern is checked directly against ``spo:c``, patterns
      with one or two bound terms are delegated to :meth:`_lookup_1bound`
      and :meth:`_lookup_2bound`, and a fully unbound pattern scans all
      distinct keys of ``spo:c``.

      :param tuple triple_pattern: 3 RDFLib terms; ``None`` is a wildcard.

      :rtype: ResultSet
      :return: Matching triple keys.
      """
      cdef:
          TripleKey spok
          lmdb.MDB_stat db_stat
          size_t ct = 0, i = 0
          lmdb.MDB_val spok_v, ck_v

      s, p, o = triple_pattern

      if s is not None:
          if p is not None:
              if o is None:
                  # s p ?
                  return self._lookup_2bound(0, s, 1, p)

              # s p o
              spok_v.mv_data = spok
              spok_v.mv_size = TRP_KLEN
              try:
                  self._to_triple_key(triple_pattern, &spok)
                  _check(lmdb.mdb_get(
                      self.txn, self.get_dbi('spo:c'), &spok_v, &ck_v))
              except KeyNotFoundError:
                  return ResultSet(0, TRP_KLEN)

              matches = ResultSet(1, TRP_KLEN)
              memcpy(matches.data, spok, TRP_KLEN)
              return matches

          if o is not None:
              # s ? o
              return self._lookup_2bound(0, s, 2, o)
          # s ? ?
          return self._lookup_1bound(0, s)

      if p is not None:
          if o is not None:
              # ? p o
              return self._lookup_2bound(1, p, 2, o)
          # ? p ?
          return self._lookup_1bound(1, p)

      if o is not None:
          # ? ? o
          return self._lookup_1bound(2, o)

      # ? ? ?
      # Get all triples in the database.
      logger.debug('Getting all DB triples.')
      dcur = self._cur_open('spo:c')

      try:
          _check(lmdb.mdb_stat(
                  self.txn, lmdb.mdb_cursor_dbi(dcur), &db_stat),
              'Error gathering DB stats.')
          ct = db_stat.ms_entries
          ret = ResultSet(ct, TRP_KLEN)
          logger.debug(f'Triples found: {ct}')
          if ct == 0:
              return ResultSet(0, TRP_KLEN)

          _check(lmdb.mdb_cursor_get(
                  dcur, &key_v, &data_v, lmdb.MDB_FIRST))
          # Copy each distinct key, skipping its duplicate values.
          while True:
              logger.debug(f'i in 0bound: {i}')
              memcpy(
                      ret.data + ret.itemsize * i,
                      key_v.mv_data, TRP_KLEN)

              try:
                  _check(lmdb.mdb_cursor_get(
                      dcur, &key_v, &data_v, lmdb.MDB_NEXT_NODUP))
              except KeyNotFoundError:
                  break

              i += 1

          # Size is guessed from all entries. Unique keys will be
          # much less than that.
          ret.resize(i + 1)

          return ret
      finally:
          self._cur_close(dcur)
  cdef ResultSet _lookup_1bound(self, unsigned char idx, term):
      """
      Lookup triples for a pattern with one bound term.

      :param unsigned char idx: Position of the bound term. It selects both
          the index to search (``self.lookup_indices[idx]``) and the term
          ordering used to reassemble triple keys (``lookup_ordering[idx]``).
      :param rdflib.URIRef term: Bound term to search for.

      :rtype: ResultSet
      :return: SPO keys matching the pattern. The set is empty if the term
          has no key in the store.
      """
      cdef:
          unsigned char luk[KLEN]
          unsigned char asm_rng[3]
          size_t ct, ret_offset = 0, src_pos, ret_pos
          # NOTE(review): the original comment asks for a signed type, but
          # size_t is unsigned -- confirm against the supported OpenMP
          # versions.
          size_t j # Must be signed for older OpenMP versions
          lmdb.MDB_cursor *icur

      logger.debug(f'lookup 1bound: {idx}, {term}')
      try:
          self._to_key(term, &luk)
      except KeyNotFoundError:
          return ResultSet(0, TRP_KLEN)
      logger.debug('luk: {}'.format(luk))

      term_order = lookup_ordering[idx]
      icur = self._cur_open(self.lookup_indices[idx])
      logger.debug(f'DB label: {self.lookup_indices[idx]}')
      logger.debug('term order: {}'.format(term_order[: 3]))

      try:
          key_v.mv_data = luk
          key_v.mv_size = KLEN

          # Count the values stored under the key to size the result set.
          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_count(icur, &ct))

          # Allocate memory for results.
          ret = ResultSet(ct, TRP_KLEN)
          logger.debug(f'Entries for {self.lookup_indices[idx]}: {ct}')
          logger.debug(f'Allocated {ret.size} bytes of data.')

          # Arrange results according to lookup order: asm_rng holds the
          # byte offset, within each output triple key, of the looked up
          # term and of the two terms read from the index.
          asm_rng = [
              KLEN * term_order[0],
              KLEN * term_order[1],
              KLEN * term_order[2],
          ]
          logger.debug('asm_rng: {}'.format(asm_rng[:3]))
          logger.debug('luk: {}'.format(luk))

          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_get(
              icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
          while True:
              logger.debug('ret_offset: {}'.format(ret_offset))
              logger.debug(f'Page size: {data_v.mv_size}')
              # Each value in the page is a double key (two terms).
              for j in prange(data_v.mv_size // DBL_KLEN, nogil=True):
                  src_pos = DBL_KLEN * j
                  ret_pos = (ret_offset + ret.itemsize * j)
                  memcpy(ret.data + ret_pos + asm_rng[0], luk, KLEN)
                  memcpy(ret.data + ret_pos + asm_rng[1],
                          data_v.mv_data + src_pos, KLEN)
                  memcpy(ret.data + ret_pos + asm_rng[2],
                          data_v.mv_data + src_pos + KLEN, KLEN)

              # Increment MUST be done before MDB_NEXT_MULTIPLE otherwise
              # data_v.mv_size will be overwritten with the *next* page size
              # and cause corruption in the output data.
              ret_offset += data_v.mv_size // DBL_KLEN * ret.itemsize

              try:
                  # Get results by the page.
                  _check(lmdb.mdb_cursor_get(
                          icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
              except KeyNotFoundError:
                  # No more pages: the result set is complete.
                  return ret
      finally:
          self._cur_close(icur)
  cdef ResultSet _lookup_2bound(
          self, unsigned char idx1, term1, unsigned char idx2, term2):
      """
      Look up triples for a pattern with two bound terms.

      The index to search is the entry of ``lookup_ordering_2bound`` whose
      first two positions are ``idx1`` and ``idx2``, in either order.

      :param unsigned char idx1: Position of the first bound term.
      :param rdflib.URIRef term1: First bound term to search for.
      :param unsigned char idx2: Position of the second bound term.
      :param rdflib.URIRef term2: Second bound term to search for.

      :rtype: ResultSet
      :return: SPO keys matching the pattern. The set is empty if either
          term has no key in the store.

      :raise ValueError: If no index covers the two positions.
      """
      cdef:
          unsigned char luk1_offset, luk2_offset
          unsigned char luk1[KLEN]
          unsigned char luk2[KLEN]
          unsigned char luk[DBL_KLEN]
          unsigned char asm_rng[3]
          unsigned char term_order[3] # Lookup ordering
          size_t ct, i = 0, ret_offset = 0, ret_pos, src_pos
          # NOTE(review): the original comment asks for a signed type, but
          # size_t is unsigned -- confirm against the supported OpenMP
          # versions.
          size_t j # Must be signed for older OpenMP versions
          lmdb.MDB_cursor *icur
          ResultSet ret

      logger.debug(
              f'2bound lookup for term {term1} at position {idx1} '
              f'and term {term2} at position {idx2}.')
      try:
          self._to_key(term1, &luk1)
          self._to_key(term2, &luk2)
      except KeyNotFoundError:
          return ResultSet(0, TRP_KLEN)
      logger.debug('luk1: {}'.format(luk1[: KLEN]))
      logger.debug('luk2: {}'.format(luk2[: KLEN]))

      # Find the index keyed by the two bound positions and work out in
      # which half of the lookup key each term goes.
      for i in range(3):
          if (
                  idx1 in lookup_ordering_2bound[i][: 2]
                  and idx2 in lookup_ordering_2bound[i][: 2]):
              term_order = lookup_ordering_2bound[i]
              if term_order[0] == idx1:
                  luk1_offset = 0
                  luk2_offset = KLEN
              else:
                  luk1_offset = KLEN
                  luk2_offset = 0
              dblabel = self.lookup_indices[i + 3] # skip 1bound index labels
              break
      else:
          raise ValueError(
                  'Indices {} and {} not found in LU keys.'.format(
                      idx1, idx2))

      logger.debug('Term order: {}'.format(term_order[:3]))
      logger.debug('LUK offsets: {}, {}'.format(luk1_offset, luk2_offset))
      # Compose terms in lookup key.
      memcpy(luk + luk1_offset, luk1, KLEN)
      memcpy(luk + luk2_offset, luk2, KLEN)

      logger.debug('Lookup key: {}'.format(luk))

      icur = self._cur_open(dblabel)
      logger.debug('Database label: {}'.format(dblabel))

      try:
          key_v.mv_data = luk
          key_v.mv_size = DBL_KLEN

          # Count duplicates for key and allocate memory for result set.
          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_count(icur, &ct))
          ret = ResultSet(ct, TRP_KLEN)

          # Arrange results according to lookup order: asm_rng holds the
          # byte offset, within each output triple key, of the two looked
          # up terms and of the term read from the index.
          asm_rng = [
              KLEN * term_order[0],
              KLEN * term_order[1],
              KLEN * term_order[2],
          ]
          logger.debug('asm_rng: {}'.format(asm_rng[:3]))
          logger.debug('luk: {}'.format(luk))

          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_get(
              icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
          while True:
              logger.debug('Got data in 2bound ({}): {}'.format(
                  data_v.mv_size,
                  (<unsigned char *>data_v.mv_data)[: data_v.mv_size]))
              # Each value in the page is a single term key.
              for j in prange(data_v.mv_size // KLEN, nogil=True):
                  src_pos = KLEN * j
                  ret_pos = (ret_offset + ret.itemsize * j)
                  memcpy(ret.data + ret_pos + asm_rng[0], luk, KLEN)
                  memcpy(ret.data + ret_pos + asm_rng[1], luk + KLEN, KLEN)
                  memcpy(ret.data + ret_pos + asm_rng[2],
                          data_v.mv_data + src_pos, KLEN)

              # Advance before fetching the next page, which overwrites
              # data_v.mv_size.
              ret_offset += data_v.mv_size // KLEN * ret.itemsize

              try:
                  # Get results by the page.
                  _check(lmdb.mdb_cursor_get(
                          icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
              except KeyNotFoundError:
                  # No more pages: the result set is complete.
                  return ret
      finally:
          self._cur_close(icur)
  cpdef ResultSet _all_term_keys(self, term_type):
      """
      Return all keys of a (``s:po``, ``p:so``, ``o:sp``) index.

      :param str term_type: One of ``s``, ``p`` or ``o``. Its position in
          ``'spo'`` selects the entry of ``self.lookup_indices`` to scan.

      :rtype: ResultSet
      :return: Distinct term keys, ``KLEN`` bytes each. The set is empty
          if the index has no entries.
      """
      cdef:
          size_t i = 0
          lmdb.MDB_stat stat

      idx_label = self.lookup_indices['spo'.index(term_type)]
      icur = self._cur_open(idx_label)
      try:
          _check(lmdb.mdb_stat(self.txn, lmdb.mdb_cursor_dbi(icur), &stat))
          # TODO: This may allocate memory for several times the amount
          # needed. Even though it is resized later, we need to know how
          # performance is affected by this.
          ret = ResultSet(stat.ms_entries, KLEN)

          try:
              _check(lmdb.mdb_cursor_get(
                  icur, &key_v, NULL, lmdb.MDB_FIRST))
          except KeyNotFoundError:
              # Same item size as the populated result.
              return ResultSet(0, KLEN)

          # Copy each distinct key, skipping its duplicate values.
          while True:
              memcpy(ret.data + ret.itemsize * i, key_v.mv_data, KLEN)

              rc = lmdb.mdb_cursor_get(
                  icur, &key_v, NULL, lmdb.MDB_NEXT_NODUP)
              try:
                  _check(rc)
              except KeyNotFoundError:
                  # i is the index of the last key copied.
                  ret.resize(i + 1)
                  return ret
              i += 1
      finally:
          self._cur_close(icur)
  def all_terms(self, term_type):
      """
      Iterate over all terms of a type (``s``, ``p``, or ``o``) in the store.

      The keys of the matching index are collected first, then each one is
      converted to a term as the generator is consumed.

      :param str term_type: ``s``, ``p``, or ``o``.
      """
      term_keys = self._all_term_keys(term_type).to_tuple()
      for term_key in term_keys:
          yield self.from_key(term_key)
  cpdef tuple all_namespaces(self):
      """
      Return all registered namespaces.

      :rtype: tuple
      :return: 2-tuples of decoded strings ``(key, value)``, one for each
          entry of the ``pfx:ns`` database in cursor order. An empty tuple
          if the database has no entries.
      """
      ret = []
      dcur = self._cur_open('pfx:ns')
      try:
          try:
              _check(lmdb.mdb_cursor_get(
                  dcur, &key_v, &data_v, lmdb.MDB_FIRST))
          except KeyNotFoundError:
              return tuple()

          while True:
              # Slice by the sizes reported by LMDB before decoding.
              ret.append((
                  (<unsigned char *>key_v.mv_data)[: key_v.mv_size].decode(),
                  (<unsigned char *>data_v.mv_data)[: data_v.mv_size].decode()))
              try:
                  _check(lmdb.mdb_cursor_get(
                      dcur, &key_v, &data_v, lmdb.MDB_NEXT))
              except KeyNotFoundError:
                  # No more entries.
                  return tuple(ret)
      finally:
          self._cur_close(dcur)
  cpdef tuple all_contexts(self, triple=None):
      """
      Get all context keys, optionally only those recorded for one triple.

      :param tuple triple: If given and all of its members are truthy, only
          the values stored under that triple's key in the ``spo:c`` index
          are returned. Otherwise all keys of the ``c:`` index are returned.

      :rtype: tuple
      :return: The collected keys as produced by ``ResultSet.to_tuple()``,
          or an empty tuple if nothing is found.
      """
      cdef:
          lmdb.MDB_stat stat
          size_t i = 0
          unsigned char spok[TRP_KLEN]

      # Evaluate the branch condition once; it selects both the index to
      # open and the scan strategy.
      by_triple = bool(triple and all(triple))

      cur = (
              self._cur_open('spo:c') if by_triple
              else self._cur_open('c:'))
      try:
          _check(lmdb.mdb_stat(
              self.txn, lmdb.mdb_cursor_dbi(cur), &stat))
          ret = ResultSet(stat.ms_entries, KLEN)

          if by_triple:
              self._to_triple_key(triple, &spok)
              key_v.mv_data = spok
              key_v.mv_size = TRP_KLEN
              try:
                  _check(lmdb.mdb_cursor_get(
                      cur, &key_v, &data_v, lmdb.MDB_SET_KEY))
              except KeyNotFoundError:
                  return tuple()

              while True:
                  memcpy(ret.data + ret.itemsize * i, data_v.mv_data, KLEN)
                  try:
                      _check(lmdb.mdb_cursor_get(
                          cur, &key_v, &data_v, lmdb.MDB_NEXT_DUP))
                  except KeyNotFoundError:
                      break
                  i += 1

              # The set was sized for every entry of the index but only the
              # duplicates of one key were copied: drop the unused tail so
              # that uninitialized items are not returned.
              ret.resize(i + 1)
          else:
              try:
                  _check(lmdb.mdb_cursor_get(
                      cur, &key_v, &data_v, lmdb.MDB_FIRST))
              except KeyNotFoundError:
                  return tuple()

              while True:
                  memcpy(
                      ret.data + ret.itemsize * i, key_v.mv_data, KLEN)
                  try:
                      _check(lmdb.mdb_cursor_get(
                          cur, &key_v, NULL, lmdb.MDB_NEXT))
                  except KeyNotFoundError:
                      break
                  i += 1

          return ret.to_tuple()
      finally:
          self._cur_close(cur)
  1618. # Key conversion methods.
  cdef object from_key(self, Key key):
      """
      Convert a single key into the term it identifies.

      The key is looked up in the ``t:st`` database and the stored value is
      passed to ``deserialize``.

      :param Key key: The key to be converted. ``KLEN`` bytes are read.

      :return: Whatever ``deserialize`` builds from the stored value.
      """
      key_v.mv_data = key
      key_v.mv_size = KLEN

      _check(
              lmdb.mdb_get(self.txn, self.get_dbi('t:st'), &key_v, &data_v),
              # Slice explicitly: keys are fixed-length byte strings, not
              # NUL-terminated C strings.
              'Error getting data for key \'{}\'.'.format(key[: KLEN]))

      return deserialize(
              <unsigned char *>data_v.mv_data, data_v.mv_size)
  cdef tuple from_trp_key(self, TripleKey key):
      """
      Convert a triple key into a tuple of three terms.

      The three keys are read at offsets ``0``, ``KLEN`` and ``DBL_KLEN``
      of the triple key and each one is converted with ``from_key``.

      :param TripleKey key: The triple key to be converted.

      :rtype: tuple
      """
      first = self.from_key(key)
      second = self.from_key(key + KLEN)
      third = self.from_key(key + DBL_KLEN)

      return (first, second, third)
  cdef inline void _to_key(self, term, Key *key) except *:
      """
      Look up the key stored for a term.

      The term is serialized and hashed, and the hash is looked up in the
      ``th:t`` database.

      :param term: The term to look up. Must be accepted by ``serialize``.
      :param Key* key: Output. ``key[0]`` is set to the data returned by
          LMDB for the hash, so it refers to memory owned by LMDB rather
          than to a copy.

      Any failure of the lookup is reported through ``_check``
      (NOTE(review): presumably ``KeyNotFoundError`` if the term is not in
      the store; confirm against ``_check``).
      """
      cdef:
          unsigned char *pk_t
          size_t term_size
          Hash thash

      serialize(term, &pk_t, &term_size)
      try:
          _hash(pk_t, term_size, thash)
      finally:
          # Release the serialized buffer even if hashing fails.
          free(pk_t)

      key_v.mv_data = &thash
      key_v.mv_size = HLEN

      dbi = self.get_dbi('th:t')
      # Lazy formatting: this runs once per term lookup.
      logger.debug('DBI: %s', dbi)
      _check(lmdb.mdb_get(self.txn, dbi, &key_v, &data_v))

      key[0] = <Key>data_v.mv_data
  cdef inline void _to_triple_key(self, tuple terms, TripleKey *tkey) except *:
      """
      Convert a tuple of 3 terms into a triple key.

      Each term is converted with ``_to_key`` and its ``KLEN`` bytes are
      copied into consecutive slots of ``tkey[0]``.

      :param tuple terms: The three terms to convert.
      :param TripleKey* tkey: Output buffer receiving the triple key. If a
          term yields a NULL key, the function returns early and the
          remaining slots are left untouched.
      """
      cdef:
          char i = 0
          Key key

      while i < 3:
          self._to_key(terms[i], &key)
          # Check before copying: reading from a NULL key would be invalid.
          if key is NULL:
              # A term in the triple is not found.
              return
          memcpy(tkey[0] + (KLEN * i), key, KLEN)
          i += 1
  cdef void _append(
          self, unsigned char *value, size_t vlen, Key *nkey,
          unsigned char *dblabel=b'', lmdb.MDB_txn *txn=NULL,
          unsigned int flags=0) except *:
      """
      Append a value to the end of a database under a newly generated key.

      The new key is the successor (see ``_next_key``) of the last key in
      the database, or ``first_key`` if the database is empty.

      :param unsigned char* value: Value to append.
      :param size_t vlen: Length of the value in bytes.
      :param Key* nkey: Output. Receives the key under which the value is
          stored.
      :param unsigned char* dblabel: Label of the database to append to.
      :param lmdb.MDB_txn* txn: Transaction to use. Defaults to
          ``self.txn`` if NULL.
      :param unsigned int flags: Additional flags for ``mdb_put``.
          ``MDB_APPEND`` is always added.
      """
      cdef:
          unsigned char key[KLEN]
          lmdb.MDB_cursor *cur

      if txn is NULL:
          txn = self.txn

      cur = self._cur_open(dblabel, txn=txn)
      try:
          _check(lmdb.mdb_cursor_get(cur, &key_v, NULL, lmdb.MDB_LAST))
      except KeyNotFoundError:
          # Empty database: start the sequence.
          memcpy(nkey[0], first_key, KLEN)
      else:
          # Copy the last key out of LMDB before computing its successor.
          memcpy(key, key_v.mv_data, KLEN)
          self._next_key(key, nkey)
      finally:
          self._cur_close(cur)

      key_v.mv_data = nkey
      key_v.mv_size = KLEN
      data_v.mv_data = value
      data_v.mv_size = vlen

      # Do not ignore the return code: a failed write must not pass
      # silently.
      _check(lmdb.mdb_put(
              txn, self.get_dbi(dblabel), &key_v, &data_v,
              flags | lmdb.MDB_APPEND))
  cdef void _next_key(self, const Key key, Key *nkey) except *:
      """
      Calculate the next closest byte sequence in lexicographical order.

      This is used to fill the next available slot after the last one in
      LMDB. The input key is copied to the output and then incremented like
      a fixed-width counter, starting from the rightmost byte: a byte below
      255 is incremented and the operation ends; a byte at 255 is reset to
      ``KEY_START`` and the byte to its left is tried.

      :param Key key: Current key. ``KLEN`` bytes are read.
      :param Key* nkey: Output. Receives the next key.

      :raise RuntimeError: If every byte is already 255, i.e. no further
          key can be generated.
      """
      cdef size_t pos = KLEN

      memcpy(nkey[0], key, KLEN)

      while pos > 0:
          pos -= 1

          if nkey[0][pos] < 255:
              # Room left in this byte: bump it and stop.
              nkey[0][pos] += 1
              break

          # This byte is at its maximum. If it is also the leftmost one,
          # the sequence is exhausted.
          if pos == 0:
              raise RuntimeError(
                      'BAD DAY: Sequence exhausted. No more '
                      'combinations are possible.')

          # Reset this byte and carry over to the one on its left.
          nkey[0][pos] = KEY_START