lmdb_triplestore.pyx 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538
  1. import logging
  2. from rdflib import Graph
  3. from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
  4. from lakesuperior.model.graph.graph import Imr
  5. from lakesuperior.store.base_lmdb_store import (
  6. KeyExistsError, KeyNotFoundError, LmdbError)
  7. from lakesuperior.store.base_lmdb_store cimport _check
  8. from cython.parallel import prange
  9. from libc.stdlib cimport free
  10. from libc.string cimport memcpy
  11. cimport lakesuperior.cy_include.cylmdb as lmdb
  12. from lakesuperior.model.graph.term cimport Term
  13. from lakesuperior.store.base_lmdb_store cimport (
  14. BaseLmdbStore, data_v, dbi, key_v)
  15. from lakesuperior.model.base cimport Buffer
  16. from lakesuperior.model.graph.term cimport (
  17. deserialize_to_rdflib, serialize_from_rdflib)
  18. from lakesuperior.model.structures.keyset cimport Keyset
  19. from lakesuperior.model.structures.hash cimport (
  20. HLEN_128 as HLEN, Hash128, hash128)
  logger = logging.getLogger(__name__)

  FIRST_KEY = <bytes>KEY_START * KLEN
  """First key of a sequence."""

  # Operation labels understood by ``LmdbTriplestore._index_triple``.
  IDX_OP_ADD = '_idx_add'
  IDX_OP_REMOVE = '_idx_remove'

  lookup_rank = [0, 2, 1]
  """
  Lookup order of the single-term indices when two terms are bound.

  Indices that hold the fewest values per key on average should come first.
  The numbers refer to:

  0 = s:po
  1 = p:so
  2 = o:sp

  This ranking could be rebalanced periodically by comparing the number of
  keys in (s:po, p:so, o:sp).
  """

  # Term positions, per single-term index, in the order they are stored.
  lookup_ordering = [
      [0, 1, 2],  # spo
      [1, 0, 2],  # pso
      [2, 0, 1],  # osp
  ]

  # Term positions, per two-term index, in the order they are stored.
  lookup_ordering_2bound = [
      [1, 2, 0],  # po:s
      [0, 2, 1],  # so:p
      [0, 1, 2],  # sp:o
  ]
  cdef class LmdbTriplestore(BaseLmdbStore):
      """
      Low-level storage layer.

      Extends the RDFLib-compatible :py:class:`BaseLmdbStore`, mapping triples
      and contexts to key-value records in LMDB.

      The original LMDB C API is used instead of the Python bindings because
      much of the data manipulation happens after retrieval from the store and
      is more efficient at the C level.
      """

      # Labels of all the databases in the environment.
      dbi_labels = [
          # --- Main data ---
          't:st',    # Term key to serialized term content
          'spo:c',   # Joined triple keys to context key
          'c:',      # Empty values; keeps track of empty contexts
          'pfx:ns',  # Prefix to namespace
          # --- Indices ---
          'ns:pfx',  # Namespace to prefix
          'th:t',    # Term hash to triple key
          # --- Lookups ---
          's:po',
          'p:so',
          'o:sp',
          'po:s',
          'so:p',
          'sp:o',
          'c:spo',
      ]

      # Lookup databases: the three single-term indices come first, then the
      # three two-term indices in matching order.
      lookup_indices = [
          b's:po', b'p:so', b'o:sp',
          b'po:s', b'so:p', b'sp:o',
      ]

      # All of these databases store sorted, fixed-size duplicate values.
      dbi_flags = dict.fromkeys(
          (
              's:po', 'p:so', 'o:sp',
              'po:s', 'so:p', 'sp:o',
              'c:spo', 'spo:c',
          ),
          lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED)

      flags = 0

      options = {
          'map_size': 1024 ** 4  # 1 TiB.
      }
  102. # DB management methods.
  cpdef dict stats(self):
      """
      Collect database statistics.

      The dictionary returned by ``_stats()`` is extended with a
      ``num_triples`` entry, taken from the entry count of the ``spo:c``
      database.

      :rtype: dict
      """
      db_info = self._stats()
      spoc_stats = db_info['db_stats']['spo:c']
      db_info['num_triples'] = spoc_stats['ms_entries']

      return db_info
  cpdef size_t _len(self, context=None) except -1:
      """
      Return the length of the dataset.

      The RDFLib interface defines `__len__` in a nonstandard way that
      causes a Cython compilation error, so this method is called by the
      `__len__` method of its Python counterpart.

      :param context: Context to count triples in. ``None`` counts the
          triples of the whole store as reported by :py:meth:`stats`.
      :type context: rdflib.Identifier or None

      :rtype: size_t
      :return: Number of entries found under the context key in ``c:spo``,
          or 0 if the context or its key is not found.
      """
      cdef:
          size_t ct
          # Local storage for the context key. Writing the key through the
          # shared ``key_v.mv_data`` pointer is unsafe: nothing in this
          # method points it at a buffer.
          unsigned char ck[KLEN]
          lmdb.MDB_val ck_v

      if context is None:
          return self.stats()['num_triples']

      try:
          self._to_key(context, &ck)
      except KeyNotFoundError:
          # An unknown context holds no triples.
          return 0

      ck_v.mv_data = ck
      ck_v.mv_size = KLEN

      cur = self._cur_open('c:spo')
      try:
          _check(lmdb.mdb_cursor_get(cur, &ck_v, NULL, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_count(cur, &ct))
      except KeyNotFoundError:
          return 0
      else:
          return ct
      finally:
          self._cur_close(cur)
  135. ## PRIVATE METHODS ##
  136. # Triple and graph methods.
  cpdef add(self, triple, context=None, quoted=False):
      """
      Store a triple and index it.

      Each of the three terms and the context is resolved to a key through
      the ``th:t`` hash index. A term that is not found is appended to
      ``t:st`` and indexed right away.

      :param tuple(rdflib.Identifier) triple: Tuple of three identifiers.
      :param context: Context identifier. ``None`` inserts in the default
          graph.
      :type context: rdflib.Identifier or None
      :param bool quoted: Not used.
      """
      cdef:
          lmdb.MDB_cursor *th_cur
          lmdb.MDB_val trp_v, ctx_v, empty_v
          unsigned char pos
          Hash128 term_hash
          # Plain byte arrays: using Key or TripleKey here breaks Cython.
          # See https://github.com/cython/cython/issues/2517
          unsigned char quad_key[QUAD_KLEN]
          unsigned char new_key[KLEN]
          Buffer ser_term

      ctx = self._normalize_context(context)
      if ctx is None:
          ctx = RDFLIB_DEFAULT_GRAPH_URI

      # TODO: figure out how the RDFLib dispatcher is inherited
      # (and if there is a use for it in a first place)
      subj, pred, obj = triple

      th_cur = self._cur_open('th:t')
      try:
          for pos, term in enumerate((subj, pred, obj, ctx)):
              serialize_from_rdflib(term, &ser_term)
              hash128(&ser_term, &term_hash)

              key_v.mv_data = term_hash
              key_v.mv_size = HLEN
              try:
                  _check(lmdb.mdb_get(
                      self.txn, self.get_dbi('th:t'), &key_v, &data_v))
              except KeyNotFoundError:
                  # Unknown term: store it...
                  self._append(&ser_term, &new_key, dblabel=b't:st')
                  memcpy(quad_key + (pos * KLEN), new_key, KLEN)

                  # ...and index it by its hash.
                  key_v.mv_data = term_hash
                  key_v.mv_size = HLEN
                  data_v.mv_data = new_key
                  data_v.mv_size = KLEN
                  _check(
                      lmdb.mdb_cursor_put(th_cur, &key_v, &data_v, 0),
                      'Error setting key {}.'.format(term_hash))
              else:
                  # Known term: reuse the key found in the index.
                  memcpy(quad_key + (pos * KLEN), data_v.mv_data, KLEN)
      finally:
          self._cur_close(th_cur)

      # The first three keys form the triple, the fourth is the context.
      trp_v.mv_data = quad_key
      trp_v.mv_size = TRP_KLEN
      ctx_v.mv_data = quad_key + TRP_KLEN
      ctx_v.mv_size = KLEN
      empty_v.mv_data = b''
      empty_v.mv_size = 0

      # Register the context.
      try:
          _check(lmdb.mdb_put(
              self.txn, self.get_dbi('c:'), &ctx_v, &empty_v,
              lmdb.MDB_NOOVERWRITE))
      except KeyExistsError:
          pass

      # Add triple:context association.
      try:
          _check(lmdb.mdb_put(
              self.txn, self.get_dbi('spo:c'), &trp_v, &ctx_v,
              lmdb.MDB_NODUPDATA))
      except KeyExistsError:
          pass

      # Index context:triple association.
      try:
          _check(lmdb.mdb_put(
              self.txn, self.get_dbi('c:spo'), &ctx_v, &trp_v,
              lmdb.MDB_NODUPDATA))
      except KeyExistsError:
          pass

      # Main data are in; update the lookup indices.
      self._index_triple(IDX_OP_ADD, quad_key[: TRP_KLEN])
  cpdef add_graph(self, graph):
      """
      Add a graph to the database.

      The graph identifier is serialized and handed over to
      :py:meth:`_add_graph`. Registering a graph this way keeps it from
      being removed when all of its triples are removed.

      This may be called by read-only operations:
      https://github.com/RDFLib/rdflib/blob/master/rdflib/graph.py#L1623
      In which case it needs to open a write transaction. This is not ideal
      but the only way to handle datasets in RDFLib.

      :param rdflib.URIRef graph: URI of the named graph to add. A
          :py:class:`rdflib.Graph` instance is accepted too, in which case
          its identifier is used.
      """
      cdef Buffer ser_graph

      graph_uri = graph.identifier if isinstance(graph, Graph) else graph

      # FIXME This is all wrong.
      serialize_from_rdflib(graph_uri, &ser_graph)
      self._add_graph(&ser_graph)
  cdef void _add_graph(self, Buffer *pk_gr) except *:
      """
      Add a graph.

      Nothing is done if the hash of the serialized graph term is already in
      the ``th:t`` index. Otherwise the term is appended to ``t:st``, indexed
      in ``th:t`` and registered in ``c:`` with an empty value.

      If the current transaction is read-write it is reused; otherwise a
      temporary read-write transaction is opened and committed.

      :param pk_gr: Pickled context URIRef object.
      :type pk_gr: Buffer*
      """
      cdef:
          Hash128 chash
          unsigned char ck[KLEN]
          lmdb.MDB_txn *tmp_txn

      hash128(pk_gr, &chash)

      if self._key_exists(chash, HLEN, b'th:t'):
          return

      if self.is_txn_rw:
          # Use the existing R/W transaction.
          # Main entry.
          self._append(pk_gr, &ck, b't:st')
          # Index.
          self._put(chash, HLEN, ck, KLEN, b'th:t')
          # Add to list of contexts.
          self._put(ck, KLEN, b'', 0, b'c:')
          return

      # Open a temporary R/W transaction.
      _check(lmdb.mdb_txn_begin(self.dbenv, NULL, 0, &tmp_txn))
      try:
          # Main entry.
          self._append(pk_gr, &ck, b't:st', txn=tmp_txn)
          # Index.
          self._put(chash, HLEN, ck, KLEN, b'th:t', txn=tmp_txn)
          # Add to list of contexts.
          self._put(ck, KLEN, b'', 0, b'c:', txn=tmp_txn)
      except BaseException:
          lmdb.mdb_txn_abort(tmp_txn)
          raise

      # The commit stays outside the guarded section: LMDB frees the
      # transaction handle on commit, so it must not be aborted afterwards,
      # even if the commit reports an error.
      _check(lmdb.mdb_txn_commit(tmp_txn))
  cpdef void _remove(self, tuple triple_pattern, context=None) except *:
      """
      Remove the triples matching a pattern.

      With a context, only the associations between the matching triples and
      that context are removed, and a triple is dropped from the lookup
      indices only once no context association is left for it. Without a
      context, all associations of each matching triple are removed.

      :param tuple triple_pattern: 3 RDFLib terms; ``None`` is a wildcard.
      :param context: Context to remove the triples from. If it is given but
          its key is not found, nothing is removed.
      :type context: rdflib.Identifier or None
      """
      cdef:
          unsigned char spok[TRP_KLEN]
          size_t i = 0
          Key ck
          lmdb.MDB_val spok_v, ck_v

      if context is not None:
          try:
              self._to_key(context, &ck)
          except KeyNotFoundError:
              # If context is specified but not found, return to avoid
              # deleting the wrong triples.
              return

      # Get the matching pattern.
      match_set = self.triple_keys(triple_pattern, context)

      dcur = self._cur_open('spo:c')
      icur = self._cur_open('c:spo')

      try:
          spok_v.mv_size = TRP_KLEN

          # If context was specified, remove only associations with that
          # context.
          if context is not None:
              ck_v.mv_data = ck
              ck_v.mv_size = KLEN

              for i in range(match_set.ct):
                  memcpy(
                      spok, match_set.data + match_set.itemsize * i,
                      TRP_KLEN)
                  spok_v.mv_data = spok

                  # Delete spo:c entry.
                  try:
                      _check(lmdb.mdb_cursor_get(
                          dcur, &spok_v, &ck_v, lmdb.MDB_GET_BOTH))
                  except KeyNotFoundError:
                      pass
                  else:
                      _check(lmdb.mdb_cursor_del(dcur, 0))

                  # Restore ck after delete.
                  ck_v.mv_data = ck

                  # Delete c:spo entry.
                  try:
                      _check(lmdb.mdb_cursor_get(
                          icur, &ck_v, &spok_v, lmdb.MDB_GET_BOTH))
                  except KeyNotFoundError:
                      pass
                  else:
                      _check(lmdb.mdb_cursor_del(icur, 0))

                  # Delete lookup indices, only if no other context
                  # association is present.
                  spok_v.mv_data = spok
                  try:
                      _check(lmdb.mdb_cursor_get(
                          dcur, &spok_v, NULL, lmdb.MDB_SET))
                  except KeyNotFoundError:
                      self._index_triple(IDX_OP_REMOVE, spok)

          # If no context is specified, remove all associations.
          else:
              # Loop over all SPO matching the triple pattern. A counted
              # loop guarantees that the index advances on every path,
              # including the early ``continue`` below.
              for i in range(match_set.ct):
                  spok = match_set.data + match_set.itemsize * i
                  spok_v.mv_data = spok

                  # Position on the first context association of this SPO.
                  try:
                      _check(lmdb.mdb_cursor_get(
                          dcur, &spok_v, &ck_v, lmdb.MDB_SET_KEY))
                  except KeyNotFoundError:
                      # Move on to the next SPO.
                      continue

                  ck = <Key>ck_v.mv_data
                  logger.debug(f'Removing {spok[: TRP_KLEN]} from main.')

                  # Loop over all context associations for this SPO.
                  while True:
                      # Delete c:spo association.
                      try:
                          _check(lmdb.mdb_cursor_get(
                              icur, &ck_v, &spok_v, lmdb.MDB_GET_BOTH))
                      except KeyNotFoundError:
                          pass
                      else:
                          # Checked like the deletions in the context
                          # branch: a failed delete must not go unnoticed.
                          _check(lmdb.mdb_cursor_del(icur, 0))
                          # Restore the pointer to the deleted SPO.
                          spok_v.mv_data = spok

                      # Move on to next associated context.
                      try:
                          _check(lmdb.mdb_cursor_get(
                              dcur, &spok_v, &ck_v, lmdb.MDB_NEXT_DUP))
                      except KeyNotFoundError:
                          break

                  # Then delete the spo:c association.
                  try:
                      _check(lmdb.mdb_cursor_get(
                          dcur, &spok_v, &ck_v, lmdb.MDB_SET))
                  except KeyNotFoundError:
                      pass
                  else:
                      # MDB_NODUPDATA removes all the values of the key.
                      _check(lmdb.mdb_cursor_del(dcur, lmdb.MDB_NODUPDATA))
                      self._index_triple(IDX_OP_REMOVE, spok)

      finally:
          self._cur_close(dcur)
          self._cur_close(icur)
  cdef void _index_triple(self, str op, TripleKey spok) except *:
      """
      Add a triple to, or remove it from, the six lookup indices.

      For each term position the single key is paired with the double key
      made of the two other terms, and the pair is written to (or deleted
      from) both the single-term index and the matching two-term index.

      :param str op: ``IDX_OP_ADD`` or ``IDX_OP_REMOVE``.
      :param TripleKey spok: Triple key.

      :raise ValueError: if ``op`` is neither of the supported operations.
      """
      cdef:
          unsigned char keys[3][KLEN]
          unsigned char dbl_keys[3][DBL_KLEN]
          size_t i = 0
          lmdb.MDB_val key_v, dbl_key_v

      # Single keys.
      keys[0] = spok  # sk
      keys[1] = spok + KLEN  # pk
      keys[2] = spok + DBL_KLEN  # ok

      # Double keys. Only `sok` is not contiguous in the triple key.
      dbl_keys[0] = spok + KLEN  # pok
      memcpy(&dbl_keys[1], spok, KLEN)  # sok, 1st part
      memcpy(&dbl_keys[1][KLEN], spok + DBL_KLEN, KLEN)  # sok, 2nd part
      dbl_keys[2] = spok  # spk

      key_v.mv_size = KLEN
      dbl_key_v.mv_size = DBL_KLEN

      if op == IDX_OP_REMOVE:
          logger.debug(f'Remove {spok[ : TRP_KLEN]} from indices.')
      else:
          logger.debug(f'Add {spok[ : TRP_KLEN]} to indices.')

      for i in range(3):
          # s:po, p:so, o:sp
          single_cur = self._cur_open(self.lookup_indices[i])
          # po:s, so:p, sp:o
          double_cur = self._cur_open(self.lookup_indices[i + 3])

          try:
              key_v.mv_data = keys[i]
              dbl_key_v.mv_data = dbl_keys[i]

              if op == IDX_OP_ADD:
                  logger.debug('Adding to index `{}`: {}, {}'.format(
                      self.lookup_indices[i],
                      (<unsigned char *>key_v.mv_data)[ : key_v.mv_size],
                      (<unsigned char *>dbl_key_v.mv_data)[ : dbl_key_v.mv_size]))
                  try:
                      _check(lmdb.mdb_cursor_put(
                          single_cur, &key_v, &dbl_key_v,
                          lmdb.MDB_NODUPDATA))
                  except KeyExistsError:
                      logger.debug(f'Key {keys[i][: KLEN]} exists already.')

                  logger.debug('Adding to index `{}`: {}, {}'.format(
                      self.lookup_indices[i + 3],
                      (<unsigned char *>dbl_key_v.mv_data)[ : dbl_key_v.mv_size],
                      (<unsigned char *>key_v.mv_data)[ : key_v.mv_size]))
                  try:
                      _check(lmdb.mdb_cursor_put(
                          double_cur, &dbl_key_v, &key_v,
                          lmdb.MDB_NODUPDATA))
                  except KeyExistsError:
                      logger.debug(f'Double key {dbl_keys[i][: DBL_KLEN]} exists already.')

              elif op == IDX_OP_REMOVE:
                  # Single-term index.
                  try:
                      _check(lmdb.mdb_cursor_get(
                          single_cur, &key_v, &dbl_key_v,
                          lmdb.MDB_GET_BOTH))
                  except KeyNotFoundError:
                      logger.debug(f'Not found in index: {keys[i][: KLEN]}, '
                              f'{dbl_keys[i][: DBL_KLEN]}')
                  else:
                      logger.debug(f'Removed: {keys[i][: KLEN]}, '
                              f'{dbl_keys[i][: DBL_KLEN]}')
                      _check(lmdb.mdb_cursor_del(single_cur, 0))

                  # Point the values back at the local buffers before the
                  # next lookup.
                  key_v.mv_data = keys[i]
                  dbl_key_v.mv_data = dbl_keys[i]

                  # Two-term index.
                  try:
                      _check(lmdb.mdb_cursor_get(
                          double_cur, &dbl_key_v, &key_v,
                          lmdb.MDB_GET_BOTH))
                  except KeyNotFoundError:
                      logger.debug(f'Not found in index: '
                              f'{dbl_keys[i][: DBL_KLEN]}, '
                              f'{keys[i][: KLEN]}')
                  else:
                      logger.debug(f'Removed: {dbl_keys[i][: DBL_KLEN]}, '
                              f'{keys[i][: KLEN]}')
                      _check(lmdb.mdb_cursor_del(double_cur, 0))

              else:
                  raise ValueError(
                      'Index operation \'{}\' is not supported.'.format(op))
          finally:
              self._cur_close(single_cur)
              self._cur_close(double_cur)
  cpdef void _remove_graph(self, object gr_uri) except *:
      """
      Delete a context.

      All the triples in the context are removed, as well as the triples in
      any context having the graph URI as their subject or object. The graph
      term is then deleted from ``c:``, ``t:st`` and ``th:t``.

      If the graph URI has no key in the store, nothing is done.

      :param gr_uri: URI of the graph to remove.
      """
      cdef:
          Hash128 gr_hash
          unsigned char gr_key[KLEN]
          lmdb.MDB_val gr_key_v, gr_hash_v
          Buffer ser_gr

      # Gather information on the graph prior to deletion.
      try:
          self._to_key(gr_uri, &gr_key)
      except KeyNotFoundError:
          return

      # Remove all triples and indices associated with the graph.
      self._remove((None, None, None), gr_uri)
      # Remove the graph if it is in triples.
      self._remove((gr_uri, None, None))
      self._remove((None, None, gr_uri))

      # Clean up all terms related to the graph.
      serialize_from_rdflib(gr_uri, &ser_gr)
      hash128(&ser_gr, &gr_hash)

      gr_hash_v.mv_size = HLEN
      gr_key_v.mv_size = KLEN

      # The first missing record ends the cleanup.
      try:
          gr_key_v.mv_data = gr_key
          _check(lmdb.mdb_del(
              self.txn, self.get_dbi(b'c:'), &gr_key_v, NULL))

          gr_key_v.mv_data = gr_key
          _check(lmdb.mdb_del(
              self.txn, self.get_dbi(b't:st'), &gr_key_v, NULL))

          gr_hash_v.mv_data = gr_hash
          _check(lmdb.mdb_del(
              self.txn, self.get_dbi(b'th:t'), &gr_hash_v, NULL))
      except KeyNotFoundError:
          pass
  532. # Lookup methods.
  def contexts(self, triple=None):
      """
      Iterate over the contexts returned by ``all_contexts``.

      :param triple: Passed on to ``all_contexts`` as is.

      :rtype: Iterator(lakesuperior.model.graph.graph.Imr)
      """
      for ctx_key in self.all_contexts(triple):
          ctx_uri = self.from_key(ctx_key)
          yield Imr(uri=ctx_uri, store=self)
  def triples(self, triple_pattern, context=None):
      """
      Generator over matching triples.

      :param tuple triple_pattern: 3 RDFLib terms
      :param context: Context graph, if available.
      :type context: rdflib.Graph or None

      :rtype: Iterator
      :return: Generator over triples and contexts in which each result has
          the following format::

              (s, p, o), tuple(contexts)

          Where the contexts tuple lists all the contexts that the triple
          appears in, as found in ``spo:c``.
      """
      cdef:
          size_t i = 0
          lmdb.MDB_val key_v, data_v

      # This sounds strange, RDFLib should be passing None at this point,
      # but anyway...
      context = self._normalize_context(context)

      logger.debug(
          'Getting triples for: {}, {}'.format(triple_pattern, context))
      rset = self.triple_keys(triple_pattern, context)

      cur = self._cur_open('spo:c')
      try:
          key_v.mv_size = TRP_KLEN
          for i in range(rset.ct):
              key_v.mv_data = rset.data + i * TRP_KLEN

              # Collect the contexts associated with this triple.
              # This shall never be MDB_NOTFOUND.
              _check(lmdb.mdb_cursor_get(cur, &key_v, &data_v, lmdb.MDB_SET))
              contexts = []
              while True:
                  contexts.append(Imr(
                      uri=self.from_key(<Key>data_v.mv_data), store=self))
                  try:
                      _check(lmdb.mdb_cursor_get(
                          cur, &key_v, &data_v, lmdb.MDB_NEXT_DUP))
                  except KeyNotFoundError:
                      break

              yield self.from_trp_key(
                  (<TripleKey>key_v.mv_data)[: TRP_KLEN]), tuple(contexts)
      finally:
          self._cur_close(cur)
  cpdef SimpleGraph graph(self, triple_pattern, context=None):
      """
      Create a SimpleGraph instance from "borrowed" buffers from the store.

      The instance is only valid within the LMDB transaction that created it.

      :param tuple triple_pattern: 3 RDFLib terms
      :param context: Context graph, if available.
      :type context: rdflib.Graph or None

      :rtype: SimpleGraph
      :return: Graph holding one triple for each triple key matching the
          pattern. The graph is empty if nothing matches.
      """
      cdef:
          unsigned char spok[TRP_KLEN]
          size_t cur
          SimpleGraph gr = SimpleGraph()
          BufferTriple* trp_buffer
          Keyset match

      logger.debug(
          'Getting triples for: {}, {}'.format(triple_pattern, context))

      match = self.triple_keys(triple_pattern, context)
      if match.ct == 0:
          # Nothing to allocate or copy.
          return gr

      trp_buffer = <BufferTriple*>gr.pool.alloc(
          match.ct, sizeof(BufferTriple))

      # Each match gets its own slot in the buffer: `cur` must advance with
      # every triple, otherwise all of them would overwrite the first slot.
      for cur in range(match.ct):
          memcpy(spok, match.data + match.itemsize * cur, TRP_KLEN)

          (trp_buffer + cur).s = self.lookup_term(spok[: KLEN])
          (trp_buffer + cur).p = self.lookup_term(spok[KLEN: DBL_KLEN])
          (trp_buffer + cur).o = self.lookup_term(spok[DBL_KLEN: TRP_KLEN])

          gr.add_triple(trp_buffer + cur)

      return gr
  cdef Keyset triple_keys(self, tuple triple_pattern, context=None):
      """
      Top-level lookup method.

      This method is used by `triples` which returns native Python tuples,
      as well as by other methods that need to iterate and filter triple
      keys without incurring in the overhead of converting them to triples.

      A term of the pattern is considered unbound only if it is ``None``:
      terms that evaluate to false, such as an empty or zero literal, are
      bound terms.

      :param tuple triple_pattern: 3 RDFLib terms
      :param context: Context graph or URI, or None.
      :type context: rdflib.term.Identifier or None

      :rtype: Keyset
      :return: Matching triple keys. The set is empty if the context, a term
          or the triple is not found.
      """
      # TODO: Improve performance by allowing passing contexts as a tuple.
      cdef:
          unsigned char tk[KLEN]
          unsigned char ck[KLEN]
          unsigned char spok[TRP_KLEN]
          size_t ct = 0, flt_j = 0, i = 0, j = 0, pg_offset = 0
          size_t n_bound = 0
          lmdb.MDB_cursor *icur
          lmdb.MDB_val key_v, data_v
          Keyset flt_res, ret, res

      # Unfiltered lookup. No context checked.
      if context is None:
          try:
              return self._lookup(triple_pattern)
          except KeyNotFoundError:
              return Keyset(0, TRP_KLEN)

      try:
          self._to_key(context, &ck)
      except KeyNotFoundError:
          # Context not found.
          return Keyset(0, TRP_KLEN)

      # Count the bound terms by identity with None rather than by truth
      # value, so that a bound but "falsy" term is not taken for a wildcard.
      for term in triple_pattern:
          if term is not None:
              n_bound += 1

      icur = self._cur_open('c:spo')

      try:
          key_v.mv_data = ck
          key_v.mv_size = KLEN

          # s p o c
          if n_bound == len(triple_pattern):
              for i, term in enumerate(triple_pattern):
                  try:
                      self._to_key(term, &tk)
                  except KeyNotFoundError:
                      # A term in the triple is not found.
                      return Keyset(0, TRP_KLEN)
                  memcpy(spok + (KLEN * i), tk, KLEN)

              data_v.mv_data = spok
              data_v.mv_size = TRP_KLEN
              try:
                  _check(lmdb.mdb_cursor_get(
                      icur, &key_v, &data_v, lmdb.MDB_GET_BOTH))
              except KeyNotFoundError:
                  # Triple not found in this context.
                  return Keyset(0, TRP_KLEN)

              ret = Keyset(1, TRP_KLEN)
              memcpy(ret.data, spok, TRP_KLEN)

              return ret

          # ? ? ? c
          elif n_bound == 0:
              # Get all triples from the context.
              try:
                  _check(lmdb.mdb_cursor_get(
                      icur, &key_v, &data_v, lmdb.MDB_SET))
              except KeyNotFoundError:
                  # No triples in this context.
                  return Keyset(0, TRP_KLEN)

              _check(lmdb.mdb_cursor_count(icur, &ct))
              ret = Keyset(ct, TRP_KLEN)
              logger.debug(f'Entries in c:spo: {ct}')
              logger.debug(f'Allocated {ret.size} bytes.')

              logger.debug('Looking in key: {}'.format(
                  (<unsigned char *>key_v.mv_data)[:key_v.mv_size]))
              _check(lmdb.mdb_cursor_get(
                  icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
              # Copy the values one page at a time until no pages are left.
              while True:
                  memcpy(
                      ret.data + pg_offset, data_v.mv_data, data_v.mv_size)
                  pg_offset += data_v.mv_size

                  try:
                      _check(lmdb.mdb_cursor_get(
                          icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
                  except KeyNotFoundError:
                      return ret

          # Regular lookup. Filter _lookup() results by context.
          else:
              try:
                  res = self._lookup(triple_pattern)
              except KeyNotFoundError:
                  return Keyset(0, TRP_KLEN)

              key_v.mv_data = ck
              key_v.mv_size = KLEN
              data_v.mv_size = TRP_KLEN

              flt_res = Keyset(res.ct, res.itemsize)
              for j in range(res.ct):
                  data_v.mv_data = res.data + j * res.itemsize
                  try:
                      # Verify that the triple is associated with the
                      # context being searched.
                      _check(lmdb.mdb_cursor_get(
                          icur, &key_v, &data_v, lmdb.MDB_GET_BOTH))
                  except KeyNotFoundError:
                      # Not in this context: discard.
                      continue

                  memcpy(
                      flt_res.data + res.itemsize * flt_j,
                      res.data + res.itemsize * j, res.itemsize)
                  flt_j += 1

              # Resize result set to the size of context matches.
              # This crops the memory block without copying it.
              flt_res.resize(flt_j)

              return flt_res
      finally:
          self._cur_close(icur)
  cdef Keyset _lookup(self, tuple triple_pattern):
      """
      Look up triple keys in the indices based on a triple pattern.

      Each term of the pattern is either bound or ``None`` (unbound):

      - all bound: the triple key is checked directly against ``spo:c``;
      - one or two bound: delegated to :meth:`_lookup_1bound` or
        :meth:`_lookup_2bound`;
      - none bound: all the unique keys of ``spo:c`` are collected.

      :param tuple triple_pattern: 3-tuple of ``(s, p, o)`` terms.

      :rtype: Keyset
      :return: Matching triple keys.
      """
      cdef:
          TripleKey spok
          lmdb.MDB_stat db_stat
          size_t ct = 0, i = 0
          lmdb.MDB_val spok_v, ck_v

      s, p, o = triple_pattern

      # s p o
      if s is not None and p is not None and o is not None:
          spok_v.mv_data = spok
          spok_v.mv_size = TRP_KLEN
          try:
              self._to_triple_key(triple_pattern, &spok)
              _check(lmdb.mdb_get(
                  self.txn, self.get_dbi('spo:c'), &spok_v, &ck_v))
          except KeyNotFoundError:
              return Keyset(0, TRP_KLEN)

          matches = Keyset(1, TRP_KLEN)
          memcpy(matches.data, spok, TRP_KLEN)
          return matches

      if s is not None:
          # s p ?
          if p is not None:
              return self._lookup_2bound(0, s, 1, p)
          # s ? o
          if o is not None:
              return self._lookup_2bound(0, s, 2, o)
          # s ? ?
          return self._lookup_1bound(0, s)

      if p is not None:
          # ? p o
          if o is not None:
              return self._lookup_2bound(1, p, 2, o)
          # ? p ?
          return self._lookup_1bound(1, p)

      # ? ? o
      if o is not None:
          return self._lookup_1bound(2, o)

      # ? ? ?
      # Get all triples in the database.
      logger.debug('Getting all DB triples.')
      dcur = self._cur_open('spo:c')
      try:
          _check(lmdb.mdb_stat(
              self.txn, lmdb.mdb_cursor_dbi(dcur), &db_stat),
              'Error gathering DB stats.')
          ct = db_stat.ms_entries
          logger.debug(f'Triples found: {ct}')
          # Bail out before allocating anything for an empty database.
          if ct == 0:
              return Keyset(0, TRP_KLEN)

          # Size is guessed from all entries. Unique keys will be
          # much less than that.
          ret = Keyset(ct, TRP_KLEN)

          _check(lmdb.mdb_cursor_get(
              dcur, &key_v, &data_v, lmdb.MDB_FIRST))
          while True:
              memcpy(
                  ret.data + ret.itemsize * i,
                  key_v.mv_data, TRP_KLEN)
              try:
                  _check(lmdb.mdb_cursor_get(
                      dcur, &key_v, &data_v, lmdb.MDB_NEXT_NODUP))
              except KeyNotFoundError:
                  break
              i += 1

          # ``i`` is the index of the last key copied: crop the
          # result set to the number of unique keys actually found.
          ret.resize(i + 1)
          return ret
      finally:
          self._cur_close(dcur)
  cdef Keyset _lookup_1bound(self, unsigned char idx, term):
      """
      Look up triples for a pattern with one bound term.

      :param unsigned char idx: Position of the bound term. It selects
          the entry of ``lookup_ordering`` and the index label in
          ``self.lookup_indices``.
      :param term: Bound term to search for.

      :rtype: Keyset
      :return: SPO keys matching the pattern. An empty Keyset is returned
          if the term cannot be converted to a key. A ``KeyNotFoundError``
          raised while positioning the cursor is not handled here.
      """
      cdef:
          unsigned char luk[KLEN]
          unsigned char asm_rng[3]
          size_t ct, ret_offset = 0, src_pos, ret_pos
          # Signed, as required by older OpenMP versions for loop indices.
          Py_ssize_t j
          lmdb.MDB_cursor *icur
          # Typed so that its attributes are reachable without the GIL
          # inside the ``prange`` block.
          Keyset ret

      logger.debug(f'lookup 1bound: {idx}, {term}')
      try:
          self._to_key(term, &luk)
      except KeyNotFoundError:
          return Keyset(0, TRP_KLEN)
      logger.debug('luk: {}'.format(luk))

      term_order = lookup_ordering[idx]
      icur = self._cur_open(self.lookup_indices[idx])
      logger.debug(f'DB label: {self.lookup_indices[idx]}')
      logger.debug('term order: {}'.format(term_order[: 3]))

      try:
          key_v.mv_data = luk
          key_v.mv_size = KLEN

          # Count the duplicates for the key to size the result set.
          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_count(icur, &ct))

          # Allocate memory for results.
          ret = Keyset(ct, TRP_KLEN)
          logger.debug(f'Entries for {self.lookup_indices[idx]}: {ct}')
          logger.debug(f'Allocated {ret.size} bytes of data.')

          # Arrange results according to lookup order: byte offset
          # inside each result triple for the bound term and for the two
          # terms read from the index values.
          asm_rng = [
              KLEN * term_order[0],
              KLEN * term_order[1],
              KLEN * term_order[2],
          ]
          logger.debug('asm_rng: {}'.format(asm_rng[:3]))
          logger.debug('luk: {}'.format(luk))

          # Position on the key again, then read its values by the page.
          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_get(
              icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
          while True:
              logger.debug('ret_offset: {}'.format(ret_offset))
              logger.debug(f'Page size: {data_v.mv_size}')
              # Each value in the page is a pair of keys (DBL_KLEN bytes)
              # that is combined with the lookup key into one triple key.
              for j in prange(data_v.mv_size // DBL_KLEN, nogil=True):
                  src_pos = DBL_KLEN * j
                  ret_pos = (ret_offset + ret.itemsize * j)
                  memcpy(ret.data + ret_pos + asm_rng[0], luk, KLEN)
                  memcpy(ret.data + ret_pos + asm_rng[1],
                          data_v.mv_data + src_pos, KLEN)
                  memcpy(ret.data + ret_pos + asm_rng[2],
                          data_v.mv_data + src_pos + KLEN, KLEN)

              # Increment MUST be done before MDB_NEXT_MULTIPLE otherwise
              # data_v.mv_size will be overwritten with the *next* page size
              # and cause corruption in the output data.
              ret_offset += data_v.mv_size // DBL_KLEN * ret.itemsize

              try:
                  # Get results by the page.
                  _check(lmdb.mdb_cursor_get(
                      icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
              except KeyNotFoundError:
                  # No more pages for this key.
                  return ret
      finally:
          self._cur_close(icur)
  cdef Keyset _lookup_2bound(
          self, unsigned char idx1, term1, unsigned char idx2, term2):
      """
      Look up triples for a pattern with two bound terms.

      :param unsigned char idx1: Position of the first bound term, matched
          against the first two items of the ``lookup_ordering_2bound``
          entries.
      :param term1: First bound term to search for.
      :param unsigned char idx2: Position of the second bound term.
      :param term2: Second bound term to search for.

      :rtype: Keyset
      :return: SPO keys matching the pattern. An empty Keyset is returned
          if either term cannot be converted to a key. A
          ``KeyNotFoundError`` raised while positioning the cursor is not
          handled here.

      :raise ValueError: If no entry of ``lookup_ordering_2bound`` has
          both positions in its first two items.
      """
      cdef:
          unsigned char luk1_offset, luk2_offset
          unsigned char luk1[KLEN]
          unsigned char luk2[KLEN]
          unsigned char luk[DBL_KLEN]
          unsigned char asm_rng[3]
          unsigned char term_order[3] # Lookup ordering
          size_t ct, i = 0, ret_offset = 0, ret_pos, src_pos
          # Signed, as required by older OpenMP versions for loop indices.
          Py_ssize_t j
          lmdb.MDB_cursor *icur
          Keyset ret

      logger.debug(
              f'2bound lookup for term {term1} at position {idx1} '
              f'and term {term2} at position {idx2}.')
      try:
          self._to_key(term1, &luk1)
          self._to_key(term2, &luk2)
      except KeyNotFoundError:
          return Keyset(0, TRP_KLEN)
      logger.debug('luk1: {}'.format(luk1[: KLEN]))
      logger.debug('luk2: {}'.format(luk2[: KLEN]))

      # Find the index whose composite key is made of the two bound
      # positions, and the order of the two terms inside that key.
      for i in range(3):
          if (
                  idx1 in lookup_ordering_2bound[i][: 2]
                  and idx2 in lookup_ordering_2bound[i][: 2]):
              term_order = lookup_ordering_2bound[i]
              if term_order[0] == idx1:
                  luk1_offset = 0
                  luk2_offset = KLEN
              else:
                  luk1_offset = KLEN
                  luk2_offset = 0
              dblabel = self.lookup_indices[i + 3] # skip 1bound index labels
              break
      else:
          raise ValueError(
                  'Indices {} and {} not found in LU keys.'.format(
                      idx1, idx2))

      logger.debug('Term order: {}'.format(term_order[:3]))
      logger.debug('LUK offsets: {}, {}'.format(luk1_offset, luk2_offset))
      # Compose terms in lookup key.
      memcpy(luk + luk1_offset, luk1, KLEN)
      memcpy(luk + luk2_offset, luk2, KLEN)

      logger.debug('Lookup key: {}'.format(luk))

      icur = self._cur_open(dblabel)
      logger.debug('Database label: {}'.format(dblabel))

      try:
          key_v.mv_data = luk
          key_v.mv_size = DBL_KLEN

          # Count duplicates for key and allocate memory for result set.
          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_count(icur, &ct))
          ret = Keyset(ct, TRP_KLEN)

          # Arrange results according to lookup order: byte offset
          # inside each result triple for the two bound terms and for
          # the term read from the index values.
          asm_rng = [
              KLEN * term_order[0],
              KLEN * term_order[1],
              KLEN * term_order[2],
          ]
          logger.debug('asm_rng: {}'.format(asm_rng[:3]))
          logger.debug('luk: {}'.format(luk))

          # Position on the key again, then read its values by the page.
          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_get(
              icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
          while True:
              # Dumping the page copies it into a Python object: only do
              # it if the message is actually going to be emitted.
              if logger.isEnabledFor(logging.DEBUG):
                  logger.debug('Got data in 2bound ({}): {}'.format(
                      data_v.mv_size,
                      (<unsigned char *>data_v.mv_data)[: data_v.mv_size]))
              # Each value in the page is a single key (KLEN bytes) that
              # is combined with the two lookup keys into one triple key.
              for j in prange(data_v.mv_size // KLEN, nogil=True):
                  src_pos = KLEN * j
                  ret_pos = (ret_offset + ret.itemsize * j)
                  memcpy(ret.data + ret_pos + asm_rng[0], luk, KLEN)
                  memcpy(ret.data + ret_pos + asm_rng[1], luk + KLEN, KLEN)
                  memcpy(ret.data + ret_pos + asm_rng[2],
                          data_v.mv_data + src_pos, KLEN)

              # Advance the output offset before fetching the next page,
              # which overwrites data_v.mv_size.
              ret_offset += data_v.mv_size // KLEN * ret.itemsize

              try:
                  # Get results by the page.
                  _check(lmdb.mdb_cursor_get(
                      icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
              except KeyNotFoundError:
                  # No more pages for this key.
                  return ret
      finally:
          self._cur_close(icur)
  cdef Keyset _all_term_keys(self, term_type):
      """
      Return all keys of a (``s:po``, ``p:so``, ``o:sp``) index.

      :param str term_type: One of ``s``, ``p`` or ``o``. Its position in
          the string ``spo`` selects the index label in
          ``self.lookup_indices``.

      :rtype: Keyset
      :return: Unique keys of the index, as items of ``KLEN`` bytes.
      """
      cdef:
          size_t i = 0
          lmdb.MDB_stat stat

      idx_label = self.lookup_indices['spo'.index(term_type)]
      icur = self._cur_open(idx_label)
      try:
          _check(lmdb.mdb_stat(self.txn, lmdb.mdb_cursor_dbi(icur), &stat))

          try:
              _check(lmdb.mdb_cursor_get(
                  icur, &key_v, NULL, lmdb.MDB_FIRST))
          except KeyNotFoundError:
              # Empty index: same item size as the regular result set.
              return Keyset(0, KLEN)

          # TODO: This may allocate memory for several times the amount
          # needed. Even though it is resized later, we need to know how
          # performance is affected by this.
          ret = Keyset(stat.ms_entries, KLEN)

          while True:
              memcpy(ret.data + ret.itemsize * i, key_v.mv_data, KLEN)
              try:
                  _check(lmdb.mdb_cursor_get(
                      icur, &key_v, NULL, lmdb.MDB_NEXT_NODUP))
              except KeyNotFoundError:
                  # ``i`` is the index of the last key copied.
                  ret.resize(i + 1)
                  return ret
              i += 1
      finally:
          self._cur_close(icur)
  def all_terms(self, term_type):
      """
      Iterate over all the terms of one type found in the store.

      :param str term_type: Type of the terms to retrieve (``s``, ``p``,
          or ``o``).

      :return: Generator yielding each key of the related index converted
          with :meth:`from_key`.
      """
      term_keys = self._all_term_keys(term_type).to_tuple()
      for term_key in term_keys:
          yield self.from_key(term_key)
  cpdef tuple all_namespaces(self):
      """
      Return all registered namespaces.

      :rtype: tuple
      :return: One 2-tuple of decoded strings (key, value) for each entry
          of the ``pfx:ns`` database, in cursor order. Empty tuple if the
          database has no entries.
      """
      ret = []
      dcur = self._cur_open('pfx:ns')
      try:
          try:
              _check(lmdb.mdb_cursor_get(
                  dcur, &key_v, &data_v, lmdb.MDB_FIRST))
          except KeyNotFoundError:
              return tuple()

          while True:
              ret.append((
                  (<unsigned char *>key_v.mv_data)[: key_v.mv_size].decode(),
                  (<unsigned char *>data_v.mv_data)[: data_v.mv_size].decode()))
              try:
                  _check(lmdb.mdb_cursor_get(
                      dcur, &key_v, &data_v, lmdb.MDB_NEXT))
              except KeyNotFoundError:
                  # End of the database reached.
                  return tuple(ret)
      finally:
          self._cur_close(dcur)
  cpdef tuple all_contexts(self, triple=None):
      """
      Get all contexts, optionally only the ones associated with a triple.

      :param tuple triple: If given and all of its terms are truthy, only
          the values stored for that triple in ``spo:c`` are returned.
          Otherwise all the keys of the ``c:`` database are returned.

      :rtype: tuple
      :return: Context keys, as produced by ``Keyset.to_tuple()``. Empty
          tuple if nothing is found.
      """
      cdef:
          lmdb.MDB_stat stat
          size_t i = 0
          unsigned char spok[TRP_KLEN]

      # Evaluate the filter condition once: it drives both the choice of
      # the database and the way it is scanned.
      by_triple = bool(triple) and all(triple)

      cur = (
              self._cur_open('spo:c') if by_triple
              else self._cur_open('c:'))
      try:
          # The entry count of the whole database is an upper bound for
          # the number of results.
          _check(lmdb.mdb_stat(
              self.txn, lmdb.mdb_cursor_dbi(cur), &stat))
          ret = Keyset(stat.ms_entries, KLEN)

          if by_triple:
              self._to_triple_key(triple, &spok)
              key_v.mv_data = spok
              key_v.mv_size = TRP_KLEN
              try:
                  _check(lmdb.mdb_cursor_get(
                          cur, &key_v, &data_v, lmdb.MDB_SET_KEY))
              except KeyNotFoundError:
                  return tuple()

              # Collect the values (contexts) stored for this triple only.
              while True:
                  memcpy(ret.data + ret.itemsize * i, data_v.mv_data, KLEN)
                  try:
                      _check(lmdb.mdb_cursor_get(
                          cur, &key_v, &data_v, lmdb.MDB_NEXT_DUP))
                  except KeyNotFoundError:
                      break
                  i += 1
          else:
              try:
                  _check(lmdb.mdb_cursor_get(
                          cur, &key_v, &data_v, lmdb.MDB_FIRST))
              except KeyNotFoundError:
                  return tuple()

              # Collect every key of the context database.
              while True:
                  memcpy(
                      ret.data + ret.itemsize * i, key_v.mv_data, KLEN)
                  try:
                      _check(lmdb.mdb_cursor_get(
                          cur, &key_v, NULL, lmdb.MDB_NEXT))
                  except KeyNotFoundError:
                      break
                  i += 1

          # ``i`` is the index of the last key copied. Crop the result
          # set so that unused, uninitialized slots are not returned.
          ret.resize(i + 1)
          return ret.to_tuple()
      finally:
          self._cur_close(cur)
  1160. # Key conversion methods.
  cdef object from_key(self, const Key key):
      """
      Convert a single key into one term.

      :param Key key: The key to be converted.

      :return: The result of ``deserialize_to_rdflib`` applied to the
          buffer returned by :meth:`lookup_term` for the key.
      """
      cdef Buffer pk_t

      # lookup_term() requires the buffer to fill as its second argument.
      return deserialize_to_rdflib(self.lookup_term(key, &pk_t))
  cdef inline Buffer lookup_term(self, const Key key, Buffer *data):
      """
      Look up a term by key.

      :param Key key: The key to be looked up in the ``t:st`` database.
      :param Buffer *data: Buffer structure that is filled with the
          address and size of the serialized term.

      :rtype: Buffer
      :return: The contents written to ``data``.
      """
      cdef:
          lmdb.MDB_val key_v, data_v

      key_v.mv_data = key
      key_v.mv_size = KLEN

      _check(
              lmdb.mdb_get(self.txn, self.get_dbi('t:st'), &key_v, &data_v),
              f'Error getting data for key \'{key}\'.')

      # Hand the value found over to the caller. No data is copied: the
      # buffer points to the memory returned by LMDB.
      # NOTE(review): presumably only valid within the current
      # transaction; confirm against the LMDB documentation.
      data[0].addr = data_v.mv_data
      data[0].sz = data_v.mv_size

      return data[0]
  cdef tuple from_trp_key(self, TripleKey key):
      """
      Turn a triple key into its three terms.

      The triple key is read as three consecutive keys, at offsets ``0``,
      ``KLEN`` and ``DBL_KLEN``, each converted with :meth:`from_key`.

      :param TripleKey key: The triple key to be converted.

      :rtype: tuple
      :return: The 3 converted terms, in the order they appear in the key.
      """
      first_term = self.from_key(key)
      second_term = self.from_key(key + KLEN)
      third_term = self.from_key(key + DBL_KLEN)

      return (first_term, second_term, third_term)
  cdef inline void _to_key(self, term, Key *key) except *:
      """
      Find the key stored for a term.

      The term is serialized and hashed, and the hash is looked up in the
      ``th:t`` database. The value found is assigned to the key.

      :param rdflib.Term term: An RDFLib term (URIRef, BNode, Literal).
      :param Key key: Pointer to the key that will be produced.

      :rtype: void
      """
      cdef:
          Hash128 term_hash
          Buffer serialized

      # Serialize the term, then hash its serialized form.
      serialize_from_rdflib(term, &serialized)
      hash128(&serialized, &term_hash)

      # The hash is the lookup key in the hash-to-key database.
      key_v.mv_data = term_hash
      key_v.mv_size = HLEN

      dbi = self.get_dbi('th:t')
      logger.debug(f'DBI: {dbi}')
      _check(lmdb.mdb_get(self.txn, dbi, &key_v, &data_v))

      key[0] = <Key>data_v.mv_data
  cdef inline void _to_triple_key(
          self, tuple terms, TripleKey *tkey) except *:
      """
      Convert a tuple of 3 terms into a triple key.

      Each term is converted with :meth:`_to_key` and the resulting keys
      are written one after the other, ``KLEN`` bytes each.

      :param tuple terms: The 3 terms to convert, in triple key order.
      :param TripleKey *tkey: Pointer to the triple key to be filled.
      """
      cdef:
          unsigned char i
          Key key

      for i in range(3):
          self._to_key(terms[i], &key)
          if key is NULL:
              # A term in the triple is not found: stop before the key
              # is read from.
              return
          memcpy(tkey[0] + (KLEN * i), key, KLEN)
  cdef void _append(
          self, Buffer *value, Key *nkey,
          unsigned char *dblabel=b'', lmdb.MDB_txn *txn=NULL,
          unsigned int flags=0) except *:
      """
      Append a value to the end of a database under a newly created key.

      The new key is ``FIRST_KEY`` if the database is empty, otherwise the
      key following the last one as calculated by :meth:`_next_key`.

      :param Buffer *value: Value to append.
      :param Key *nkey: Pointer that receives the key that is inserted.
      :param unsigned char *dblabel: Label of the database to append to.
      :param lmdb.MDB_txn *txn: Transaction to use. If ``NULL``,
          ``self.txn`` is used.
      :param unsigned int flags: Additional flags for ``mdb_put``.
          ``MDB_APPEND`` is always added.

      :rtype: void
      """
      cdef:
          unsigned char key[KLEN]
          lmdb.MDB_cursor *cur

      if txn is NULL:
          txn = self.txn

      cur = self._cur_open(dblabel, txn=txn)
      try:
          _check(lmdb.mdb_cursor_get(cur, &key_v, NULL, lmdb.MDB_LAST))
      except KeyNotFoundError:
          # Empty database: start the key sequence.
          memcpy(nkey[0], FIRST_KEY, KLEN)
      else:
          memcpy(key, key_v.mv_data, KLEN)
          self._next_key(key, nkey)
      finally:
          self._cur_close(cur)

      key_v.mv_data = nkey[0]
      key_v.mv_size = KLEN
      data_v.mv_data = value[0].addr
      data_v.mv_size = value[0].sz
      # Check the return code as for all the other LMDB calls, so that a
      # failed insertion is raised instead of being silently ignored.
      _check(lmdb.mdb_put(
              txn, self.get_dbi(dblabel), &key_v, &data_v,
              flags | lmdb.MDB_APPEND))
  cdef void _next_key(self, const Key key, Key *nkey) except *:
      """
      Compute the byte sequence following a key in lexicographical order.

      The key is copied and incremented as a base-256 counter starting
      from its rightmost byte: a byte lower than 255 is increased by one
      and the calculation ends; a byte at 255 is reset to ``KEY_START``
      and the byte on its left is tried next.

      :param Key key: Current key.
      :param Key *nkey: Pointer to the key that receives the result.

      :raise RuntimeError: If the leftmost byte is reached and it is
          already 255, i.e. no further key is available.
      """
      cdef size_t pos = KLEN

      memcpy(nkey[0], key, KLEN)

      # Walk the bytes right to left.
      while pos > 0:
          pos -= 1

          if nkey[0][pos] < 255:
              nkey[0][pos] += 1
              break

          # This byte is at its maximum. If it is also the leftmost one,
          # the sequence is exhausted.
          if pos == 0:
              raise RuntimeError(
                  'BAD DAY: Sequence exhausted. No more '
                  'combinations are possible.')

          # Reset this byte and carry over to the one on its left.
          nkey[0][pos] = KEY_START