# lmdb_triplestore.pyx
  1. import logging
  2. from cython.parallel import prange
  3. from rdflib import Graph
  4. from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
  5. from lakesuperior.model.graph.graph import Imr
  6. from lakesuperior.store.base_lmdb_store import (
  7. KeyExistsError, KeyNotFoundError, LmdbError)
  8. from lakesuperior.store.base_lmdb_store cimport _check
  9. from libc.stdlib cimport malloc, free
  10. from libc.string cimport memcpy
  11. cimport lakesuperior.cy_include.collections as cc
  12. cimport lakesuperior.cy_include.cylmdb as lmdb
  13. from lakesuperior.model.base cimport (
  14. KLEN, DBL_KLEN, TRP_KLEN, QUAD_KLEN,
  15. KeyIdx, Key, DoubleKey, TripleKey, QuadKey,
  16. Buffer, buffer_dump
  17. )
  18. from lakesuperior.model.graph.graph cimport SimpleGraph, Imr
  19. from lakesuperior.model.graph.term cimport Term
  20. from lakesuperior.model.graph.triple cimport BufferTriple
  21. from lakesuperior.store.base_lmdb_store cimport (
  22. BaseLmdbStore, data_v, dbi, key_v)
  23. from lakesuperior.model.graph.term cimport (
  24. deserialize_to_rdflib, serialize_from_rdflib)
  25. from lakesuperior.model.structures.keyset cimport Keyset
  26. from lakesuperior.model.structures.hash cimport (
  27. HLEN_128 as HLEN, Hash128, hash128)
  28. lookup_rank = [0, 2, 1]
  29. """
  30. Order in which keys are looked up if two terms are bound.
  31. The indices with the smallest average number of values per key should be
  32. looked up first.
  33. 0 = s:po
  34. 1 = p:so
  35. 2 = o:sp
  36. If we want to get fancy, this can be rebalanced from time to time by
  37. looking up the number of keys in (s:po, p:so, o:sp).
  38. """
  39. lookup_ordering = [
  40. [0, 1, 2], # spo
  41. [1, 0, 2], # pso
  42. [2, 0, 1], # osp
  43. ]
  44. lookup_ordering_2bound = [
  45. [1, 2, 0], # po:s
  46. [0, 2, 1], # so:p
  47. [0, 1, 2], # sp:o
  48. ]
  49. logger = logging.getLogger(__name__)
  50. cdef class LmdbTriplestore(BaseLmdbStore):
  51. """
  52. Low-level storage layer.
  53. This class extends the RDFLib-compatible :py:class:`BaseLmdbStore` and maps
  54. triples and contexts to key-value records in LMDB.
  55. This class uses the original LMDB C API rather than the Python bindings,
  56. because several data manipulations happen after retrieval from the store,
  57. which are more efficiently performed at the C level.
  58. """
  59. dbi_labels = [
  60. # Main data
  61. # Term key to serialized term content
  62. 't:st',
  63. # Joined triple keys to context key
  64. 'spo:c',
  65. # This has empty values and is used to keep track of empty contexts.
  66. 'c:',
  67. # Prefix to namespace
  68. 'pfx:ns',
  69. # Indices
  70. # Namespace to prefix
  71. 'ns:pfx',
  72. # Term hash to triple key
  73. 'th:t',
  74. # Lookups
  75. 's:po',
  76. 'p:so',
  77. 'o:sp',
  78. 'po:s',
  79. 'so:p',
  80. 'sp:o',
  81. 'c:spo',
  82. ]
  83. lookup_indices = [
  84. b's:po',
  85. b'p:so',
  86. b'o:sp',
  87. b'po:s',
  88. b'so:p',
  89. b'sp:o',
  90. ]
  91. dbi_flags = {
  92. 'c': lmdb.MDB_INTEGERKEY,
  93. 's:po': INT_KEY_MASK,
  94. 'p:so': INT_KEY_MASK,
  95. 'o:sp': INT_KEY_MASK,
  96. 'po:s': INT_DUP_MASK,
  97. 'so:p': INT_DUP_MASK,
  98. 'sp:o': INT_DUP_MASK,
  99. 'c:spo': INT_KEY_MASK,
  100. 'spo:c': INT_DUP_MASK,
  101. }
  102. flags = 0
  103. options = {
  104. 'map_size': 1024 ** 4 # 1Tb.
  105. }
  106. # DB management methods.
  107. cpdef dict stats(self):
  108. """
  109. Gather statistics about the database."""
  110. st = self._stats()
  111. st['num_triples'] = st['db_stats']['spo:c']['ms_entries']
  112. return st
  113. cpdef size_t _len(self, context=None) except -1:
  114. """
  115. Return the length of the dataset.
  116. The RDFLib interface defines `__len__` in a nonstandard way that
  117. causes a Cython compilation error, so this method is called by the
  118. `__len__` method of its Python counterpart.
  119. """
  120. cdef:
  121. size_t ct
  122. Key ck
  123. if context is not None:
  124. ck = [self._to_key_idx(context)]
  125. key_v.mv_data = &ck
  126. key_v.mv_size = KLEN
  127. cur = self._cur_open('c:spo')
  128. try:
  129. _check(lmdb.mdb_cursor_get(
  130. cur, &key_v, NULL, lmdb.MDB_SET))
  131. _check(lmdb.mdb_cursor_count(cur, &ct))
  132. except KeyNotFoundError:
  133. return 0
  134. else:
  135. return ct
  136. finally:
  137. #pass
  138. self._cur_close(cur)
  139. else:
  140. return self.stats()['num_triples']
  141. ## PRIVATE METHODS ##
  142. # Triple and graph methods.
  143. cpdef add(self, triple, context=None, quoted=False):
  144. """
  145. Add a triple and start indexing.
  146. :param tuple(rdflib.Identifier) triple: Tuple of three identifiers.
  147. :param context: Context identifier. ``None`` inserts in the default
  148. graph.
  149. :type context: rdflib.Identifier or None
  150. :param bool quoted: Not used.
  151. """
  152. cdef:
  153. lmdb.MDB_cursor *icur
  154. lmdb.MDB_val spo_v, c_v, null_v
  155. unsigned char i
  156. Hash128 thash
  157. QuadKey spock
  158. Buffer pk_t
  159. c = self._normalize_context(context)
  160. if c is None:
  161. c = RDFLIB_DEFAULT_GRAPH_URI
  162. # TODO: figure out how the RDFLib dispatcher is inherited
  163. # (and if there is a use for it in a first place)
  164. #Store.add(self, triple, context)
  165. s, p, o = triple
  166. #logger.debug('Trying to add a triple.')
  167. icur = self._cur_open('th:t')
  168. try:
  169. for i, term_obj in enumerate((s, p, o, c)):
  170. serialize_from_rdflib(term_obj, &pk_t)
  171. hash128(&pk_t, &thash)
  172. try:
  173. key_v.mv_data = thash
  174. key_v.mv_size = HLEN
  175. _check(lmdb.mdb_get(
  176. self.txn, self.get_dbi('th:t'), &key_v, &data_v))
  177. spock[i] = (<Key>data_v.mv_data)[0]
  178. #logger.debug('Hash {} found. Not adding.'.format(thash[: HLEN]))
  179. except KeyNotFoundError:
  180. # If term_obj is not found, add it...
  181. #logger.debug('Hash {} not found. Adding to DB.'.format(
  182. # thash[: HLEN]))
  183. spock[i] = self._append(&pk_t, dblabel=b't:st')
  184. # ...and index it.
  185. #logger.debug('Indexing on th:t: {}: {}'.format(
  186. # thash[: HLEN], spock[i])
  187. key_v.mv_data = thash
  188. key_v.mv_size = HLEN
  189. data_v.mv_data = spock + i * KLEN
  190. data_v.mv_size = KLEN
  191. _check(
  192. lmdb.mdb_cursor_put(icur, &key_v, &data_v, 0),
  193. 'Error setting key {}.'.format(thash))
  194. finally:
  195. #pass
  196. self._cur_close(icur)
  197. #logger.debug('Triple add action completed.')
  198. spo_v.mv_data = spock
  199. spo_v.mv_size = TRP_KLEN
  200. c_v.mv_data = spock + TRP_KLEN
  201. c_v.mv_size = KLEN
  202. null_v.mv_data = b''
  203. null_v.mv_size = 0
  204. #logger.debug('Adding context.')
  205. try:
  206. _check(lmdb.mdb_put(
  207. self.txn, self.get_dbi('c:'), &c_v, &null_v,
  208. lmdb.MDB_NOOVERWRITE))
  209. except KeyExistsError:
  210. pass
  211. #logger.debug('Added c:.')
  212. try:
  213. # Add triple:context association.
  214. _check(lmdb.mdb_put(
  215. self.txn, self.get_dbi('spo:c'), &spo_v, &c_v,
  216. lmdb.MDB_NODUPDATA))
  217. except KeyExistsError:
  218. pass
  219. #logger.debug('Added spo:c.')
  220. try:
  221. # Index context:triple association.
  222. _check(lmdb.mdb_put(
  223. self.txn, self.get_dbi('c:spo'), &c_v, &spo_v,
  224. lmdb.MDB_NODUPDATA))
  225. except KeyExistsError:
  226. pass
  227. #logger.debug('Added c:spo.')
  228. #logger.debug('All main data entered. Indexing.')
  229. self._index_triple(IDX_OP_ADD, [spock[0], spock[1], spock[2]])
  230. cpdef add_graph(self, graph):
  231. """
  232. Add a graph to the database.
  233. This creates an empty graph by associating the graph URI with the
  234. pickled `None` value. This prevents from removing the graph when all
  235. triples are removed.
  236. This may be called by read-only operations:
  237. https://github.com/RDFLib/rdflib/blob/master/rdflib/graph.py#L1623
  238. In which case it needs to open a write transaction. This is not ideal
  239. but the only way to handle datasets in RDFLib.
  240. :param rdflib.URIRef graph: URI of the named graph to add.
  241. """
  242. cdef Buffer _sc
  243. if isinstance(graph, Graph):
  244. graph = graph.identifier
  245. # FIXME This is all wrong.
  246. serialize_from_rdflib(graph, &_sc)
  247. self._add_graph(&_sc)
  248. cdef void _add_graph(self, Buffer *pk_gr) except *:
  249. """
  250. Add a graph.
  251. :param pk_gr: Pickled context URIRef object.
  252. :type pk_gr: Buffer*
  253. """
  254. cdef:
  255. Hash128 chash
  256. Key ck
  257. lmdb.MDB_txn *tmp_txn
  258. hash128(pk_gr, &chash)
  259. #logger.debug('Adding a graph.')
  260. if not self._key_exists(chash, HLEN, b'th:t'):
  261. # Insert context term if not existing.
  262. if self.is_txn_rw:
  263. tmp_txn = self.txn
  264. else:
  265. _check(lmdb.mdb_txn_begin(self.dbenv, NULL, 0, &tmp_txn))
  266. # Open new R/W transactions.
  267. #logger.debug('Opening a temporary RW transaction.')
  268. try:
  269. #logger.debug('Working in existing RW transaction.')
  270. # Use existing R/W transaction.
  271. # Main entry.
  272. ck[0] = self._append(pk_gr, b't:st', txn=tmp_txn)
  273. # Index.
  274. key_v.mv_data = chash
  275. key_v.mv_size = HLEN
  276. data_v.mv_data = ck
  277. data_v.mv_size = KLEN
  278. _check(lmdb.mdb_put(
  279. self.txn, self.get_dbi(b'th:t'), &key_v, &data_v, 0
  280. ))
  281. # Add to list of contexts.
  282. key_v.mv_data = ck
  283. key_v.mv_size = KLEN
  284. data_v.mv_data = ck # Whatever, length is zero anyways
  285. data_v.mv_size = 0
  286. _check(lmdb.mdb_put(
  287. self.txn, self.get_dbi(b'c:'), &key_v, &data_v, 0
  288. ))
  289. if not self.is_txn_rw:
  290. _check(lmdb.mdb_txn_commit(tmp_txn))
  291. except:
  292. if not self.is_txn_rw:
  293. lmdb.mdb_txn_abort(tmp_txn)
  294. raise
  295. cpdef void _remove(self, tuple triple_pattern, context=None) except *:
  296. cdef:
  297. unsigned char spok[TRP_KLEN]
  298. void* cur
  299. cc.ArrayIter it
  300. Key ck
  301. lmdb.MDB_val spok_v, ck_v
  302. #logger.debug('Removing triple: {}'.format(triple_pattern))
  303. if context is not None:
  304. try:
  305. ck = [self._to_key_idx(context)]
  306. except KeyNotFoundError:
  307. # If context is specified but not found, return to avoid
  308. # deleting the wrong triples.
  309. return
  310. # Get the matching pattern.
  311. match_set = self.triple_keys(triple_pattern, context)
  312. dcur = self._cur_open('spo:c')
  313. icur = self._cur_open('c:spo')
  314. try:
  315. spok_v.mv_size = TRP_KLEN
  316. # If context was specified, remove only associations with that context.
  317. cc.array_iter_init(&it, match_set.data)
  318. if context is not None:
  319. #logger.debug('Removing triples in matching context.')
  320. ck_v.mv_data = ck
  321. ck_v.mv_size = KLEN
  322. while cc.array_iter_next(&it, &cur) != cc.CC_ITER_END:
  323. spok_v.mv_data = cur
  324. # Delete spo:c entry.
  325. try:
  326. _check(lmdb.mdb_cursor_get(
  327. dcur, &spok_v, &ck_v, lmdb.MDB_GET_BOTH))
  328. except KeyNotFoundError:
  329. pass
  330. else:
  331. _check(lmdb.mdb_cursor_del(dcur, 0))
  332. # Restore ck after delete.
  333. ck_v.mv_data = ck
  334. # Delete c:spo entry.
  335. try:
  336. _check(lmdb.mdb_cursor_get(
  337. icur, &ck_v, &spok_v, lmdb.MDB_GET_BOTH))
  338. except KeyNotFoundError:
  339. pass
  340. else:
  341. _check(lmdb.mdb_cursor_del(icur, 0))
  342. # Delete lookup indices, only if no other context
  343. # association is present.
  344. # spok_v has changed on mdb_cursor_del. Restore.
  345. spok_v.mv_data = cur
  346. try:
  347. _check(lmdb.mdb_cursor_get(
  348. dcur, &spok_v, NULL, lmdb.MDB_SET))
  349. except KeyNotFoundError:
  350. self._index_triple(IDX_OP_REMOVE, <TripleKey>cur)
  351. i += 1
  352. # If no context is specified, remove all associations.
  353. else:
  354. #logger.debug('Removing triples in all contexts.')
  355. # Loop over all SPO matching the triple pattern.
  356. while cc.array_iter_next(&it, &cur) != cc.CC_ITER_END:
  357. spok_v.mv_data = cur
  358. # Loop over all context associations for this SPO.
  359. try:
  360. _check(lmdb.mdb_cursor_get(
  361. dcur, &spok_v, &ck_v, lmdb.MDB_SET_KEY))
  362. except KeyNotFoundError:
  363. # Move on to the next SPO.
  364. continue
  365. else:
  366. ck = <Key>ck_v.mv_data
  367. logger.debug(f'Removing {<TripleKey>cur} from main.')
  368. while True:
  369. # Delete c:spo association.
  370. try:
  371. _check(lmdb.mdb_cursor_get(
  372. icur, &ck_v, &spok_v, lmdb.MDB_GET_BOTH))
  373. except KeyNotFoundError:
  374. pass
  375. else:
  376. lmdb.mdb_cursor_del(icur, 0)
  377. # Restore the pointer to the deleted SPO.
  378. spok_v.mv_data = cur
  379. # Move on to next associated context.
  380. try:
  381. _check(lmdb.mdb_cursor_get(
  382. dcur, &spok_v, &ck_v, lmdb.MDB_NEXT_DUP))
  383. except KeyNotFoundError:
  384. break
  385. # Then delete the spo:c association.
  386. try:
  387. _check(lmdb.mdb_cursor_get(
  388. dcur, &spok_v, &ck_v, lmdb.MDB_SET))
  389. except KeyNotFoundError:
  390. pass
  391. else:
  392. lmdb.mdb_cursor_del(dcur, lmdb.MDB_NODUPDATA)
  393. self._index_triple(IDX_OP_REMOVE, <TripleKey>cur)
  394. #ck_v.mv_data = ck # Unnecessary?
  395. finally:
  396. i += 1
  397. finally:
  398. #logger.debug('Closing spo:c in _remove.')
  399. self._cur_close(dcur)
  400. #logger.debug('Closing c:spo in _remove.')
  401. self._cur_close(icur)
  402. cdef void _index_triple(self, int op, TripleKey spok) except *:
  403. """
  404. Update index for a triple and context (add or remove).
  405. :param str op: one of ``IDX_OP_ADD`` or ``IDX_OP_REMOVE``.
  406. :param TripleKey spok: Triple key to index.
  407. """
  408. cdef:
  409. Key keys[3]
  410. DoubleKey dbl_keys[3]
  411. size_t i = 0
  412. lmdb.MDB_val key_v, dbl_key_v
  413. keys = [
  414. [spok[0]], # sk
  415. [spok[1]], # pk
  416. [spok[2]], # ok
  417. ]
  418. #keys[0] = spok[:1] # sk
  419. #keys[1] = spok[1:2] # pk
  420. #keys[2] = spok[2:3] # ok
  421. dbl_keys = [
  422. [spok[1], spok[2]], # pok
  423. [spok[0], spok[2]], # sok
  424. [spok[0], spok[1]], # spk
  425. ]
  426. #dbl_keys[0] = spok[1:3] # pok
  427. #dbl_keys[1] = [spok[0], spok[2]] # sok
  428. #dbl_keys[2] = spok[:2] # spk
  429. #logger.debug('''Indices:
  430. #spok: {}
  431. #sk: {}
  432. #pk: {}
  433. #ok: {}
  434. #pok: {}
  435. #sok: {}
  436. #spk: {}
  437. #'''.format(
  438. # spok[:TRP_KLEN],
  439. # keys[0][:KLEN], keys[1][:KLEN], keys[2][:KLEN],
  440. # dbl_keys[0][:DBL_KLEN], dbl_keys[1][:DBL_KLEN], dbl_keys[2][:DBL_KLEN]))
  441. key_v.mv_size = KLEN
  442. dbl_key_v.mv_size = DBL_KLEN
  443. #logger.debug('Start indexing: {}.'.format(spok[: TRP_KLEN]))
  444. if op == IDX_OP_REMOVE:
  445. logger.debug(f'Remove {spok[0]} from indices.')
  446. else:
  447. logger.debug(f'Add {spok[0]} to indices.')
  448. while i < 3:
  449. cur1 = self._cur_open(self.lookup_indices[i]) # s:po, p:so, o:sp
  450. cur2 = self._cur_open(self.lookup_indices[i + 3])# po:s, so:p, sp:o
  451. try:
  452. key_v.mv_data = keys[i]
  453. dbl_key_v.mv_data = dbl_keys[i]
  454. # Removal op indexing.
  455. if op == IDX_OP_REMOVE:
  456. try:
  457. _check(lmdb.mdb_cursor_get(
  458. cur1, &key_v, &dbl_key_v, lmdb.MDB_GET_BOTH))
  459. logger.debug(
  460. f'Removed: {keys[i]}, {dbl_keys[i]}')
  461. except KeyNotFoundError:
  462. logger.debug(
  463. f'Not found: {keys[i]}, {dbl_keys[i]}')
  464. pass
  465. else:
  466. _check(lmdb.mdb_cursor_del(cur1, 0))
  467. # Restore pointers after delete.
  468. key_v.mv_data = keys[i]
  469. dbl_key_v.mv_data = dbl_keys[i]
  470. try:
  471. _check(lmdb.mdb_cursor_get(
  472. cur2, &dbl_key_v, &key_v, lmdb.MDB_GET_BOTH))
  473. logger.debug(f'Removed: {dbl_keys[i]}, {keys[i]}')
  474. except KeyNotFoundError:
  475. logger.debug(f'Not found: {dbl_keys[i]}, {keys[i]}')
  476. pass
  477. else:
  478. _check(lmdb.mdb_cursor_del(cur2, 0))
  479. # Addition op indexing.
  480. elif op == IDX_OP_ADD:
  481. logger.debug('Adding to index `{}`: {}, {}'.format(
  482. self.lookup_indices[i],
  483. <Key>key_v.mv_data,
  484. <DoubleKey>dbl_key_v.mv_data
  485. ))
  486. try:
  487. _check(lmdb.mdb_cursor_put(
  488. cur1, &key_v, &dbl_key_v, lmdb.MDB_NODUPDATA))
  489. except KeyExistsError:
  490. logger.debug(f'Key {keys[i]} exists already.')
  491. pass
  492. logger.debug('Adding to index `{}`: {}, {}'.format(
  493. self.lookup_indices[i + 3],
  494. <DoubleKey>dbl_key_v.mv_data,
  495. <Key>key_v.mv_data
  496. ))
  497. try:
  498. _check(lmdb.mdb_cursor_put(
  499. cur2, &dbl_key_v, &key_v, lmdb.MDB_NODUPDATA))
  500. except KeyExistsError:
  501. logger.debug(f'Double key {dbl_keys[i]} exists already.')
  502. pass
  503. else:
  504. raise ValueError(
  505. 'Index operation \'{}\' is not supported.'.format(op))
  506. i += 1
  507. finally:
  508. #pass
  509. self._cur_close(cur1)
  510. self._cur_close(cur2)
  511. cpdef void _remove_graph(self, object gr_uri) except *:
  512. """
  513. Delete a context.
  514. """
  515. cdef:
  516. Hash128 chash
  517. Key ck
  518. lmdb.MDB_val ck_v, chash_v
  519. Buffer pk_c
  520. #logger.debug('Deleting context: {}'.format(gr_uri))
  521. #logger.debug('Pickled context: {}'.format(serialize(gr_uri)))
  522. # Gather information on the graph prior to deletion.
  523. try:
  524. ck = [self._to_key_idx(gr_uri)]
  525. except KeyNotFoundError:
  526. return
  527. # Remove all triples and indices associated with the graph.
  528. self._remove((None, None, None), gr_uri)
  529. # Remove the graph if it is in triples.
  530. self._remove((gr_uri, None, None))
  531. self._remove((None, None, gr_uri))
  532. # Clean up all terms related to the graph.
  533. serialize_from_rdflib(gr_uri, &pk_c)
  534. hash128(&pk_c, &chash)
  535. ck_v.mv_size = KLEN
  536. chash_v.mv_size = HLEN
  537. try:
  538. ck_v.mv_data = ck
  539. _check(lmdb.mdb_del(self.txn, self.get_dbi(b'c:'), &ck_v, NULL))
  540. ck_v.mv_data = ck
  541. _check(lmdb.mdb_del(self.txn, self.get_dbi(b't:st'), &ck_v, NULL))
  542. chash_v.mv_data = chash
  543. _check(lmdb.mdb_del(self.txn, self.get_dbi(b'th:t'), &chash_v, NULL))
  544. except KeyNotFoundError:
  545. pass
  546. # Lookup methods.
  547. def contexts(self, triple=None):
  548. """
  549. Get a list of all contexts.
  550. :rtype: set(URIRef)
  551. """
  552. cdef:
  553. size_t sz, i
  554. KeyIdx* match
  555. try:
  556. self.all_contexts(&match, &sz, triple)
  557. ret = set()
  558. for i in range(sz):
  559. ret.add(self.from_key(match + i))
  560. finally:
  561. free(match)
  562. return ret
  563. def triples(self, triple_pattern, context=None):
  564. """
  565. Generator over matching triples.
  566. :param tuple triple_pattern: 3 RDFLib terms
  567. :param context: Context graph, if available.
  568. :type context: rdflib.Graph or None
  569. :rtype: Iterator
  570. :return: Generator over triples and contexts in which each result has
  571. the following format::
  572. (s, p, o), generator(contexts)
  573. Where the contexts generator lists all context that the triple appears
  574. in.
  575. """
  576. cdef:
  577. size_t i = 0, j = 0
  578. void* it_cur
  579. lmdb.MDB_val key_v, data_v
  580. cc.ArrayIter it
  581. # This sounds strange, RDFLib should be passing None at this point,
  582. # but anyway...
  583. context = self._normalize_context(context)
  584. logger.debug(
  585. 'Getting triples for: {}, {}'.format(triple_pattern, context))
  586. rset = self.triple_keys(triple_pattern, context)
  587. #logger.debug('Triple keys found: {}'.format(rset.data[:rset.size]))
  588. cur = self._cur_open('spo:c')
  589. try:
  590. key_v.mv_size = TRP_KLEN
  591. cc.array_iter_init(&it, rset.data)
  592. while cc.array_iter_next(&it, &it_cur) != cc.CC_ITER_END:
  593. #logger.debug('Checking contexts for triples: {}'.format(
  594. # (rset.data + i * TRP_KLEN)[:TRP_KLEN]))
  595. key_v.mv_data = it_cur
  596. # Get contexts associated with each triple.
  597. contexts = []
  598. # This shall never be MDB_NOTFOUND.
  599. _check(lmdb.mdb_cursor_get(cur, &key_v, &data_v, lmdb.MDB_SET))
  600. while True:
  601. c_uri = self.from_key(<Key>data_v.mv_data)
  602. contexts.append(Imr(uri=c_uri, store=self))
  603. try:
  604. _check(lmdb.mdb_cursor_get(
  605. cur, &key_v, &data_v, lmdb.MDB_NEXT_DUP))
  606. except KeyNotFoundError:
  607. break
  608. #logger.debug('Triple keys before yield: {}: {}.'.format(
  609. # (<TripleKey>key_v.mv_data)[:TRP_KLEN], tuple(contexts)))
  610. yield self.from_trp_key(
  611. (<TripleKey>key_v.mv_data)), tuple(contexts)
  612. #logger.debug('After yield.')
  613. finally:
  614. self._cur_close(cur)
  615. cpdef SimpleGraph graph_lookup(
  616. self, triple_pattern, context=None, uri=None, copy=False
  617. ):
  618. """
  619. Create a SimpleGraph or Imr instance from buffers from the store.
  620. The instance is only valid within the LMDB transaction that created it.
  621. :param tuple triple_pattern: 3 RDFLib terms
  622. :param context: Context graph, if available.
  623. :type context: rdflib.Graph or None
  624. :param str uri: URI for the resource. If provided, the resource
  625. returned will be an Imr, otherwise a SimpleGraph.
  626. :rtype: Iterator
  627. :return: Generator over triples and contexts in which each result has
  628. the following format::
  629. (s, p, o), generator(contexts)
  630. Where the contexts generator lists all context that the triple appears
  631. in.
  632. """
  633. cdef:
  634. void* spok
  635. size_t cur = 0
  636. Buffer* buffers
  637. BufferTriple* btrp
  638. SimpleGraph gr
  639. cc.ArrayIter it
  640. gr = Imr(uri=uri) if uri else SimpleGraph()
  641. #logger.debug(
  642. # 'Getting triples for: {}, {}'.format(triple_pattern, context))
  643. match = self.triple_keys(triple_pattern, context)
  644. btrp = <BufferTriple*>gr.pool.alloc(match.ct, sizeof(BufferTriple))
  645. buffers = <Buffer*>gr.pool.alloc(3 * match.ct, sizeof(Buffer))
  646. cc.array_iter_init(&it, match.data)
  647. while cc.array_iter_next(&it, &spok):
  648. btrp[cur].s = buffers + cur * 3
  649. btrp[cur].p = buffers + cur * 3 + 1
  650. btrp[cur].o = buffers + cur * 3 + 2
  651. #logger.info('Looking up key: {}'.format(spok[:KLEN]))
  652. self.lookup_term(<KeyIdx*>spok, buffers + cur * 3)
  653. #logger.info(f'Found triple s: {buffer_dump(btrp[cur].s)}')
  654. #logger.info('Looking up key: {}'.format(spok[KLEN:DBL_KLEN]))
  655. self.lookup_term(<KeyIdx*>(spok + KLEN), buffers + cur * 3 + 1)
  656. #logger.info(f'Found triple p: {buffer_dump(btrp[cur].p)}')
  657. #logger.info('Looking up key: {}'.format(spok[DBL_KLEN:TRP_KLEN]))
  658. self.lookup_term(<KeyIdx*>(spok + DBL_KLEN), buffers + cur * 3 + 2)
  659. #logger.info(f'Found triple o: {buffer_dump(btrp[cur].o)}')
  660. gr.add_triple(btrp + cur, copy)
  661. cur += 1
  662. return gr
  663. cdef Keyset triple_keys(self, tuple triple_pattern, context=None):
  664. """
  665. Top-level lookup method.
  666. This method is used by `triples` which returns native Python tuples,
  667. as well as by other methods that need to iterate and filter triple
  668. keys without incurring in the overhead of converting them to triples.
  669. :param tuple triple_pattern: 3 RDFLib terms
  670. :param context: Context graph or URI, or None.
  671. :type context: rdflib.term.Identifier or None
  672. """
  673. # TODO: Improve performance by allowing passing contexts as a tuple.
  674. cdef:
  675. size_t ct = 0, flt_j = 0, i = 0, j = 0, c_size
  676. void* cur
  677. cc.ArrayIter it
  678. lmdb.MDB_cursor *icur
  679. lmdb.MDB_val key_v, data_v
  680. Key tk, ck
  681. TripleKey spok
  682. Keyset flt_res, ret
  683. if context is not None:
  684. #serialize(context, &pk_c, &c_size)
  685. try:
  686. ck = [self._to_key_idx(context)]
  687. except KeyNotFoundError:
  688. # Context not found.
  689. return Keyset()
  690. icur = self._cur_open('c:spo')
  691. try:
  692. key_v.mv_data = ck
  693. key_v.mv_size = KLEN
  694. # s p o c
  695. if all(triple_pattern):
  696. #logger.debug('Lookup: s p o c')
  697. for i, term in enumerate(triple_pattern):
  698. try:
  699. tk = [self._to_key_idx(term)]
  700. except KeyNotFoundError:
  701. # Context not found.
  702. return Keyset()
  703. if tk is NULL:
  704. # A term in the triple is not found.
  705. return Keyset()
  706. spok[i] = tk[0]
  707. data_v.mv_data = spok
  708. data_v.mv_size = TRP_KLEN
  709. #logger.debug(
  710. # 'Found spok {}. Matching with context {}'.format(
  711. # (<TripleKey>data_v.mv_data)[: TRP_KLEN],
  712. # (<Key>key_v.mv_data)[: KLEN]))
  713. try:
  714. _check(lmdb.mdb_cursor_get(
  715. icur, &key_v, &data_v, lmdb.MDB_GET_BOTH))
  716. except KeyNotFoundError:
  717. # Triple not found.
  718. #logger.debug('spok / ck pair not found.')
  719. return Keyset()
  720. ret = Keyset(1)
  721. cc.array_add(ret.data, &spok)
  722. return ret
  723. # ? ? ? c
  724. elif not any(triple_pattern):
  725. # Get all triples from the context
  726. #logger.debug('Lookup: ? ? ? c')
  727. try:
  728. _check(lmdb.mdb_cursor_get(
  729. icur, &key_v, &data_v, lmdb.MDB_SET))
  730. except KeyNotFoundError:
  731. # Triple not found.
  732. return Keyset()
  733. _check(lmdb.mdb_cursor_count(icur, &ct))
  734. ret = Keyset(ct)
  735. #logger.debug(f'Entries in c:spo: {ct}')
  736. #logger.debug(f'Allocated {ret.size} bytes.')
  737. #logger.debug('Looking in key: {}'.format(
  738. # (<unsigned char *>key_v.mv_data)[:key_v.mv_size]))
  739. _check(lmdb.mdb_cursor_get(
  740. icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
  741. while True:
  742. #logger.debug('Data page: {}'.format(
  743. # (<unsigned char *>data_v.mv_data)[: data_v.mv_size]))
  744. cc.array_add(ret.data, data_v.mv_data)
  745. try:
  746. _check(lmdb.mdb_cursor_get(
  747. icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
  748. except KeyNotFoundError:
  749. return ret
  750. # Regular lookup. Filter _lookup() results by context.
  751. else:
  752. try:
  753. res = self._lookup(triple_pattern)
  754. except KeyNotFoundError:
  755. return Keyset(0)
  756. #logger.debug('Allocating for context filtering.')
  757. key_v.mv_data = ck
  758. key_v.mv_size = KLEN
  759. data_v.mv_size = TRP_KLEN
  760. flt_res = Keyset(res.ct)
  761. cc.array_iter_init(&it, res.data)
  762. while cc.array_iter_next(&it, &cur) != cc.CC_ITER_END:
  763. #logger.debug('Checking row #{}'.format(flt_j))
  764. data_v.mv_data = cur
  765. #logger.debug('Checking c:spo {}, {}'.format(
  766. # (<unsigned char *>key_v.mv_data)[: key_v.mv_size],
  767. # (<unsigned char *>data_v.mv_data)[: data_v.mv_size]))
  768. try:
  769. # Verify that the triple is associated with the
  770. # context being searched.
  771. _check(lmdb.mdb_cursor_get(
  772. icur, &key_v, &data_v, lmdb.MDB_GET_BOTH))
  773. except KeyNotFoundError:
  774. #logger.debug('Discarding source[{}].'.format(j))
  775. continue
  776. else:
  777. cc.array_add(flt_res.data, cur)
  778. return flt_res
  779. finally:
  780. self._cur_close(icur)
  781. # Unfiltered lookup. No context checked.
  782. else:
  783. #logger.debug('No context in query.')
  784. try:
  785. res = self._lookup(triple_pattern)
  786. except KeyNotFoundError:
  787. return Keyset()
  788. #logger.debug('Res data before triple_keys return: {}'.format(
  789. # res.data[: res.size]))
  790. return res
  791. cdef Keyset _lookup(self, tuple triple_pattern):
  792. """
  793. Look up triples in the indices based on a triple pattern.
  794. :rtype: Iterator
  795. :return: Matching triple keys.
  796. """
  797. cdef:
  798. TripleKey spok
  799. lmdb.MDB_stat db_stat
  800. size_t ct = 0, i = 0
  801. lmdb.MDB_val spok_v, ck_v
  802. s, p, o = triple_pattern
  803. if s is not None:
  804. if p is not None:
  805. # s p o
  806. if o is not None:
  807. spok_v.mv_data = spok
  808. spok_v.mv_size = TRP_KLEN
  809. try:
  810. spok = [
  811. self._to_key_idx(triple_pattern[0]),
  812. self._to_key_idx(triple_pattern[1]),
  813. self._to_key_idx(triple_pattern[2]),
  814. ]
  815. _check(lmdb.mdb_get(
  816. self.txn, self.get_dbi('spo:c'), &spok_v, &ck_v))
  817. except KeyNotFoundError:
  818. return Keyset()
  819. matches = Keyset(1)
  820. cc.array_add(matches.data, &spok)
  821. return matches
  822. # s p ?
  823. else:
  824. return self._lookup_2bound(0, s, 1, p)
  825. else:
  826. # s ? o
  827. if o is not None:
  828. return self._lookup_2bound(0, s, 2, o)
  829. # s ? ?
  830. else:
  831. return self._lookup_1bound(0, s)
  832. else:
  833. if p is not None:
  834. # ? p o
  835. if o is not None:
  836. return self._lookup_2bound(1, p, 2, o)
  837. # ? p ?
  838. else:
  839. return self._lookup_1bound(1, p)
  840. else:
  841. # ? ? o
  842. if o is not None:
  843. return self._lookup_1bound(2, o)
  844. # ? ? ?
  845. else:
  846. # Get all triples in the database.
  847. #logger.debug('Getting all DB triples.')
  848. dcur = self._cur_open('spo:c')
  849. try:
  850. _check(
  851. lmdb.mdb_stat(
  852. self.txn, lmdb.mdb_cursor_dbi(dcur), &db_stat
  853. ), 'Error gathering DB stats.'
  854. )
  855. ct = db_stat.ms_entries
  856. ret = Keyset(ct)
  857. #logger.debug(f'Triples found: {ct}')
  858. if ct == 0:
  859. return Keyset()
  860. _check(lmdb.mdb_cursor_get(
  861. dcur, &key_v, &data_v, lmdb.MDB_FIRST))
  862. while True:
  863. cc.array_add(ret.data, key_v.mv_data)
  864. try:
  865. _check(lmdb.mdb_cursor_get(
  866. dcur, &key_v, &data_v, lmdb.MDB_NEXT_NODUP))
  867. except KeyNotFoundError:
  868. break
  869. i += 1
  870. return ret
  871. finally:
  872. self._cur_close(dcur)
  873. cdef Keyset _lookup_1bound(self, unsigned char idx, term):
  874. """
  875. Lookup triples for a pattern with one bound term.
  876. :param str idx_name: The index to look up as one of the keys of
  877. ``_lookup_ordering``.
  878. :param rdflib.URIRef term: Bound term to search for.
  879. :rtype: Iterator(bytes)
  880. :return: SPO keys matching the pattern.
  881. """
  882. cdef:
  883. unsigned int dbflags
  884. unsigned char term_order[3]
  885. size_t ct, ret_offset = 0, src_pos, ret_pos
  886. size_t j # Must be signed for older OpenMP versions
  887. lmdb.MDB_cursor *icur
  888. lmdb.MDB_val key_v, data_v
  889. Key luk
  890. TripleKey spok
  891. #logger.debug(f'lookup 1bound: {idx}, {term}')
  892. try:
  893. luk = [self._to_key_idx(term)]
  894. except KeyNotFoundError:
  895. return Keyset()
  896. logging.debug('luk: {}'.format(luk))
  897. term_order = lookup_ordering[idx]
  898. icur = self._cur_open(self.lookup_indices[idx])
  899. logging.debug(f'DB label: {self.lookup_indices[idx]}')
  900. logging.debug('term order: {}'.format(term_order[: 3]))
  901. try:
  902. key_v.mv_data = luk
  903. key_v.mv_size = KLEN
  904. _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
  905. _check(lmdb.mdb_cursor_count(icur, &ct))
  906. # Allocate memory for results.
  907. ret = Keyset(ct)
  908. #logger.debug(f'Entries for {self.lookup_indices[idx]}: {ct}')
  909. #logger.debug(f'Allocated {ret.size} bytes of data.')
  910. #logger.debug('First row: {}'.format(
  911. # (<unsigned char *>data_v.mv_data)[:DBL_KLEN]))
  912. #logger.debug('asm_rng: {}'.format(asm_rng[:3]))
  913. #logger.debug('luk: {}'.format(luk))
  914. _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
  915. _check(lmdb.mdb_cursor_get(
  916. icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
  917. while True:
  918. #logger.debug('ret_offset: {}'.format(ret_offset))
  919. #logger.debug(f'Page size: {data_v.mv_size}')
  920. #logger.debug(
  921. # 'Got data in 1bound ({}): {}'.format(
  922. # data_v.mv_size,
  923. # (<unsigned char *>data_v.mv_data)[: data_v.mv_size]))
  924. for j in range(data_v.mv_size // DBL_KLEN):
  925. src_pos = DBL_KLEN * j
  926. spok[term_order[0]] = luk[0]
  927. spok[term_order[1]] = <KeyIdx>(data_v.mv_data + src_pos)
  928. spok[term_order[2]] = <KeyIdx>(
  929. data_v.mv_data + src_pos + KLEN)
  930. cc.array_add(ret.data, spok)
  931. #ret_pos = ret_offset + ret.itemsize * j
  932. ## TODO Try to fit this in the Key / TripleKey schema.
  933. #memcpy(ret.data + ret_pos + asm_rng[0], luk, KLEN)
  934. #memcpy(ret.data + ret_pos + asm_rng[1],
  935. # data_v.mv_data + src_pos, KLEN)
  936. #memcpy(ret.data + ret_pos + asm_rng[2],
  937. # data_v.mv_data + src_pos + KLEN, KLEN)
  938. # Increment MUST be done before MDB_NEXT_MULTIPLE otherwise
  939. # data_v.mv_size will be overwritten with the *next* page size
  940. # and cause corruption in the output data.
  941. #ret_offset += data_v.mv_size // DBL_KLEN * ret.itemsize
  942. try:
  943. # Get results by the page.
  944. _check(lmdb.mdb_cursor_get(
  945. icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
  946. except KeyNotFoundError:
  947. # For testing only. Errors will be caught in triples()
  948. # when looking for a context.
  949. #if ret_offset + ret.itemsize < ret.size:
  950. # raise RuntimeError(
  951. # 'Retrieved less values than expected: {} of {}.'
  952. # .format(src_offset, ret.size))
  953. return ret
  954. #logger.debug('Assembled data in 1bound ({}): {}'.format(ret.size, ret.data[: ret.size]))
  955. finally:
  956. self._cur_close(icur)
  957. cdef Keyset _lookup_2bound(
  958. self, unsigned char idx1, term1, unsigned char idx2, term2):
  959. """
  960. Look up triples for a pattern with two bound terms.
  961. :param str idx1: The index to look up as one of the keys of
  962. ``lookup_ordering_2bound``.
  963. :param rdflib.URIRef term1: First bound term to search for.
  964. :rtype: Iterator(bytes)
  965. :return: SPO keys matching the pattern.
  966. """
  967. cdef:
  968. unsigned char luk1_offset, luk2_offset
  969. unsigned int dbflags
  970. unsigned char term_order[3] # Lookup ordering
  971. size_t ct, i = 0, ret_offset = 0, ret_pos, src_pos
  972. size_t j # Must be signed for older OpenMP versions
  973. lmdb.MDB_cursor *icur
  974. Keyset ret
  975. #Key luk1, luk2
  976. DoubleKey luk
  977. TripleKey spok
  978. logging.debug(
  979. f'2bound lookup for term {term1} at position {idx1} '
  980. f'and term {term2} at position {idx2}.')
  981. try:
  982. luk1 = [self._to_key_idx(term1)]
  983. luk2 = [self._to_key_idx(term2)]
  984. except KeyNotFoundError:
  985. return Keyset()
  986. logging.debug('luk1: {}'.format(luk1))
  987. logging.debug('luk2: {}'.format(luk2))
  988. for i in range(3):
  989. if (
  990. idx1 in lookup_ordering_2bound[i][: 2]
  991. and idx2 in lookup_ordering_2bound[i][: 2]):
  992. term_order = lookup_ordering_2bound[i]
  993. if term_order[0] == idx1:
  994. luk1_offset = 0
  995. luk2_offset = 1
  996. else:
  997. luk1_offset = 1
  998. luk2_offset = 0
  999. dblabel = self.lookup_indices[i + 3] # skip 1bound index labels
  1000. break
  1001. if i == 2:
  1002. raise ValueError(
  1003. 'Indices {} and {} not found in LU keys.'.format(
  1004. idx1, idx2))
  1005. #logger.debug('Term order: {}'.format(term_order[:3]))
  1006. #logger.debug('LUK offsets: {}, {}'.format(luk1_offset, luk2_offset))
  1007. # Compose terms in lookup key.
  1008. luk[luk1_offset] = luk1
  1009. luk[luk2_offset] = luk2
  1010. #logger.debug('Lookup key: {}'.format(luk))
  1011. icur = self._cur_open(dblabel)
  1012. #logger.debug('Database label: {}'.format(dblabel))
  1013. try:
  1014. key_v.mv_data = luk
  1015. key_v.mv_size = DBL_KLEN
  1016. # Count duplicates for key and allocate memory for result set.
  1017. _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
  1018. _check(lmdb.mdb_cursor_count(icur, &ct))
  1019. ret = Keyset(ct)
  1020. #logger.debug('Entries for {}: {}'.format(self.lookup_indices[idx], ct))
  1021. #logger.debug('First row: {}'.format(
  1022. # (<unsigned char *>data_v.mv_data)[:DBL_KLEN]))
  1023. #logger.debug('term_order: {}'.format(asm_rng[:3]))
  1024. #logger.debug('luk: {}'.format(luk))
  1025. _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
  1026. _check(lmdb.mdb_cursor_get(
  1027. icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
  1028. while True:
  1029. #logger.debug('Got data in 2bound ({}): {}'.format(
  1030. # data_v.mv_size,
  1031. # (<unsigned char *>data_v.mv_data)[: data_v.mv_size]))
  1032. for j in range(data_v.mv_size // KLEN):
  1033. src_pos = KLEN * j
  1034. spok[term_order[0]] = luk[0]
  1035. spok[term_order[1]] = luk[1]
  1036. spok[term_order[2]] = <KeyIdx>(data_v.mv_data + src_pos)
  1037. cc.array_add(ret.data, spok)
  1038. #src_pos = KLEN * j
  1039. #ret_pos = (ret_offset + ret.itemsize * j)
  1040. ##logger.debug('Page offset: {}'.format(pg_offset))
  1041. ##logger.debug('Ret offset: {}'.format(ret_offset))
  1042. #memcpy(ret.data + ret_pos + asm_rng[0], luk, KLEN)
  1043. #memcpy(ret.data + ret_pos + asm_rng[1], luk + KLEN, KLEN)
  1044. #memcpy(ret.data + ret_pos + asm_rng[2],
  1045. # data_v.mv_data + src_pos, KLEN)
  1046. #logger.debug('Assembled triple: {}'.format((ret.data + ret_offset)[: TRP_KLEN]))
  1047. #ret_offset += data_v.mv_size // KLEN * ret.itemsize
  1048. try:
  1049. # Get results by the page.
  1050. _check(lmdb.mdb_cursor_get(
  1051. icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
  1052. except KeyNotFoundError:
  1053. # For testing only. Errors will be caught in triples()
  1054. # when looking for a context.
  1055. #if ret_offset + ret.itemsize < ret.size:
  1056. # raise RuntimeError(
  1057. # 'Retrieved less values than expected: {} of {}.'
  1058. # .format(pg_offset, ret.size))
  1059. #logger.debug('Assembled data in 2bound ({}): {}'.format(ret.size, ret.data[: ret.size]))
  1060. return ret
  1061. finally:
  1062. self._cur_close(icur)
  1063. cdef void _all_term_keys(self, term_type, cc.HashSet** tkeys) except *:
  1064. """
  1065. Return all keys of a (``s:po``, ``p:so``, ``o:sp``) index.
  1066. """
  1067. cdef:
  1068. size_t i = 0
  1069. lmdb.MDB_stat stat
  1070. cc.HashSetConf tkeys_conf
  1071. idx_label = self.lookup_indices['spo'.index(term_type)]
  1072. #logger.debug('Looking for all terms in index: {}'.format(idx_label))
  1073. icur = self._cur_open(idx_label)
  1074. try:
  1075. _check(lmdb.mdb_stat(self.txn, lmdb.mdb_cursor_dbi(icur), &stat))
  1076. cc.hashset_conf_init(&tkeys_conf)
  1077. tkeys_conf.initial_capacity = 1024
  1078. tkeys_conf.load_factor = .75
  1079. tkeys_conf.key_length = KLEN
  1080. tkeys_conf.key_compare = cc.CC_CMP_POINTER
  1081. tkeys_conf.hash = cc.POINTER_HASH
  1082. cc.hashset_new_conf(&tkeys_conf, tkeys)
  1083. try:
  1084. _check(lmdb.mdb_cursor_get(
  1085. icur, &key_v, NULL, lmdb.MDB_FIRST))
  1086. except KeyNotFoundError:
  1087. return
  1088. while True:
  1089. cc.hashset_add(tkeys[0], key_v.mv_data)
  1090. rc = lmdb.mdb_cursor_get(
  1091. icur, &key_v, NULL, lmdb.MDB_NEXT_NODUP)
  1092. try:
  1093. _check(rc)
  1094. except KeyNotFoundError:
  1095. return
  1096. i += 1
  1097. finally:
  1098. #pass
  1099. self._cur_close(icur)
  1100. def all_terms(self, term_type):
  1101. """
  1102. Return all terms of a type (``s``, ``p``, or ``o``) in the store.
  1103. """
  1104. cdef:
  1105. void* cur
  1106. cc.HashSet* tkeys
  1107. cc.HashSetIter it
  1108. ret = set()
  1109. try:
  1110. self._all_term_keys(term_type, &tkeys)
  1111. cc.hashset_iter_init(&it, tkeys)
  1112. while cc.hashset_iter_next(&it, &cur):
  1113. #logger.debug('Yielding: {}'.format(key))
  1114. ret.add(self.from_key(<Key>cur))
  1115. finally:
  1116. if tkeys:
  1117. free(tkeys)
  1118. return ret
  1119. cpdef tuple all_namespaces(self):
  1120. """
  1121. Return all registered namespaces.
  1122. """
  1123. cdef:
  1124. size_t i = 0
  1125. lmdb.MDB_stat stat
  1126. ret = []
  1127. dcur = self._cur_open('pfx:ns')
  1128. try:
  1129. try:
  1130. _check(lmdb.mdb_cursor_get(
  1131. dcur, &key_v, &data_v, lmdb.MDB_FIRST))
  1132. except KeyNotFoundError:
  1133. return tuple()
  1134. while True:
  1135. ret.append((
  1136. (<unsigned char *>key_v.mv_data)[: key_v.mv_size].decode(),
  1137. (<unsigned char *>data_v.mv_data)[: data_v.mv_size].decode()))
  1138. #logger.debug('Found namespace: {}:{}'.format(<unsigned char *>key_v.mv_data, <unsigned char *>data_v.mv_data))
  1139. try:
  1140. _check(lmdb.mdb_cursor_get(
  1141. dcur, &key_v, &data_v, lmdb.MDB_NEXT))
  1142. except KeyNotFoundError:
  1143. return tuple(ret)
  1144. i += 1
  1145. finally:
  1146. #pass
  1147. self._cur_close(dcur)
  1148. cdef void all_contexts(
  1149. self, KeyIdx** ctx, size_t* sz, triple=None
  1150. ) except *:
  1151. """
  1152. Get a list of all contexts.
  1153. :rtype: Iterator(lakesuperior.model.graph.graph.Imr)
  1154. """
  1155. cdef:
  1156. size_t ct
  1157. lmdb.MDB_cursor_op seek_op, scan_op
  1158. lmdb.MDB_stat stat
  1159. lmdb.MDB_val key_v
  1160. TripleKey spok
  1161. cur = (
  1162. self._cur_open('spo:c') if triple and all(triple)
  1163. else self._cur_open('c:'))
  1164. key_v.mv_data = &spok
  1165. if triple and all(triple):
  1166. lmdb_seek_op = lmdb.MDB_SET_KEY
  1167. lmdb_scan_op = lmdb.MDB_NEXT_DUP
  1168. spok = [
  1169. self._to_key_idx(triple[0]),
  1170. self._to_key_idx(triple[1]),
  1171. self._to_key_idx(triple[2]),
  1172. ]
  1173. key_v.mv_size = TRP_KLEN
  1174. else:
  1175. lmdb_seek_op = lmdb.MDB_FIRST
  1176. lmdb_scan_op = lmdb.MDB_NEXT
  1177. key_v.mv_size = 0
  1178. try:
  1179. _check(lmdb.mdb_stat(
  1180. self.txn, lmdb.mdb_cursor_dbi(cur), &stat))
  1181. try:
  1182. _check(lmdb.mdb_cursor_get(
  1183. cur, &key_v, &data_v, seek_op))
  1184. except KeyNotFoundError:
  1185. ctx[0] = NULL
  1186. return
  1187. ctx[0] = <KeyIdx*>malloc(stat.ms_entries * KLEN)
  1188. sz[0] = 0
  1189. while True:
  1190. ctx[0][sz[0]] = (<Key>data_v.mv_data)[0]
  1191. try:
  1192. _check(lmdb.mdb_cursor_get(
  1193. cur, &key_v, &data_v, scan_op))
  1194. except KeyNotFoundError:
  1195. break
  1196. sz[0] += 1
  1197. finally:
  1198. #pass
  1199. self._cur_close(cur)
  1200. # Key conversion methods.
  1201. cdef inline void lookup_term(self, const Key tk, Buffer* data) except *:
  1202. """
  1203. look up a term by key.
  1204. :param Key key: The key to be looked up.
  1205. :param Buffer *data: Buffer structure containing the serialized term.
  1206. """
  1207. cdef:
  1208. lmdb.MDB_val key_v, data_v
  1209. key_v.mv_data = tk
  1210. key_v.mv_size = KLEN
  1211. _check(
  1212. lmdb.mdb_get(
  1213. self.txn, self.get_dbi('t:st'), &key_v, &data_v
  1214. ),
  1215. f'Error getting data for key \'{tk[0]}\'.')
  1216. data.addr = data_v.mv_data
  1217. data.sz = data_v.mv_size
  1218. #logger.info('Found term: {}'.format(buffer_dump(data)))
  1219. cdef object from_key(self, const Key tk):
  1220. """
  1221. Convert a single key into one term.
  1222. :param Key key: The key to be converted.
  1223. """
  1224. cdef Buffer pk_t
  1225. self.lookup_term(tk, &pk_t)
  1226. # TODO Make Term a class and return that.
  1227. return deserialize_to_rdflib(&pk_t)
  1228. cdef tuple from_trp_key(self, const TripleKey spok):
  1229. """
  1230. Convert a triple key into a tuple of 3 terms.
  1231. :param TripleKey spok: The triple key to be converted.
  1232. """
  1233. #logger.debug(f'From triple key: {key[: TRP_KLEN]}')
  1234. return (
  1235. self.from_key(spok),
  1236. self.from_key(spok + KLEN),
  1237. self.from_key(spok + DBL_KLEN))
  1238. cdef inline KeyIdx _to_key_idx(self, term):
  1239. """
  1240. Convert a triple, quad or term into a key index (bare number).
  1241. The key is the checksum of the serialized object, therefore unique for
  1242. that object.
  1243. :param rdflib.Term term: An RDFLib term (URIRef, BNode, Literal).
  1244. :param Key key: Pointer to the key that will be produced.
  1245. :rtype: void
  1246. """
  1247. cdef:
  1248. Hash128 thash
  1249. Buffer pk_t
  1250. serialize_from_rdflib(term, &pk_t)
  1251. hash128(&pk_t, &thash)
  1252. #logger.debug('Hash to search for: {}'.format(thash[: HLEN]))
  1253. key_v.mv_data = thash
  1254. key_v.mv_size = HLEN
  1255. dbi = self.get_dbi('th:t')
  1256. #logger.debug(f'DBI: {dbi}')
  1257. _check(lmdb.mdb_get(self.txn, dbi, &key_v, &data_v))
  1258. #logger.debug('Found key: {}'.format((<Key>data_v.mv_data)[: KLEN]))
  1259. return (<Key>data_v.mv_data)[0]
  1260. cdef KeyIdx _append(
  1261. self, Buffer *value,
  1262. unsigned char *dblabel=b'', lmdb.MDB_txn *txn=NULL,
  1263. unsigned int flags=0
  1264. ):
  1265. """
  1266. Append one or more keys and values to the end of a database.
  1267. :param lmdb.Cursor cur: The write cursor to act on.
  1268. :param list(bytes) values: Value(s) to append.
  1269. :rtype: KeyIdx
  1270. :return: Index of key inserted.
  1271. """
  1272. cdef:
  1273. lmdb.MDB_cursor *cur
  1274. KeyIdx new_idx
  1275. Key key
  1276. if txn is NULL:
  1277. txn = self.txn
  1278. cur = self._cur_open(dblabel, txn=txn)
  1279. try:
  1280. _check(lmdb.mdb_cursor_get(cur, &key_v, NULL, lmdb.MDB_LAST))
  1281. except KeyNotFoundError:
  1282. new_idx = 0
  1283. else:
  1284. memcpy(&new_idx, key_v.mv_data, KLEN)
  1285. new_idx += 1
  1286. finally:
  1287. #pass
  1288. self._cur_close(cur)
  1289. key = [new_idx]
  1290. key_v.mv_data = key
  1291. key_v.mv_size = KLEN
  1292. data_v.mv_data = value.addr
  1293. data_v.mv_size = value.sz
  1294. #logger.debug('Appending value {} to db {} with key: {}'.format(
  1295. # value[: vlen], dblabel.decode(), key[0]))
  1296. #logger.debug('data size: {}'.format(data_v.mv_size))
  1297. lmdb.mdb_put(
  1298. txn, self.get_dbi(dblabel), &key_v, &data_v,
  1299. flags | lmdb.MDB_APPEND)
  1300. #cdef inline KeyIdx bytes_to_idx(self, const unsigned char* bs):
  1301. # """
  1302. # Convert a byte string as stored in LMDB to a size_t key index.
  1303. # TODO Force big endian?
  1304. # """
  1305. # cdef KeyIdx ret
  1306. # memcpy(&ret, bs, KLEN)
  1307. # return ret
  1308. #cdef inline unsigned char* idx_to_bytes(KeyIdx idx):
  1309. # """
  1310. # Convert a size_t key index to bytes.
  1311. # TODO Force big endian?
  1312. # """
  1313. # cdef unsigned char* ret
  1314. # memcpy(&ret, idx, KLEN)
  1315. # return ret