lmdb_triplestore.pyx 53 KB


  1. import logging
  2. import sys
  3. from cython.parallel import prange
  4. from rdflib import Graph
  5. from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
  6. from lakesuperior.model.graph.graph import Imr
  7. from lakesuperior.store.base_lmdb_store import (
  8. KeyExistsError, KeyNotFoundError, LmdbError)
  9. from lakesuperior.store.base_lmdb_store cimport _check
  10. from libc.stdlib cimport malloc, free
  11. from libc.string cimport memcpy
  12. cimport lakesuperior.cy_include.collections as cc
  13. cimport lakesuperior.cy_include.cylmdb as lmdb
  14. from lakesuperior.model.base cimport (
  15. KLEN, DBL_KLEN, TRP_KLEN, QUAD_KLEN,
  16. KeyIdx, Key, DoubleKey, TripleKey, QuadKey,
  17. Buffer, buffer_dump
  18. )
  19. from lakesuperior.model.graph.graph cimport SimpleGraph, Imr
  20. from lakesuperior.model.graph.term cimport Term
  21. from lakesuperior.model.graph.triple cimport BufferTriple
  22. from lakesuperior.store.base_lmdb_store cimport (
  23. BaseLmdbStore, data_v, dbi, key_v)
  24. from lakesuperior.model.graph.term cimport (
  25. deserialize_to_rdflib, serialize_from_rdflib)
  26. from lakesuperior.model.structures.keyset cimport Keyset
  27. from lakesuperior.model.structures.hash cimport (
  28. HLEN_128 as HLEN, Hash128, hash128)
  29. # Integer keys and values are stored in the system's native byte order.
  30. # Therefore they must be parsed left-to-right if the system is big-endian,
  31. # and right-to-left if little-endian, in order to maintain the correct
  32. # sorting order.
  33. BIG_ENDIAN = sys.byteorder == 'big'
  34. LSUP_REVERSEKEY = 0 if BIG_ENDIAN else lmdb.MDB_REVERSEKEY
  35. LSUP_REVERSEDUP = 0 if BIG_ENDIAN else lmdb.MDB_REVERSEDUP
  36. INT_KEY_MASK = lmdb.MDB_INTEGERKEY | LSUP_REVERSEKEY
  37. INT_DUP_KEY_MASK = (
  38. lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED | lmdb.MDB_INTEGERKEY
  39. | LSUP_REVERSEKEY | LSUP_REVERSEDUP
  40. )
  41. INT_DUP_MASK = (
  42. lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED | lmdb.MDB_INTEGERDUP
  43. | LSUP_REVERSEKEY | LSUP_REVERSEDUP
  44. )
  45. lookup_rank = [0, 2, 1]
  46. """
  47. Order in which keys are looked up if two terms are bound.
  48. The indices with the smallest average number of values per key should be
  49. looked up first.
  50. 0 = s:po
  51. 1 = p:so
  52. 2 = o:sp
  53. If we want to get fancy, this can be rebalanced from time to time by
  54. looking up the number of keys in (s:po, p:so, o:sp).
  55. """
  56. lookup_ordering = [
  57. [0, 1, 2], # spo
  58. [1, 0, 2], # pso
  59. [2, 0, 1], # osp
  60. ]
  61. lookup_ordering_2bound = [
  62. [1, 2, 0], # po:s
  63. [0, 2, 1], # so:p
  64. [0, 1, 2], # sp:o
  65. ]
  66. logger = logging.getLogger(__name__)
cdef class LmdbTriplestore(BaseLmdbStore):
    """
    Low-level storage layer.

    This class extends the RDFLib-compatible :py:class:`BaseLmdbStore` and maps
    triples and contexts to key-value records in LMDB.

    This class uses the original LMDB C API rather than the Python bindings,
    because several data manipulations happen after retrieval from the store,
    which are more efficiently performed at the C level.
    """

    # Labels of all the databases used by this store.
    dbi_labels = [
        # Main data
        # Term key to serialized term content
        't:st',
        # Joined triple keys to context key
        'spo:c',
        # This has empty values and is used to keep track of empty contexts.
        'c:',
        # Prefix to namespace
        'pfx:ns',

        # Indices
        # Namespace to prefix
        'ns:pfx',
        # Term hash to triple key
        'th:t',

        # Lookups
        's:po',
        'p:so',
        'o:sp',
        'po:s',
        'so:p',
        'sp:o',
        'c:spo',
    ]

    # Lookup index labels. Positions 0-2 are the one-term-key indices and
    # positions 3-5 the matching two-term-key indices; ``_index_triple``
    # pairs entry ``i`` with entry ``i + 3``.
    lookup_indices = [
        b's:po',
        b'p:so',
        b'o:sp',
        b'po:s',
        b'so:p',
        b'sp:o',
    ]

    # LMDB flags per database label.
    # NOTE(review): the 'c' key below matches no entry of ``dbi_labels``
    # (the context database is labeled 'c:'); presumably 'c:' was meant.
    # Confirm against how the base class looks flags up before changing it.
    # 'pfx:ns', 'ns:pfx' and 'th:t' have no entry here.
    dbi_flags = {
        'c': INT_KEY_MASK,
        't:st': INT_KEY_MASK,
        's:po': INT_DUP_KEY_MASK,
        'p:so': INT_DUP_KEY_MASK,
        'o:sp': INT_DUP_KEY_MASK,
        'po:s': INT_DUP_MASK,
        'so:p': INT_DUP_MASK,
        'sp:o': INT_DUP_MASK,
        'c:spo': INT_DUP_KEY_MASK,
        'spo:c': INT_DUP_MASK,
    }
    # Emitted once, when the class body is executed.
    logger.debug(f'DBI flags: {dbi_flags}')

    flags = 0

    options = {
        'map_size': 1024 ** 4 # 1 TiB (1024^4 bytes).
    }
  125. # DB management methods.
  126. cpdef dict stats(self):
  127. """
  128. Gather statistics about the database."""
  129. st = self._stats()
  130. st['num_triples'] = st['db_stats']['spo:c']['ms_entries']
  131. return st
  132. cpdef size_t _len(self, context=None) except -1:
  133. """
  134. Return the length of the dataset.
  135. The RDFLib interface defines `__len__` in a nonstandard way that
  136. causes a Cython compilation error, so this method is called by the
  137. `__len__` method of its Python counterpart.
  138. """
  139. cdef:
  140. size_t ct
  141. Key ck
  142. if context is not None:
  143. ck = [self._to_key_idx(context)]
  144. key_v.mv_data = &ck
  145. key_v.mv_size = KLEN
  146. cur = self._cur_open('c:spo')
  147. try:
  148. _check(lmdb.mdb_cursor_get(
  149. cur, &key_v, NULL, lmdb.MDB_SET))
  150. _check(lmdb.mdb_cursor_count(cur, &ct))
  151. except KeyNotFoundError:
  152. return 0
  153. else:
  154. return ct
  155. finally:
  156. #pass
  157. self._cur_close(cur)
  158. else:
  159. return self.stats()['num_triples']
  160. ## PRIVATE METHODS ##
  161. # Triple and graph methods.
    cpdef add(self, triple, context=None, quoted=False):
        """
        Add a triple and start indexing.

        Each of the four terms (subject, predicate, object, context) is
        serialized and hashed. The hash is looked up in ``th:t``: if present,
        the existing term key is reused; otherwise the serialized term is
        appended to ``t:st`` and its hash indexed in ``th:t``. The resulting
        keys are then written to ``c:``, ``spo:c`` and ``c:spo``
        (already-existing entries are silently skipped) and the lookup
        indices are updated via ``_index_triple``.

        :param tuple(rdflib.Identifier) triple: Tuple of three identifiers.

        :param context: Context identifier. ``None`` inserts in the default
            graph.
        :type context: rdflib.Identifier or None

        :param bool quoted: Not used.
        """
        cdef:
            lmdb.MDB_cursor *icur
            lmdb.MDB_val spo_v, c_v, null_v, key_v, data_v
            unsigned char i
            Hash128 thash
            # Holds the s, p, o and c keys back to back.
            QuadKey spock
            Buffer pk_t

        c = self._normalize_context(context)
        if c is None:
            c = RDFLIB_DEFAULT_GRAPH_URI

        # TODO: figure out how the RDFLib dispatcher is inherited
        # (and if there is a use for it in a first place)
        #Store.add(self, triple, context)

        s, p, o = triple

        icur = self._cur_open('th:t')
        try:
            for i, term_obj in enumerate((s, p, o, c)):
                serialize_from_rdflib(term_obj, &pk_t)
                hash128(&pk_t, &thash)
                try:
                    # Look the term up by its hash.
                    key_v.mv_data = thash
                    key_v.mv_size = HLEN
                    _check(lmdb.mdb_get(
                            self.txn, self.get_dbi('th:t'), &key_v, &data_v))
                    spock[i] = (<Key>data_v.mv_data)[0]
                    logger.info('Hash {} with key {} found. Not adding.'.format(thash[:HLEN], spock[i]))
                except KeyNotFoundError:
                    # If term_obj is not found, add it...
                    logger.info('Hash {} not found. Adding to DB.'.format(
                            thash[: HLEN]))
                    spock[i] = self._append(&pk_t, dblabel=b't:st')

                    # ...and index it.
                    logger.info('Indexing on th:t: {}: {}'.format(
                            thash[: HLEN], spock[i]))
                    key_v.mv_data = thash
                    key_v.mv_size = HLEN
                    data_v.mv_data = spock + i
                    data_v.mv_size = KLEN
                    _check(
                        lmdb.mdb_cursor_put(icur, &key_v, &data_v, 0),
                        'Error setting key {}.'.format(thash))
        finally:
            self._cur_close(icur)

        # The first three keys of spock form the triple key; the fourth is
        # the context key.
        spo_v.mv_data = spock # address of sk in spock
        logger.info('Inserting quad: {}'.format([spock[0], spock[1], spock[2], spock[3]]))
        spo_v.mv_size = TRP_KLEN # Grab 3 keys
        c_v.mv_data = spock + 3 # address of ck in spock
        c_v.mv_size = KLEN
        null_v.mv_data = b''
        null_v.mv_size = 0

        # Register the context; an existing entry is left untouched.
        try:
            _check(lmdb.mdb_put(
                self.txn, self.get_dbi('c:'), &c_v, &null_v,
                lmdb.MDB_NOOVERWRITE))
        except KeyExistsError:
            pass

        try:
            # Add triple:context association.
            _check(lmdb.mdb_put(
                self.txn, self.get_dbi('spo:c'), &spo_v, &c_v,
                lmdb.MDB_NODUPDATA))
        except KeyExistsError:
            pass

        try:
            # Index context:triple association.
            _check(lmdb.mdb_put(
                self.txn, self.get_dbi('c:spo'), &c_v, &spo_v,
                lmdb.MDB_NODUPDATA))
        except KeyExistsError:
            pass

        # All main data entered; update the lookup indices.
        logger.info(f'indexing add: {[spock[0], spock[1], spock[2]]}')
        self._index_triple(IDX_OP_ADD, [spock[0], spock[1], spock[2]])
  253. cpdef add_graph(self, graph):
  254. """
  255. Add a graph to the database.
  256. This creates an empty graph by associating the graph URI with the
  257. pickled `None` value. This prevents from removing the graph when all
  258. triples are removed.
  259. This may be called by read-only operations:
  260. https://github.com/RDFLib/rdflib/blob/master/rdflib/graph.py#L1623
  261. In which case it needs to open a write transaction. This is not ideal
  262. but the only way to handle datasets in RDFLib.
  263. :param rdflib.URIRef graph: URI of the named graph to add.
  264. """
  265. cdef Buffer _sc
  266. if isinstance(graph, Graph):
  267. graph = graph.identifier
  268. # FIXME This is all wrong.
  269. serialize_from_rdflib(graph, &_sc)
  270. self._add_graph(&_sc)
  271. cdef void _add_graph(self, Buffer *pk_gr) except *:
  272. """
  273. Add a graph.
  274. :param pk_gr: Pickled context URIRef object.
  275. :type pk_gr: Buffer*
  276. """
  277. cdef:
  278. Hash128 chash
  279. Key ck
  280. lmdb.MDB_txn *tmp_txn
  281. hash128(pk_gr, &chash)
  282. #logger.debug('Adding a graph.')
  283. if not self._key_exists(chash, HLEN, b'th:t'):
  284. # Insert context term if not existing.
  285. if self.is_txn_rw:
  286. tmp_txn = self.txn
  287. else:
  288. _check(lmdb.mdb_txn_begin(self.dbenv, NULL, 0, &tmp_txn))
  289. # Open new R/W transactions.
  290. #logger.debug('Opening a temporary RW transaction.')
  291. try:
  292. #logger.debug('Working in existing RW transaction.')
  293. # Use existing R/W transaction.
  294. # Main entry.
  295. ck = [self._append(pk_gr, b't:st', txn=tmp_txn)]
  296. logger.info(f'Added new ck with key#: {ck[0]}')
  297. # Index.
  298. key_v.mv_data = chash
  299. key_v.mv_size = HLEN
  300. data_v.mv_data = ck
  301. data_v.mv_size = KLEN
  302. _check(lmdb.mdb_put(
  303. self.txn, self.get_dbi(b'th:t'), &key_v, &data_v, 0
  304. ))
  305. # Add to list of contexts.
  306. key_v.mv_data = ck
  307. key_v.mv_size = KLEN
  308. data_v.mv_data = ck # Whatever, length is zero anyways
  309. data_v.mv_size = 0
  310. _check(lmdb.mdb_put(
  311. self.txn, self.get_dbi(b'c:'), &key_v, &data_v, 0
  312. ))
  313. if not self.is_txn_rw:
  314. _check(lmdb.mdb_txn_commit(tmp_txn))
  315. except:
  316. if not self.is_txn_rw:
  317. lmdb.mdb_txn_abort(tmp_txn)
  318. raise
  319. cpdef void _remove(self, tuple triple_pattern, context=None) except *:
  320. cdef:
  321. unsigned char spok[TRP_KLEN]
  322. void* cur
  323. cc.ArrayIter it
  324. Key ck
  325. lmdb.MDB_val spok_v, ck_v
  326. #logger.debug('Removing triple: {}'.format(triple_pattern))
  327. if context is not None:
  328. try:
  329. ck = [self._to_key_idx(context)]
  330. except KeyNotFoundError:
  331. # If context is specified but not found, return to avoid
  332. # deleting the wrong triples.
  333. return
  334. # Get the matching pattern.
  335. match_set = self.triple_keys(triple_pattern, context)
  336. dcur = self._cur_open('spo:c')
  337. icur = self._cur_open('c:spo')
  338. try:
  339. spok_v.mv_size = TRP_KLEN
  340. # If context was specified, remove only associations with that context.
  341. cc.array_iter_init(&it, match_set.data)
  342. if context is not None:
  343. #logger.debug('Removing triples in matching context.')
  344. ck_v.mv_data = ck
  345. ck_v.mv_size = KLEN
  346. while cc.array_iter_next(&it, &cur) != cc.CC_ITER_END:
  347. spok_v.mv_data = cur
  348. # Delete spo:c entry.
  349. try:
  350. _check(lmdb.mdb_cursor_get(
  351. dcur, &spok_v, &ck_v, lmdb.MDB_GET_BOTH))
  352. except KeyNotFoundError:
  353. pass
  354. else:
  355. _check(lmdb.mdb_cursor_del(dcur, 0))
  356. # Restore ck after delete.
  357. ck_v.mv_data = ck
  358. # Delete c:spo entry.
  359. try:
  360. _check(lmdb.mdb_cursor_get(
  361. icur, &ck_v, &spok_v, lmdb.MDB_GET_BOTH))
  362. except KeyNotFoundError:
  363. pass
  364. else:
  365. _check(lmdb.mdb_cursor_del(icur, 0))
  366. # Delete lookup indices, only if no other context
  367. # association is present.
  368. # spok_v has changed on mdb_cursor_del. Restore.
  369. spok_v.mv_data = cur
  370. try:
  371. _check(lmdb.mdb_cursor_get(
  372. dcur, &spok_v, NULL, lmdb.MDB_SET))
  373. except KeyNotFoundError:
  374. self._index_triple(IDX_OP_REMOVE, <TripleKey>cur)
  375. i += 1
  376. # If no context is specified, remove all associations.
  377. else:
  378. #logger.debug('Removing triples in all contexts.')
  379. # Loop over all SPO matching the triple pattern.
  380. while cc.array_iter_next(&it, &cur) != cc.CC_ITER_END:
  381. spok_v.mv_data = cur
  382. # Loop over all context associations for this SPO.
  383. try:
  384. _check(lmdb.mdb_cursor_get(
  385. dcur, &spok_v, &ck_v, lmdb.MDB_SET_KEY))
  386. except KeyNotFoundError:
  387. # Move on to the next SPO.
  388. continue
  389. else:
  390. ck = <Key>ck_v.mv_data
  391. logger.debug(f'Removing {<TripleKey>cur} from main.')
  392. while True:
  393. # Delete c:spo association.
  394. try:
  395. _check(lmdb.mdb_cursor_get(
  396. icur, &ck_v, &spok_v, lmdb.MDB_GET_BOTH))
  397. except KeyNotFoundError:
  398. pass
  399. else:
  400. lmdb.mdb_cursor_del(icur, 0)
  401. # Restore the pointer to the deleted SPO.
  402. spok_v.mv_data = cur
  403. # Move on to next associated context.
  404. try:
  405. _check(lmdb.mdb_cursor_get(
  406. dcur, &spok_v, &ck_v, lmdb.MDB_NEXT_DUP))
  407. except KeyNotFoundError:
  408. break
  409. # Then delete the spo:c association.
  410. try:
  411. _check(lmdb.mdb_cursor_get(
  412. dcur, &spok_v, &ck_v, lmdb.MDB_SET))
  413. except KeyNotFoundError:
  414. pass
  415. else:
  416. lmdb.mdb_cursor_del(dcur, lmdb.MDB_NODUPDATA)
  417. self._index_triple(IDX_OP_REMOVE, <TripleKey>cur)
  418. #ck_v.mv_data = ck # Unnecessary?
  419. finally:
  420. i += 1
  421. finally:
  422. #logger.debug('Closing spo:c in _remove.')
  423. self._cur_close(dcur)
  424. #logger.debug('Closing c:spo in _remove.')
  425. self._cur_close(icur)
  426. cdef void _index_triple(self, int op, TripleKey spok) except *:
  427. """
  428. Update index for a triple and context (add or remove).
  429. :param str op: one of ``IDX_OP_ADD`` or ``IDX_OP_REMOVE``.
  430. :param TripleKey spok: Triple key to index.
  431. """
  432. cdef:
  433. Key keys[3]
  434. DoubleKey dbl_keys[3]
  435. size_t i = 0
  436. lmdb.MDB_val key_v, dbl_key_v
  437. keys = [
  438. [spok[0]], # sk
  439. [spok[1]], # pk
  440. [spok[2]], # ok
  441. ]
  442. dbl_keys = [
  443. [spok[1], spok[2]], # pok
  444. [spok[0], spok[2]], # sok
  445. [spok[0], spok[1]], # spk
  446. ]
  447. logger.info(f'''Indices:
  448. spok: {[spok[0], spok[1], spok[2]]}
  449. sk: {keys[0]}
  450. pk: {keys[1]}
  451. ok: {keys[2]}
  452. pok: {dbl_keys[0]}
  453. sok: {dbl_keys[1]}
  454. spk: {dbl_keys[2]}
  455. ''')
  456. key_v.mv_size = KLEN
  457. dbl_key_v.mv_size = DBL_KLEN
  458. #logger.debug('Start indexing: {}.'.format(spok[: TRP_KLEN]))
  459. if op == IDX_OP_REMOVE:
  460. logger.debug(f'Remove {spok[0]} from indices.')
  461. else:
  462. logger.debug(f'Add {spok[0]} to indices.')
  463. while i < 3:
  464. cur1 = self._cur_open(self.lookup_indices[i]) # s:po, p:so, o:sp
  465. cur2 = self._cur_open(self.lookup_indices[i + 3])# po:s, so:p, sp:o
  466. try:
  467. key_v.mv_data = keys[i]
  468. dbl_key_v.mv_data = dbl_keys[i]
  469. # Removal op indexing.
  470. if op == IDX_OP_REMOVE:
  471. try:
  472. _check(lmdb.mdb_cursor_get(
  473. cur1, &key_v, &dbl_key_v, lmdb.MDB_GET_BOTH))
  474. logger.info(
  475. f'Removed: {keys[i]}, {dbl_keys[i]}')
  476. except KeyNotFoundError:
  477. logger.info(
  478. f'Not found: {keys[i]}, {dbl_keys[i]}')
  479. pass
  480. else:
  481. _check(lmdb.mdb_cursor_del(cur1, 0))
  482. # Restore pointers after delete.
  483. key_v.mv_data = keys[i]
  484. dbl_key_v.mv_data = dbl_keys[i]
  485. try:
  486. _check(lmdb.mdb_cursor_get(
  487. cur2, &dbl_key_v, &key_v, lmdb.MDB_GET_BOTH))
  488. logger.info(f'Removed: {dbl_keys[i]}, {keys[i]}')
  489. except KeyNotFoundError:
  490. logger.info(f'Not found: {dbl_keys[i]}, {keys[i]}')
  491. pass
  492. else:
  493. _check(lmdb.mdb_cursor_del(cur2, 0))
  494. # Addition op indexing.
  495. elif op == IDX_OP_ADD:
  496. logger.info('Adding to index `{}`: {}, {}'.format(
  497. self.lookup_indices[i],
  498. <Key>key_v.mv_data,
  499. <DoubleKey>dbl_key_v.mv_data
  500. ))
  501. try:
  502. _check(lmdb.mdb_cursor_put(
  503. cur1, &key_v, &dbl_key_v, lmdb.MDB_NODUPDATA))
  504. except KeyExistsError:
  505. logger.info(f'Key {keys[i]} exists already.')
  506. pass
  507. logger.info('Adding to index `{}`: {}, {}'.format(
  508. self.lookup_indices[i + 3],
  509. <DoubleKey>dbl_key_v.mv_data,
  510. <Key>key_v.mv_data
  511. ))
  512. try:
  513. _check(lmdb.mdb_cursor_put(
  514. cur2, &dbl_key_v, &key_v, lmdb.MDB_NODUPDATA))
  515. except KeyExistsError:
  516. logger.info(f'Double key {dbl_keys[i]} exists already.')
  517. pass
  518. else:
  519. raise ValueError(
  520. 'Index operation \'{}\' is not supported.'.format(op))
  521. i += 1
  522. finally:
  523. #pass
  524. self._cur_close(cur1)
  525. self._cur_close(cur2)
  526. cpdef void _remove_graph(self, object gr_uri) except *:
  527. """
  528. Delete a context.
  529. """
  530. cdef:
  531. Hash128 chash
  532. Key ck
  533. lmdb.MDB_val ck_v, chash_v
  534. Buffer pk_c
  535. #logger.debug('Deleting context: {}'.format(gr_uri))
  536. #logger.debug('Pickled context: {}'.format(serialize(gr_uri)))
  537. # Gather information on the graph prior to deletion.
  538. try:
  539. ck = [self._to_key_idx(gr_uri)]
  540. except KeyNotFoundError:
  541. return
  542. # Remove all triples and indices associated with the graph.
  543. self._remove((None, None, None), gr_uri)
  544. # Remove the graph if it is in triples.
  545. self._remove((gr_uri, None, None))
  546. self._remove((None, None, gr_uri))
  547. # Clean up all terms related to the graph.
  548. serialize_from_rdflib(gr_uri, &pk_c)
  549. hash128(&pk_c, &chash)
  550. ck_v.mv_size = KLEN
  551. chash_v.mv_size = HLEN
  552. try:
  553. ck_v.mv_data = ck
  554. _check(lmdb.mdb_del(self.txn, self.get_dbi(b'c:'), &ck_v, NULL))
  555. ck_v.mv_data = ck
  556. _check(lmdb.mdb_del(self.txn, self.get_dbi(b't:st'), &ck_v, NULL))
  557. chash_v.mv_data = chash
  558. _check(lmdb.mdb_del(self.txn, self.get_dbi(b'th:t'), &chash_v, NULL))
  559. except KeyNotFoundError:
  560. pass
  561. # Lookup methods.
  562. def contexts(self, triple=None):
  563. """
  564. Get a list of all contexts.
  565. :rtype: set(URIRef)
  566. """
  567. cdef:
  568. size_t sz, i
  569. KeyIdx* match
  570. try:
  571. self.all_contexts(&match, &sz, triple)
  572. ret = set()
  573. for i in range(sz):
  574. ret.add(self.from_key(match + i))
  575. finally:
  576. free(match)
  577. return ret
  578. def triples(self, triple_pattern, context=None):
  579. """
  580. Generator over matching triples.
  581. :param tuple triple_pattern: 3 RDFLib terms
  582. :param context: Context graph, if available.
  583. :type context: rdflib.Graph or None
  584. :rtype: Iterator
  585. :return: Generator over triples and contexts in which each result has
  586. the following format::
  587. (s, p, o), generator(contexts)
  588. Where the contexts generator lists all context that the triple appears
  589. in.
  590. """
  591. cdef:
  592. size_t i = 0, j = 0
  593. void* it_cur
  594. lmdb.MDB_val key_v, data_v
  595. cc.ArrayIter it
  596. # This sounds strange, RDFLib should be passing None at this point,
  597. # but anyway...
  598. context = self._normalize_context(context)
  599. logger.info(
  600. 'Getting triples for: {}, {}'.format(triple_pattern, context))
  601. rset = self.triple_keys(triple_pattern, context)
  602. #logger.debug('Triple keys found: {}'.format(rset.data[:rset.size]))
  603. cur = self._cur_open('spo:c')
  604. try:
  605. key_v.mv_size = TRP_KLEN
  606. cc.array_iter_init(&it, rset.data)
  607. while cc.array_iter_next(&it, &it_cur) != cc.CC_ITER_END:
  608. logger.info('Checking contexts for triples: {} {} {}'.format(
  609. (<TripleKey>it_cur)[0],
  610. (<TripleKey>it_cur)[1],
  611. (<TripleKey>it_cur)[2],
  612. ))
  613. key_v.mv_data = it_cur
  614. # Get contexts associated with each triple.
  615. contexts = []
  616. # This shall never be MDB_NOTFOUND.
  617. _check(lmdb.mdb_cursor_get(cur, &key_v, &data_v, lmdb.MDB_SET))
  618. while True:
  619. c_uri = self.from_key(<Key>data_v.mv_data)
  620. contexts.append(Imr(uri=c_uri, store=self))
  621. try:
  622. _check(lmdb.mdb_cursor_get(
  623. cur, &key_v, &data_v, lmdb.MDB_NEXT_DUP))
  624. except KeyNotFoundError:
  625. break
  626. #logger.debug('Triple keys before yield: {}: {}.'.format(
  627. # (<TripleKey>key_v.mv_data)[:TRP_KLEN], tuple(contexts)))
  628. yield (
  629. (
  630. self.from_key(<Key>key_v.mv_data),
  631. self.from_key(<Key>key_v.mv_data + 1),
  632. self.from_key(<Key>key_v.mv_data + 2),
  633. ),
  634. tuple(contexts)
  635. )
  636. #logger.debug('After yield.')
  637. finally:
  638. self._cur_close(cur)
  639. cpdef SimpleGraph graph_lookup(
  640. self, triple_pattern, context=None, uri=None, copy=False
  641. ):
  642. """
  643. Create a SimpleGraph or Imr instance from buffers from the store.
  644. The instance is only valid within the LMDB transaction that created it.
  645. :param tuple triple_pattern: 3 RDFLib terms
  646. :param context: Context graph, if available.
  647. :type context: rdflib.Graph or None
  648. :param str uri: URI for the resource. If provided, the resource
  649. returned will be an Imr, otherwise a SimpleGraph.
  650. :rtype: Iterator
  651. :return: Generator over triples and contexts in which each result has
  652. the following format::
  653. (s, p, o), generator(contexts)
  654. Where the contexts generator lists all context that the triple appears
  655. in.
  656. """
  657. cdef:
  658. void* spok
  659. size_t cur = 0
  660. Buffer* buffers
  661. BufferTriple* btrp
  662. SimpleGraph gr
  663. cc.ArrayIter it
  664. gr = Imr(uri=uri) if uri else SimpleGraph()
  665. #logger.debug(
  666. # 'Getting triples for: {}, {}'.format(triple_pattern, context))
  667. match = self.triple_keys(triple_pattern, context)
  668. btrp = <BufferTriple*>gr.pool.alloc(match.ct, sizeof(BufferTriple))
  669. buffers = <Buffer*>gr.pool.alloc(3 * match.ct, sizeof(Buffer))
  670. cc.array_iter_init(&it, match.data)
  671. while cc.array_iter_next(&it, &spok):
  672. btrp[cur].s = buffers + cur * 3
  673. btrp[cur].p = buffers + cur * 3 + 1
  674. btrp[cur].o = buffers + cur * 3 + 2
  675. #logger.info('Looking up key: {}'.format(spok[:KLEN]))
  676. self.lookup_term(<KeyIdx*>spok, buffers + cur * 3)
  677. #logger.info(f'Found triple s: {buffer_dump(btrp[cur].s)}')
  678. #logger.info('Looking up key: {}'.format(spok[KLEN:DBL_KLEN]))
  679. self.lookup_term(<KeyIdx*>(spok + KLEN), buffers + cur * 3 + 1)
  680. #logger.info(f'Found triple p: {buffer_dump(btrp[cur].p)}')
  681. #logger.info('Looking up key: {}'.format(spok[DBL_KLEN:TRP_KLEN]))
  682. self.lookup_term(<KeyIdx*>(spok + DBL_KLEN), buffers + cur * 3 + 2)
  683. #logger.info(f'Found triple o: {buffer_dump(btrp[cur].o)}')
  684. gr.add_triple(btrp + cur, copy)
  685. cur += 1
  686. return gr
  687. cdef Keyset triple_keys(self, tuple triple_pattern, context=None):
  688. """
  689. Top-level lookup method.
  690. This method is used by `triples` which returns native Python tuples,
  691. as well as by other methods that need to iterate and filter triple
  692. keys without incurring in the overhead of converting them to triples.
  693. :param tuple triple_pattern: 3 RDFLib terms
  694. :param context: Context graph or URI, or None.
  695. :type context: rdflib.term.Identifier or None
  696. """
  697. # TODO: Improve performance by allowing passing contexts as a tuple.
  698. cdef:
  699. size_t ct = 0, i = 0
  700. void* cur
  701. cc.ArrayIter it
  702. lmdb.MDB_cursor *icur
  703. lmdb.MDB_val key_v, data_v
  704. Key tk, ck
  705. TripleKey spok
  706. Keyset flt_res, ret
  707. if context is not None:
  708. try:
  709. ck = [self._to_key_idx(context)]
  710. except KeyNotFoundError:
  711. # Context not found.
  712. return Keyset()
  713. icur = self._cur_open('c:spo')
  714. try:
  715. key_v.mv_data = ck
  716. key_v.mv_size = KLEN
  717. # s p o c
  718. if all(triple_pattern):
  719. #logger.debug('Lookup: s p o c')
  720. for i, term in enumerate(triple_pattern):
  721. try:
  722. tk = [self._to_key_idx(term)]
  723. except KeyNotFoundError:
  724. # Context not found.
  725. return Keyset()
  726. if tk is NULL:
  727. # A term in the triple is not found.
  728. return Keyset()
  729. spok[i] = tk[0]
  730. data_v.mv_data = spok
  731. data_v.mv_size = TRP_KLEN
  732. #logger.debug(
  733. # 'Found spok {}. Matching with context {}'.format(
  734. # (<TripleKey>data_v.mv_data)[: TRP_KLEN],
  735. # (<Key>key_v.mv_data)[: KLEN]))
  736. try:
  737. _check(lmdb.mdb_cursor_get(
  738. icur, &key_v, &data_v, lmdb.MDB_GET_BOTH))
  739. except KeyNotFoundError:
  740. # Triple not found.
  741. #logger.debug('spok / ck pair not found.')
  742. return Keyset()
  743. ret = Keyset(1)
  744. cc.array_add(ret.data, &spok)
  745. return ret
  746. # ? ? ? c
  747. elif not any(triple_pattern):
  748. # Get all triples from the context
  749. #logger.debug('Lookup: ? ? ? c')
  750. try:
  751. _check(lmdb.mdb_cursor_get(
  752. icur, &key_v, &data_v, lmdb.MDB_SET))
  753. except KeyNotFoundError:
  754. # Triple not found.
  755. return Keyset()
  756. _check(lmdb.mdb_cursor_count(icur, &ct))
  757. ret = Keyset(ct)
  758. #logger.debug(f'Entries in c:spo: {ct}')
  759. #logger.debug(f'Allocated {ret.size} bytes.')
  760. #logger.debug('Looking in key: {}'.format(
  761. # (<unsigned char *>key_v.mv_data)[:key_v.mv_size]))
  762. _check(lmdb.mdb_cursor_get(
  763. icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
  764. while True:
  765. #logger.debug('Data page: {}'.format(
  766. # (<unsigned char *>data_v.mv_data)[: data_v.mv_size]))
  767. cc.array_add(ret.data, data_v.mv_data)
  768. try:
  769. _check(lmdb.mdb_cursor_get(
  770. icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
  771. except KeyNotFoundError:
  772. return ret
  773. # Regular lookup. Filter _lookup() results by context.
  774. else:
  775. try:
  776. res = self._lookup(triple_pattern)
  777. except KeyNotFoundError:
  778. return Keyset()
  779. #logger.debug('Allocating for context filtering.')
  780. key_v.mv_data = ck
  781. key_v.mv_size = KLEN
  782. data_v.mv_size = TRP_KLEN
  783. flt_res = Keyset(res.ct)
  784. cc.array_iter_init(&it, res.data)
  785. while cc.array_iter_next(&it, &cur) != cc.CC_ITER_END:
  786. #logger.debug('Checking row #{}'.format(flt_j))
  787. data_v.mv_data = cur
  788. #logger.debug('Checking c:spo {}, {}'.format(
  789. # (<unsigned char *>key_v.mv_data)[: key_v.mv_size],
  790. # (<unsigned char *>data_v.mv_data)[: data_v.mv_size]))
  791. try:
  792. # Verify that the triple is associated with the
  793. # context being searched.
  794. _check(lmdb.mdb_cursor_get(
  795. icur, &key_v, &data_v, lmdb.MDB_GET_BOTH))
  796. except KeyNotFoundError:
  797. continue
  798. else:
  799. cc.array_add(flt_res.data, cur)
  800. return flt_res
  801. finally:
  802. self._cur_close(icur)
  803. # Unfiltered lookup. No context checked.
  804. else:
  805. #logger.debug('No context in query.')
  806. try:
  807. res = self._lookup(triple_pattern)
  808. except KeyNotFoundError:
  809. return Keyset()
  810. #logger.debug('Res data before triple_keys return: {}'.format(
  811. # res.data[: res.size]))
  812. return res
  813. cdef Keyset _lookup(self, tuple triple_pattern):
  814. """
  815. Look up triples in the indices based on a triple pattern.
  816. :rtype: Iterator
  817. :return: Matching triple keys.
  818. """
  819. cdef:
  820. size_t ct = 0, i = 0
  821. lmdb.MDB_stat db_stat
  822. lmdb.MDB_val spok_v, ck_v
  823. TripleKey spok
  824. Key sk, pk, ok, tk1, tk2, tk3
  825. s, p, o = triple_pattern
  826. try:
  827. if s is not None:
  828. sk = [self._to_key_idx(s)]
  829. if p is not None:
  830. pk = [self._to_key_idx(p)]
  831. if o is not None:
  832. ok = [self._to_key_idx(o)]
  833. except KeyNotFoundError:
  834. return Keyset()
  835. if s is not None:
  836. tk1 = sk
  837. if p is not None:
  838. tk2 = pk
  839. # s p o
  840. if o is not None:
  841. tk3 = ok
  842. spok_v.mv_data = spok
  843. spok_v.mv_size = TRP_KLEN
  844. try:
  845. spok = [tk1[0], tk2[0], tk3[0]]
  846. _check(lmdb.mdb_get(
  847. self.txn, self.get_dbi('spo:c'), &spok_v, &ck_v))
  848. except KeyNotFoundError:
  849. return Keyset()
  850. matches = Keyset(1)
  851. cc.array_add(matches.data, &spok)
  852. return matches
  853. # s p ?
  854. return self._lookup_2bound(0, tk1, 1, tk2)
  855. if o is not None: # s ? o
  856. tk2 = pk
  857. return self._lookup_2bound(0, tk1, 2, tk2)
  858. # s ? ?
  859. return self._lookup_1bound(0, tk1)
  860. if p is not None:
  861. tk1 = pk
  862. if o is not None: # ? p o
  863. tk2 = ok
  864. return self._lookup_2bound(1, tk1, 2, tk2)
  865. # ? p ?
  866. return self._lookup_1bound(1, tk1)
  867. if o is not None: # ? ? o
  868. tk1 = ok
  869. return self._lookup_1bound(2, tk1)
  870. # ? ? ?
  871. # Get all triples in the database.
  872. #logger.debug('Getting all DB triples.')
  873. dcur = self._cur_open('spo:c')
  874. try:
  875. _check(
  876. lmdb.mdb_stat(
  877. self.txn, lmdb.mdb_cursor_dbi(dcur), &db_stat
  878. ), 'Error gathering DB stats.'
  879. )
  880. ct = db_stat.ms_entries
  881. ret = Keyset(ct)
  882. #logger.debug(f'Triples found: {ct}')
  883. if ct == 0:
  884. return Keyset()
  885. _check(lmdb.mdb_cursor_get(
  886. dcur, &key_v, &data_v, lmdb.MDB_FIRST))
  887. while True:
  888. cc.array_add(ret.data, key_v.mv_data)
  889. try:
  890. _check(lmdb.mdb_cursor_get(
  891. dcur, &key_v, &data_v, lmdb.MDB_NEXT_NODUP))
  892. except KeyNotFoundError:
  893. break
  894. i += 1
  895. return ret
  896. finally:
  897. self._cur_close(dcur)
  cdef Keyset _lookup_1bound(self, unsigned char idx, Key luk):
      """
      Look up triples for a pattern with one bound term.

      :param unsigned char idx: Position of the bound term in the triple
          (0: subject, 1: predicate, 2: object). It selects both the entry
          of ``lookup_ordering`` and the label in ``self.lookup_indices``.
      :param Key luk: Key of the bound term to search for.

      :rtype: Keyset
      :return: SPO keys matching the pattern.
      """
      cdef:
          unsigned char term_order[3]
          size_t ct, i
          lmdb.MDB_cursor *icur
          lmdb.MDB_val key_v, data_v
          DoubleKey* lu_dset
          TripleKey spok

      logger.info(f'lookup 1bound: {idx}, {luk[0]}')
      term_order = lookup_ordering[idx]
      # Use the module logger consistently rather than the root logger.
      logger.debug(f'DB label: {self.lookup_indices[idx]}')
      logger.debug('term order: {}'.format(term_order[: 3]))

      icur = self._cur_open(self.lookup_indices[idx])
      try:
          key_v.mv_data = luk
          key_v.mv_size = KLEN

          # Count the duplicates stored under the key so that the result
          # set can be sized up front.
          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_count(icur, &ct))
          ret = Keyset(ct)

          # Position on the key again, then read duplicates one page at a
          # time.
          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_get(
              icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
          while True:
              # Each page is a packed run of 2-term keys.
              lu_dset = <DoubleKey*>data_v.mv_data
              # NOTE(review): the two INFO calls below run once per result
              # row; consider demoting them to DEBUG.
              for i in range(data_v.mv_size // DBL_KLEN):
                  logger.info('Got 2-terms in lookup_1bound: {} {}'.format(
                      lu_dset[i][0], lu_dset[i][1]))
                  # Place the bound term and the two retrieved terms in
                  # their S, P, O slots.
                  spok[term_order[0]] = luk[0]
                  spok[term_order[1]] = lu_dset[i][0]
                  spok[term_order[2]] = lu_dset[i][1]
                  logger.info(
                      'Assembled triple in lookup_1bound: {} {} {}'.format(
                          spok[0], spok[1], spok[2]))
                  # NOTE(review): ``spok`` is a local buffer overwritten on
                  # every iteration -- confirm that ``cc.array_add`` copies
                  # the data rather than storing this address.
                  cc.array_add(ret.data, spok)

              try:
                  # Get results by the page.
                  _check(lmdb.mdb_cursor_get(
                      icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
              except KeyNotFoundError:
                  # No more pages: the result set is complete.
                  return ret
      finally:
          self._cur_close(icur)
  cdef Keyset _lookup_2bound(
          self, unsigned char idx1, term1, unsigned char idx2, term2):
      """
      Look up triples for a pattern with two bound terms.

      :param unsigned char idx1: Position of the first bound term in the
          triple (0: subject, 1: predicate, 2: object).
      :param term1: First bound term. It is converted to a key with
          ``_to_key_idx``.
      :param unsigned char idx2: Position of the second bound term.
      :param term2: Second bound term, converted like ``term1``.

      :rtype: Keyset
      :return: SPO keys matching the pattern. An empty key set is returned
          if looking up either term raises ``KeyNotFoundError``.
      :raise ValueError: If no entry of ``lookup_ordering_2bound`` has both
          positions among its first two slots.
      """
      # NOTE(review): the caller visible in this file passes variables named
      # ``tk1``/``tk2`` here -- confirm that terms, not keys, are intended.
      cdef:
          unsigned char luk1_offset, luk2_offset
          unsigned char term_order[3] # Lookup ordering
          size_t ct, i, j
          KeyIdx luk1, luk2
          KeyIdx* dup_keys
          lmdb.MDB_cursor *icur
          # Local buffers, so the module-level ones are left untouched.
          lmdb.MDB_val key_v, data_v
          Keyset ret
          DoubleKey luk
          TripleKey spok

      logger.debug(
          f'2bound lookup for term {term1} at position {idx1} '
          f'and term {term2} at position {idx2}.')
      try:
          # Keep these as plain key indices: they are stored directly into
          # the slots of the composite lookup key below.
          luk1 = self._to_key_idx(term1)
          luk2 = self._to_key_idx(term2)
      except KeyNotFoundError:
          return Keyset()
      logger.debug('luk1: {}'.format(luk1))
      logger.debug('luk2: {}'.format(luk2))

      # Find the 2-bound index whose first two positions are the bound ones.
      for i in range(3):
          if (
                  idx1 in lookup_ordering_2bound[i][: 2]
                  and idx2 in lookup_ordering_2bound[i][: 2]):
              term_order = lookup_ordering_2bound[i]
              if term_order[0] == idx1:
                  luk1_offset = 0
                  luk2_offset = 1
              else:
                  luk1_offset = 1
                  luk2_offset = 0
              dblabel = self.lookup_indices[i + 3] # skip 1bound index labels
              break
      else:
          raise ValueError(
              'Indices {} and {} not found in LU keys.'.format(
                  idx1, idx2))

      # Compose terms in lookup key.
      luk[luk1_offset] = luk1
      luk[luk2_offset] = luk2

      icur = self._cur_open(dblabel)
      try:
          key_v.mv_data = luk
          key_v.mv_size = DBL_KLEN

          # Count duplicates for key and allocate memory for result set.
          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_count(icur, &ct))
          ret = Keyset(ct)

          _check(lmdb.mdb_cursor_get(icur, &key_v, &data_v, lmdb.MDB_SET))
          _check(lmdb.mdb_cursor_get(
              icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
          while True:
              # Each page is a packed run of single keys. Read the key
              # values themselves, not their addresses.
              dup_keys = <KeyIdx*>data_v.mv_data
              for j in range(data_v.mv_size // KLEN):
                  spok[term_order[0]] = luk[0]
                  spok[term_order[1]] = luk[1]
                  spok[term_order[2]] = dup_keys[j]
                  # NOTE(review): ``spok`` is a local buffer overwritten on
                  # every iteration -- confirm that ``cc.array_add`` copies
                  # the data rather than storing this address.
                  cc.array_add(ret.data, spok)

              try:
                  # Get results by the page.
                  _check(lmdb.mdb_cursor_get(
                      icur, &key_v, &data_v, lmdb.MDB_NEXT_MULTIPLE))
              except KeyNotFoundError:
                  # No more pages: the result set is complete.
                  return ret
      finally:
          self._cur_close(icur)
  cdef void _all_term_keys(self, term_type, cc.HashSet** tkeys) except *:
      """
      Collect all keys of a (``s:po``, ``p:so``, ``o:sp``) index.

      :param str term_type: One of ``s``, ``p`` or ``o``. Its position in
          the string ``spo`` selects the label in ``self.lookup_indices``.
      :param cc.HashSet** tkeys: Out-parameter. It is set to ``NULL`` on
          entry and then to a hash set created here with
          ``cc.hashset_new_conf``. The set holds the key pointers returned
          by the cursor, not copies of the keys. The caller is responsible
          for releasing the set.
      """
      cdef:
          lmdb.MDB_cursor *icur
          # Local buffer, so the module-level one is left untouched.
          lmdb.MDB_val key_v
          cc.HashSetConf tkeys_conf

      # Give the caller a defined value to test even if an error is raised
      # before the set is created.
      tkeys[0] = NULL

      idx_label = self.lookup_indices['spo'.index(term_type)]
      icur = self._cur_open(idx_label)
      try:
          cc.hashset_conf_init(&tkeys_conf)
          tkeys_conf.initial_capacity = 1024
          tkeys_conf.load_factor = .75
          tkeys_conf.key_length = KLEN
          tkeys_conf.key_compare = cc.CC_CMP_POINTER
          tkeys_conf.hash = cc.POINTER_HASH
          cc.hashset_new_conf(&tkeys_conf, tkeys)

          try:
              _check(lmdb.mdb_cursor_get(
                  icur, &key_v, NULL, lmdb.MDB_FIRST))
              while True:
                  cc.hashset_add(tkeys[0], key_v.mv_data)
                  # Jump to the next distinct key, skipping its duplicates.
                  _check(lmdb.mdb_cursor_get(
                      icur, &key_v, NULL, lmdb.MDB_NEXT_NODUP))
          except KeyNotFoundError:
              # Empty index, or no more keys.
              return
      finally:
          self._cur_close(icur)
  def all_terms(self, term_type):
      """
      Return all terms of a type (``s``, ``p``, or ``o``) in the store.

      :param str term_type: One of ``s``, ``p`` or ``o``.

      :rtype: set
      :return: The terms obtained by passing each key collected by
          ``_all_term_keys`` to ``from_key``.
      """
      cdef:
          void* cur
          # Must start as NULL: the cleanup below runs even when
          # ``_all_term_keys`` raises before creating the set.
          cc.HashSet* tkeys = NULL
          cc.HashSetIter it

      ret = set()

      try:
          self._all_term_keys(term_type, &tkeys)
          cc.hashset_iter_init(&it, tkeys)
          # ``hashset_iter_next`` returns a status code (``CC_OK`` is 0),
          # not a boolean, so compare against the end-of-iteration status.
          while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
              ret.add(self.from_key(<Key>cur))
      finally:
          if tkeys is not NULL:
              # Release the set together with its internal table; a plain
              # ``free`` would only release the outer structure.
              cc.hashset_destroy(tkeys)

      return ret
  cpdef tuple all_namespaces(self):
      """
      Return all registered namespaces.

      :rtype: tuple
      :return: One ``(key, value)`` pair of decoded strings per entry of the
          ``pfx:ns`` database, in cursor order. Presumably the key is the
          prefix and the value the namespace -- verify against the code
          that writes this database. Empty if the database has no entries.
      """
      cdef:
          lmdb.MDB_cursor *dcur
          # Local buffers, so the module-level ones are left untouched.
          lmdb.MDB_val key_v, data_v

      ret = []

      dcur = self._cur_open('pfx:ns')
      try:
          _check(lmdb.mdb_cursor_get(
              dcur, &key_v, &data_v, lmdb.MDB_FIRST))
          while True:
              ret.append((
                  (<unsigned char *>key_v.mv_data)[: key_v.mv_size].decode(),
                  (<unsigned char *>data_v.mv_data)[: data_v.mv_size].decode()))
              _check(lmdb.mdb_cursor_get(
                  dcur, &key_v, &data_v, lmdb.MDB_NEXT))
      except KeyNotFoundError:
          # Empty database, or end of the scan.
          pass
      finally:
          self._cur_close(dcur)

      return tuple(ret)
  cdef void all_contexts(
      self, KeyIdx** ctx, size_t* sz, triple=None
  ) except *:
      """
      Get the keys of all contexts, or of the contexts of one triple.

      :param KeyIdx** ctx: Out-parameter. It is set to a ``malloc``-ed array
          of context keys, or to ``NULL`` if nothing is found. The array is
          not freed here.
      :param size_t* sz: Out-parameter. Number of keys written to the
          array; 0 if nothing is found.
      :param triple: If given and all of its terms are truthy, the ``spo:c``
          database is searched for that triple and the scan is limited to
          its duplicates. Otherwise all entries of the ``c:`` database are
          scanned.

      :raise MemoryError: If the result array cannot be allocated.
      """
      cdef:
          bint by_triple
          lmdb.MDB_cursor *cur
          lmdb.MDB_cursor_op seek_op, scan_op
          lmdb.MDB_stat stat
          # Local buffers, so the module-level ones are left untouched.
          lmdb.MDB_val key_v, data_v
          TripleKey spok

      by_triple = bool(triple and all(triple))

      # Leave the out-parameters in a defined state on every exit path.
      ctx[0] = NULL
      sz[0] = 0

      if by_triple:
          cur = self._cur_open('spo:c')
      else:
          cur = self._cur_open('c:')

      # Everything after opening the cursor runs inside the ``try`` so that
      # the cursor is closed even if a term lookup fails.
      try:
          key_v.mv_data = &spok
          if by_triple:
              seek_op = lmdb.MDB_SET_KEY
              scan_op = lmdb.MDB_NEXT_DUP
              spok = [
                  self._to_key_idx(triple[0]),
                  self._to_key_idx(triple[1]),
                  self._to_key_idx(triple[2]),
              ]
              key_v.mv_size = TRP_KLEN
          else:
              seek_op = lmdb.MDB_FIRST
              scan_op = lmdb.MDB_NEXT
              key_v.mv_size = 0

          _check(lmdb.mdb_stat(
              self.txn, lmdb.mdb_cursor_dbi(cur), &stat))

          try:
              _check(lmdb.mdb_cursor_get(
                  cur, &key_v, &data_v, seek_op))
          except KeyNotFoundError:
              return

          # The entry count of the whole database is an upper bound for the
          # number of results.
          ctx[0] = <KeyIdx*>malloc(stat.ms_entries * KLEN)
          if ctx[0] is NULL:
              raise MemoryError()

          while True:
              ctx[0][sz[0]] = (<Key>data_v.mv_data)[0]
              # Count the entry as soon as it is stored, so that the last
              # one is included in the total.
              sz[0] += 1
              try:
                  _check(lmdb.mdb_cursor_get(
                      cur, &key_v, &data_v, scan_op))
              except KeyNotFoundError:
                  break
      finally:
          self._cur_close(cur)
  1197. # Key conversion methods.
  cdef inline void lookup_term(self, const Key tk, Buffer* data) except *:
      """
      Fetch the serialized term stored under a key.

      The key is looked up in the ``t:st`` database within the current
      transaction.

      :param Key tk: The key to be looked up.
      :param Buffer *data: Buffer structure that receives the address and
          size of the serialized term.
      """
      cdef:
          int rc
          lmdb.MDB_val term_key, term_val

      term_key.mv_size = KLEN
      term_key.mv_data = tk

      rc = lmdb.mdb_get(
          self.txn, self.get_dbi('t:st'), &term_key, &term_val)
      _check(rc, f'Error getting data for key \'{tk[0]}\'.')

      # Hand back the location of the stored record; nothing is copied.
      data.sz = term_val.mv_size
      data.addr = term_val.mv_data
  cdef object from_key(self, const Key tk):
      """
      Turn one key into the term it stands for.

      The serialized term is fetched with ``lookup_term`` and then passed to
      ``deserialize_to_rdflib``.

      :param Key tk: The key to be converted.

      :return: The result of ``deserialize_to_rdflib`` for the stored term.
      """
      cdef Buffer term_buf

      logger.info(f'Looking up key: {tk[0]}')
      self.lookup_term(tk, &term_buf)
      logger.info(f'Serialized term found: {buffer_dump(&term_buf)}')

      # TODO Make Term a class and return that.
      term = deserialize_to_rdflib(&term_buf)
      return term
  cdef inline KeyIdx _to_key_idx(self, term):
      """
      Look up the key index (bare number) of a term.

      The term is serialized and hashed, and the hash is looked up in the
      ``th:t`` database within the current transaction.

      :param rdflib.Term term: An RDFLib term (URIRef, BNode, Literal).

      :rtype: KeyIdx
      :return: The key index stored under the hash of the term.
      """
      # NOTE(review): this method has no ``except`` clause. On Cython
      # versions where cdef functions with a C return type do not propagate
      # exceptions by default, an error raised by ``_check`` cannot reach
      # callers that try to catch it. Changing this requires updating the
      # matching declaration outside this block.
      cdef:
          Hash128 thash
          Buffer pk_t
          # Local buffers, so the module-level ones are left untouched.
          lmdb.MDB_val key_v, data_v

      serialize_from_rdflib(term, &pk_t)
      hash128(&pk_t, &thash)

      key_v.mv_data = thash
      key_v.mv_size = HLEN

      _check(lmdb.mdb_get(
          self.txn, self.get_dbi('th:t'), &key_v, &data_v))

      return (<Key>data_v.mv_data)[0]
  cdef KeyIdx _append(
          self, Buffer *value,
          unsigned char *dblabel=b'', lmdb.MDB_txn *txn=NULL,
          unsigned int flags=0
      ):
      """
      Append one value to the end of a database under a new key.

      The new key is the last key in the database plus one, or 0 if the
      database has no entries.

      :param Buffer *value: Value to append.
      :param unsigned char *dblabel: Label of the database to append to.
      :param lmdb.MDB_txn *txn: Transaction to use. Defaults to the current
          transaction of the store.
      :param unsigned int flags: Additional flags for ``mdb_put``. They are
          combined with ``MDB_APPEND``.

      :rtype: KeyIdx
      :return: Index of key inserted.
      """
      # NOTE(review): this method has no ``except`` clause. On Cython
      # versions where cdef functions with a C return type do not propagate
      # exceptions by default, an error raised below is reported but cannot
      # reach the caller. Changing this requires updating the matching
      # declaration outside this block.
      cdef:
          lmdb.MDB_cursor *cur
          KeyIdx new_idx
          Key key
          lmdb.MDB_val key_v, data_v

      if txn is NULL:
          txn = self.txn

      cur = self._cur_open(dblabel, txn=txn)

      try:
          _check(lmdb.mdb_cursor_get(cur, &key_v, NULL, lmdb.MDB_LAST))
      except KeyNotFoundError:
          logger.debug('First key inserted here.')
          new_idx = 0
      else:
          new_idx = (<Key>key_v.mv_data)[0] + 1
          logger.debug(f'New index value: {new_idx}')
      finally:
          self._cur_close(cur)

      key = [new_idx]
      key_v.mv_data = key
      logger.debug(f'Key: {key[0]}')
      logger.debug(f'Key addr: 0x{<size_t>key:02x}')
      logger.debug('Key data inserted: {}'.format((<unsigned char*>key_v.mv_data)[:KLEN]))
      key_v.mv_size = KLEN
      data_v.mv_data = value.addr
      data_v.mv_size = value.sz
      logger.info('Appending value {} to db {} with key: {}'.format(
          buffer_dump(value), dblabel.decode(), key[0]))

      # Check the result of the write: a failed put must not pass silently
      # while the new index is handed back as if it had been stored.
      _check(
          lmdb.mdb_put(
              txn, self.get_dbi(dblabel), &key_v, &data_v,
              flags | lmdb.MDB_APPEND),
          f'Error appending key {new_idx} to db {dblabel.decode()}.')

      return new_idx
  1295. #cdef inline KeyIdx bytes_to_idx(self, const unsigned char* bs):
  1296. # """
  1297. # Convert a byte string as stored in LMDB to a size_t key index.
  1298. # TODO Force big endian?
  1299. # """
  1300. # cdef KeyIdx ret
  1301. # memcpy(&ret, bs, KLEN)
  1302. # return ret
  1303. #cdef inline unsigned char* idx_to_bytes(KeyIdx idx):
  1304. # """
  1305. # Convert a size_t key index to bytes.
  1306. # TODO Force big endian?
  1307. # """
  1308. # cdef unsigned char* ret
  1309. # memcpy(&ret, idx, KLEN)
  1310. # return ret