lmdb_store.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. import hashlib
  2. import logging
  3. import os
  4. from contextlib import ContextDecorator, ExitStack
  5. from os import makedirs
  6. from os.path import abspath
  7. from urllib.request import pathname2url
  8. from rdflib import Graph, Namespace, URIRef, Variable
  9. from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
  10. from rdflib.store import Store, VALID_STORE, NO_STORE
  11. from lakesuperior import env
  12. from lakesuperior.store.ldp_rs.lmdb_triplestore import LmdbTriplestore
  13. logger = logging.getLogger(__name__)
  14. class LmdbStore(LmdbTriplestore, Store):
  15. """
  16. LMDB-backed store.
  17. This is an implementation of the RDFLib Store interface:
  18. https://github.com/RDFLib/rdflib/blob/master/rdflib/store.py
  19. Handles the interaction with a LMDB store and builds an abstraction layer
  20. for triples.
  21. This store class uses two LMDB environments (i.e. two files): one for the
  22. main (preservation-worthy) data and the other for the index data which
  23. can be rebuilt from the main database.
  24. There are 4 main data sets (preservation worthy data):
  25. - ``t:st`` (term key: serialized term; 1:1)
  26. - ``spo:c`` (joined S, P, O keys: context key; dupsort, dupfixed)
  27. - ``c:`` (context keys only, values are the empty bytestring; 1:1)
  28. - ``pfx:ns`` (prefix: pickled namespace; 1:1)
  29. And 6 indices to optimize lookup for all possible bound/unbound term
  30. combinations in a triple:
  31. - ``th:t`` (term hash: term key; 1:1)
  32. - ``s:po`` (S key: joined P, O keys; dupsort, dupfixed)
  33. - ``p:so`` (P key: joined S, O keys; dupsort, dupfixed)
  34. - ``o:sp`` (O key: joined S, P keys; dupsort, dupfixed)
  35. - ``c:spo`` (context → triple association; dupsort, dupfixed)
  36. - ``ns:pfx`` (pickled namespace: prefix; 1:1)
  37. The default graph is defined in
  38. :data:`rdflib.graph.DATASET_DEFAULT_GRAPH_ID` (imported in this module as ``RDFLIB_DEFAULT_GRAPH_URI``). Adding
  39. triples without context will add to this graph. Looking up triples without
  40. context (also in a SPARQL query) will look in the union graph instead of
  41. in the default graph. Also, removing triples without specifying a context
  42. will remove triples from all contexts.
  43. """
  44. context_aware = True
  45. # This is a hassle to maintain for no apparent gain. If some use is devised
  46. # in the future, it may be revised.
  47. formula_aware = False
  48. graph_aware = True
  49. transaction_aware = True
  def __init__(self, path, identifier=None, create=True):
      """
      Initialize the store on top of the LMDB triplestore base class.

      The base initializer is invoked with ``open_env=True``.

      :param path: Path passed to ``LmdbTriplestore.__init__``; also used
          to derive the default identifier.
      :param identifier: Identifier for this store. When omitted (or
          falsy), a URI reference built from the absolute form of ``path``
          is used instead.
      :param bool create: Forwarded to ``LmdbTriplestore.__init__``.
      """
      LmdbTriplestore.__init__(self, path, open_env=True, create=create)

      if identifier:
          self.identifier = identifier
      else:
          # Fall back to a URL-quoted version of the absolute store path.
          self.identifier = URIRef(pathname2url(abspath(path)))
  def __len__(self, context=None):
      """
      Return the size of the dataset.

      The context is run through ``_normalize_context`` and the result is
      handed to ``_len``, whose return value is passed back unchanged.

      :param context: Context to restrict the count to.
      :type context: rdflib.URIRef or rdflib.Graph
      """
      return self._len(self._normalize_context(context))
  61. # RDFLib DB management API
  def open(self, configuration=None, create=True):
      """
      Open the store environment.

      If the store is already open this is a no-op that reports success.

      :param str configuration: If not specified on init, indicate the path
          to use for the store. NOTE(review): this argument is not read
          anywhere in this method; presumably it is only kept for
          signature compatibility with the RDFLib ``Store`` API — confirm.
      :param bool create: Create the file and folder structure for the
          store environment. Passed to ``open_env``.

      :return: ``VALID_STORE`` when the environment is open after the call,
          ``NO_STORE`` when ``open_env`` raised an error.
      """
      if self.is_open:
          return VALID_STORE

      try:
          self.open_env(create)
      except Exception:
          # Catch ordinary errors only: a bare ``except`` would also trap
          # KeyboardInterrupt and SystemExit. Log the traceback so that the
          # reason for the failure is not lost behind the NO_STORE code.
          logger.exception('Could not open the store environment.')
          return NO_STORE

      self._open = True
      return VALID_STORE
  def close(self, commit_pending_transaction=False):
      """
      Close the database connection.

      This is meant to be called at server shutdown. The work is delegated
      entirely to ``close_env``.

      :param bool commit_pending_transaction: Passed unchanged to
          ``close_env``. Presumably controls whether a transaction that is
          still open gets committed before closing — verify against
          ``close_env``.
      """
      self.close_env(commit_pending_transaction)
  84. # RDFLib triple methods.
  def remove(self, triple_pattern, context=None):
      """
      Delete every triple matching a pattern.

      :param tuple triple_pattern: 3-tuple of RDF terms or ``None``
          describing the triple(s) to delete. ``None`` acts as a wildcard.
      :param context: Context to delete the triples from. With ``None``
          (the default) matching triples are deleted from all contexts.
      :type context: rdflib.term.Identifier or None
      """
      # The context is normalized before being handed to the low-level call.
      self._remove(triple_pattern, self._normalize_context(context))
  def bind(self, prefix, namespace):
      """
      Associate a prefix with a namespace.

      Both directions of the association are written: prefix to namespace
      under ``pfx:ns`` and namespace to prefix under ``ns:pfx``. If no
      read-write transaction is active, one is opened for the two writes.

      :param str prefix: Namespace prefix.
      :param rdflib.URIRef namespace: Fully qualified URI of namespace.
      """
      pfx_bytes = prefix.encode()
      ns_bytes = namespace.encode()

      def _store_binding():
          # Forward entry first, then the reverse lookup entry.
          self.put(pfx_bytes, ns_bytes, 'pfx:ns')
          self.put(ns_bytes, pfx_bytes, 'ns:pfx')

      if self.is_txn_rw:
          # Already inside a write transaction: reuse it.
          _store_binding()
          return

      with self.txn_ctx(write=True):
          _store_binding()
  def namespace(self, prefix):
      """
      Look up the namespace bound to a prefix.

      :param str prefix: Namespace prefix.

      :return: The namespace found under ``pfx:ns`` wrapped in a
          ``Namespace``, or ``None`` if the lookup returned ``None``.
      """
      raw_ns = self.get_data(prefix.encode(), 'pfx:ns')
      if raw_ns is None:
          return None

      return Namespace(raw_ns.decode())
  def prefix(self, namespace):
      """
      Look up the prefix bound to a namespace.

      **Note:** A namespace can be only bound to one prefix in this
      implementation.

      :param rdflib.Namespace namespace: Fully qualified namespace.

      :rtype: str or None
      :return: The decoded prefix found under ``ns:pfx``, or ``None`` if the
          lookup returned ``None``.
      """
      ns_key = str(namespace).encode()
      raw_pfx = self.get_data(ns_key, 'ns:pfx')
      if raw_pfx is None:
          return None

      return raw_pfx.decode()
  def namespaces(self):
      """Iterate over all prefix: namespace bindings.

      Each pair produced by ``all_namespaces`` is yielded with its second
      element wrapped in a ``Namespace``.

      :rtype: Iterator(tuple(str, rdflib.Namespace))
      """
      yield from (
          (pfx, Namespace(ns)) for pfx, ns in self.all_namespaces()
      )
  def remove_graph(self, graph):
      """
      Delete a named graph together with all of its triples.

      :param graph: URI of the named graph to remove. A ``Graph`` instance
          is also accepted, in which case its ``identifier`` is used.
      :type graph: rdflib.URIRef or rdflib.Graph
      """
      graph_uri = graph.identifier if isinstance(graph, Graph) else graph
      self._remove_graph(graph_uri)
  146. ## PRIVATE METHODS ##