query.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. import logging
  2. from io import BytesIO
  3. from rdflib import URIRef
  4. from lakesuperior import env
  5. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  6. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  7. from lakesuperior.store.ldp_rs.lmdb_store import LmdbStore, TxnManager
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Handles read once, at import time, from the application globals on ``env``.
# ``rdfly`` is the object the query functions in this module call
# ``raw_query`` on.
rdfly = env.app_globals.rdfly
# ``rdf_store`` is the store handed to ``TxnManager`` and queried via
# ``triples`` in this module.
rdf_store = env.app_globals.rdf_store
  11. operands = ('_id', '=', '!=', '<', '>', '<=', '>=')
  12. """
  13. Available term comparators for term query.
  14. The ``_uri`` term is used to match URIRef terms, all other comparators are
  15. used against literals.
  16. """
  17. def triple_match(s=None, p=None, o=None, return_full=False):
  18. """
  19. Query store by matching triple patterns.
  20. Any of the ``s``, ``p`` or ``o`` terms can be None to represent a wildcard.
  21. This method is for triple matching only; it does not allow to query, nor
  22. exposes to the caller, any context.
  23. :param rdflib.term.Identifier s: Subject term.
  24. :param rdflib.term.Identifier p: Predicate term.
  25. :param rdflib.term.Identifier o: Object term.
  26. :param bool return_full: if ``False`` (the default), the returned values
  27. in the set are the URIs of the resources found. If True, the full set
  28. of matching triples is returned.
  29. :rtype: set(tuple(rdflib.term.Identifier){3}) or set(rdflib.URIRef)
  30. :return: Matching resource URIs if ``return_full`` is false, or
  31. matching triples otherwise.
  32. """
  33. with TxnManager(rdf_store) as txn:
  34. matches = rdf_store.triples((s, p, o), None)
  35. # Strip contexts and de-duplicate.
  36. qres = (
  37. {match[0] for match in matches} if return_full
  38. else {match[0][0] for match in matches})
  39. return qres
  40. def term_query(terms, or_logic=False):
  41. """
  42. Query resources by predicates, comparators and values.
  43. Comparators can be against literal or URIRef objects. For a list of
  44. comparators and their meanings, see the documentation and source for
  45. :py:data:`~lakesuperior.api.query.operands`.
  46. :param list(tuple{3}) terms: List of 3-tuples containing:
  47. - Predicate URI (rdflib.URIRef)
  48. - Comparator value (str)
  49. - Value to compare to (rdflib.URIRef or rdflib.Literal or str)
  50. :param bool or_logic: Whether to concatenate multiple query terms with OR
  51. logic (uses SPARQL ``UNION`` statements). The default is False (i.e.
  52. terms are concatenated as standard SPARQL statements).
  53. """
  54. qry_term_ls = []
  55. for i, term in enumerate(terms):
  56. if term['op'] not in operands:
  57. raise ValueError('Not a valid operand: {}'.format(term['op']))
  58. if term['op'] == '_id':
  59. qry_term = '?s {} {} .'.format(term['pred'], term['val'])
  60. else:
  61. oname = '?o_{}'.format(i)
  62. qry_term = '?s {0} {1}\nFILTER (str({1}) {2} "{3}") .'.format(
  63. term['pred'], oname, term['op'], term['val'])
  64. qry_term_ls.append(qry_term)
  65. if or_logic:
  66. qry_terms = '{\n' + '\n} UNION {\n'.join(qry_term_ls) + '\n}'
  67. else:
  68. qry_terms = '\n'.join(qry_term_ls)
  69. qry_str = '''
  70. SELECT ?s WHERE {{
  71. {}
  72. }}
  73. '''.format(qry_terms)
  74. logger.debug('Query: {}'.format(qry_str))
  75. with TxnManager(rdf_store) as txn:
  76. qres = rdfly.raw_query(qry_str)
  77. return {row[0] for row in qres}
  78. def fulltext_lookup(pattern):
  79. """
  80. Look up one term by partial match.
  81. *TODO: reserved for future use. A `Whoosh
  82. <https://whoosh.readthedocs.io/>`__ or similar full-text index is
  83. necessary for this.*
  84. """
  85. pass
  86. def sparql_query(qry_str, fmt):
  87. """
  88. Send a SPARQL query to the triplestore.
  89. :param str qry_str: SPARQL query string. SPARQL 1.1 Query Language
  90. (https://www.w3.org/TR/sparql11-query/) is supported.
  91. :param str fmt: Serialization format. This varies depending on the
  92. query type (SELECT, ASK, CONSTRUCT, etc.). [TODO Add reference to
  93. RDFLib serialization formats]
  94. :rtype: BytesIO
  95. :return: Serialized SPARQL results.
  96. """
  97. with TxnManager(rdf_store) as txn:
  98. qres = rdfly.raw_query(qry_str)
  99. out_stream = BytesIO(qres.serialize(format=fmt))
  100. return out_stream