toolbox.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. import logging
  2. import pickle
  3. import re
  4. from collections import defaultdict
  5. from hashlib import sha1
  6. from flask import g
  7. from rdflib import Graph
  8. from rdflib.term import URIRef, Variable
  9. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  10. from lakesuperior.globals import ROOT_RSRC_URI
  11. logger = logging.getLogger(__name__)
  12. class Toolbox:
  13. '''
  14. Utility class to translate and generate strings and other objects.
  15. '''
  16. def replace_term_domain(self, term, search, replace):
  17. '''
  18. Replace the domain of a term.
  19. :param rdflib.URIRef term: The term (URI) to change.
  20. :param str search: Domain string to replace.
  21. :param str replace: Domain string to use for replacement.
  22. :rtype: rdflib.URIRef
  23. '''
  24. s = str(term)
  25. if s.startswith(search):
  26. s = s.replace(search, replace)
  27. return URIRef(s)
  28. def uid_to_uri(self, uid):
  29. '''Convert a UID to a URI.
  30. :rtype: rdflib.URIRef
  31. '''
  32. return URIRef(g.webroot + uid)
  33. def uri_to_uid(self, uri):
  34. '''Convert an absolute URI (internal or external) to a UID.
  35. :rtype: str
  36. '''
  37. if uri.startswith(nsc['fcres']):
  38. return str(uri).replace(nsc['fcres'], '')
  39. else:
  40. return '/' + str(uri).replace(g.webroot, '').strip('/')
  41. def localize_uri_string(self, s):
  42. '''Convert URIs into URNs in a string using the application base URI.
  43. :param str: s Input string.
  44. :rtype: str
  45. '''
  46. if s.strip('/') == g.webroot:
  47. return str(ROOT_RSRC_URI)
  48. else:
  49. return s.rstrip('/').replace(
  50. g.webroot, str(nsc['fcres']))
  51. def localize_term(self, uri):
  52. '''
  53. Localize an individual term.
  54. :param rdflib.URIRef: urn Input URI.
  55. :rtype: rdflib.URIRef
  56. '''
  57. return URIRef(self.localize_uri_string(str(uri)))
  58. def localize_triple(self, trp):
  59. '''
  60. Localize terms in a triple.
  61. :param tuple(rdflib.URIRef) trp: The triple to be converted
  62. :rtype: tuple(rdflib.URIRef)
  63. '''
  64. s, p, o = trp
  65. if s.startswith(g.webroot):
  66. s = self.localize_term(s)
  67. if o.startswith(g.webroot):
  68. o = self.localize_term(o)
  69. return s, p, o
  70. def localize_graph(self, gr):
  71. '''
  72. Localize a graph.
  73. '''
  74. l_id = self.localize_term(gr.identifier)
  75. l_gr = Graph(identifier=l_id)
  76. for trp in gr:
  77. l_gr.add(self.localize_triple(trp))
  78. return l_gr
  79. def localize_payload(self, data):
  80. '''
  81. Localize an RDF stream with domain-specific URIs.
  82. :param bytes data: Binary RDF data.
  83. :rtype: bytes
  84. '''
  85. return data.replace(
  86. (g.webroot + '/').encode('utf-8'),
  87. (nsc['fcres'] + '/').encode('utf-8')
  88. ).replace(
  89. g.webroot.encode('utf-8'),
  90. (nsc['fcres'] + '/').encode('utf-8')
  91. )
  92. def localize_ext_str(self, s, urn):
  93. '''
  94. Convert global URIs to local in a SPARQL or RDF string.
  95. Also replace empty URIs (`<>`) with a fixed local URN and take care
  96. of fragments and relative URIs.
  97. This is a 3-pass replacement. First, global URIs whose webroot matches
  98. the application ones are replaced with internal URIs. Then, relative
  99. URIs are converted to absolute using the internal URI as the base;
  100. finally, the root node is appropriately addressed.
  101. '''
  102. esc_webroot = g.webroot.replace('/', '\\/')
  103. #loc_ptn = r'<({}\/?)?(.*?)?(\?.*?)?(#.*?)?>'.format(esc_webroot)
  104. loc_ptn1 = r'<{}\/?(.*?)>'.format(esc_webroot)
  105. loc_sub1 = '<{}/\\1>'.format(nsc['fcres'])
  106. s1 = re.sub(loc_ptn1, loc_sub1, s)
  107. loc_ptn2 = r'<([#?].*?)?>'
  108. loc_sub2 = '<{}\\1>'.format(urn)
  109. s2 = re.sub(loc_ptn2, loc_sub2, s1)
  110. loc_ptn3 = r'<{}([#?].*?)?>'.format(nsc['fcres'])
  111. loc_sub3 = '<{}\\1>'.format(ROOT_RSRC_URI)
  112. s3 = re.sub(loc_ptn3, loc_sub3, s2)
  113. return s3
  114. def globalize_string(self, s):
  115. '''Convert URNs into URIs in a string using the application base URI.
  116. :param string s: Input string.
  117. :rtype: string
  118. '''
  119. return s.replace(str(nsc['fcres']), g.webroot)
  120. def globalize_term(self, urn):
  121. '''
  122. Convert an URN into an URI using the application base URI.
  123. :param rdflib.URIRef urn: Input URN.
  124. :rtype: rdflib.URIRef
  125. '''
  126. return URIRef(self.globalize_string(str(urn)))
  127. def globalize_triple(self, trp):
  128. '''
  129. Globalize terms in a triple.
  130. :param tuple(rdflib.URIRef) trp: The triple to be converted
  131. :rtype: tuple(rdflib.URIRef)
  132. '''
  133. s, p, o = trp
  134. if s.startswith(nsc['fcres']):
  135. s = self.globalize_term(s)
  136. if o.startswith(nsc['fcres']):
  137. o = self.globalize_term(o)
  138. return s, p, o
  139. def globalize_imr(self, imr):
  140. '''
  141. Globalize an Imr.
  142. :rtype: rdflib.Graph
  143. '''
  144. g_gr = Graph(identifier=self.globalize_term(imr.uri))
  145. for trp in imr:
  146. g_gr.add(self.globalize_triple(trp))
  147. return g_gr
  148. def globalize_graph(self, gr):
  149. '''
  150. Globalize a graph.
  151. '''
  152. g_id = self.globalize_term(gr.identifier)
  153. g_gr = Graph(identifier=g_id)
  154. for trp in gr:
  155. g_gr.add(self.globalize_triple(trp))
  156. return g_gr
  157. def globalize_rsrc(self, rsrc):
  158. '''
  159. Globalize a resource.
  160. '''
  161. gr = rsrc.graph
  162. urn = rsrc.identifier
  163. global_gr = self.globalize_graph(gr)
  164. global_uri = self.globalize_term(urn)
  165. return global_gr.resource(global_uri)
  166. def parse_rfc7240(self, h_str):
  167. '''
  168. Parse ``Prefer`` header as per https://tools.ietf.org/html/rfc7240
  169. The ``cgi.parse_header`` standard method does not work with all
  170. possible use cases for this header.
  171. :param str h_str: The header(s) as a comma-separated list of Prefer
  172. statements, excluding the ``Prefer:`` token.
  173. '''
  174. parsed_hdr = defaultdict(dict)
  175. # Split up headers by comma
  176. hdr_list = [ x.strip() for x in h_str.split(',') ]
  177. for hdr in hdr_list:
  178. parsed_pref = defaultdict(dict)
  179. # Split up tokens by semicolon
  180. token_list = [ token.strip() for token in hdr.split(';') ]
  181. prefer_token = token_list.pop(0).split('=')
  182. prefer_name = prefer_token[0]
  183. # If preference has a '=', it has a value, else none.
  184. if len(prefer_token)>1:
  185. parsed_pref['value'] = prefer_token[1].strip('"')
  186. for param_token in token_list:
  187. # If the token list had a ';' the preference has a parameter.
  188. param_parts = [ prm.strip().strip('"') \
  189. for prm in param_token.split('=') ]
  190. param_value = param_parts[1] if len(param_parts) > 1 else None
  191. parsed_pref['parameters'][param_parts[0]] = param_value
  192. parsed_hdr[prefer_name] = parsed_pref
  193. return parsed_hdr
  194. def split_uuid(self, uuid):
  195. '''
  196. Split a UID into pairtree segments. This mimics FCREPO4 behavior.
  197. :param str uuid: UUID to split.
  198. :rtype: str
  199. '''
  200. path = '{}/{}/{}/{}/{}'.format(uuid[:2], uuid[2:4],
  201. uuid[4:6], uuid[6:8], uuid)
  202. return path