toolbox.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. import logging
  2. import os
  3. import re
  4. from collections import defaultdict
  5. from hashlib import sha1
  6. from rdflib import Graph
  7. from rdflib.term import URIRef, Variable
  8. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  9. from lakesuperior.store.ldp_rs import ROOT_RSRC_URI
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Module docstring, assigned explicitly rather than as a leading string
# literal.
__doc__ = ''' Utility to translate and generate strings and other objects. '''
  12. def fsize_fmt(num, suffix='b'):
  13. """
  14. Format an integer into 1024-block file size format.
  15. Adapted from Python 2 code on
  16. https://stackoverflow.com/a/1094933/3758232
  17. :param int num: Size value in bytes.
  18. :param str suffix: Suffix label (defaults to ``b``).
  19. :rtype: str
  20. :return: Formatted size to largest fitting unit.
  21. """
  22. for unit in ['','K','M','G','T','P','E','Z']:
  23. if abs(num) < 1024.0:
  24. return f'{num:3.1f} {unit}{suffix}'
  25. num /= 1024.0
  26. return f'{num:.1f} Y{suffix}'
  27. def get_tree_size(path, follow_symlinks=True):
  28. """
  29. Return total size of files in given path and subdirs.
  30. Ripped from https://www.python.org/dev/peps/pep-0471/
  31. """
  32. total = 0
  33. for entry in os.scandir(path):
  34. if entry.is_dir(follow_symlinks=follow_symlinks):
  35. total += get_tree_size(entry.path)
  36. else:
  37. total += entry.stat(
  38. follow_symlinks=follow_symlinks
  39. ).st_size
  40. return total
  41. def replace_term_domain(term, search, replace):
  42. '''
  43. Replace the domain of a term.
  44. :param rdflib.URIRef term: The term (URI) to change.
  45. :param str search: Domain string to replace.
  46. :param str replace: Domain string to use for replacement.
  47. :rtype: rdflib.URIRef
  48. '''
  49. s = str(term)
  50. if s.startswith(search):
  51. s = s.replace(search, replace)
  52. return URIRef(s)
  53. def parse_rfc7240(h_str):
  54. '''
  55. Parse ``Prefer`` header as per https://tools.ietf.org/html/rfc7240
  56. The ``cgi.parse_header`` standard method does not work with all
  57. possible use cases for this header.
  58. :param str h_str: The header(s) as a comma-separated list of Prefer
  59. statements, excluding the ``Prefer:`` token.
  60. '''
  61. parsed_hdr = defaultdict(dict)
  62. # Split up headers by comma
  63. hdr_list = [ x.strip() for x in h_str.split(',') ]
  64. for hdr in hdr_list:
  65. parsed_pref = defaultdict(dict)
  66. # Split up tokens by semicolon
  67. token_list = [ token.strip() for token in hdr.split(';') ]
  68. prefer_token = token_list.pop(0).split('=')
  69. prefer_name = prefer_token[0]
  70. # If preference has a '=', it has a value, else none.
  71. if len(prefer_token)>1:
  72. parsed_pref['value'] = prefer_token[1].strip('"')
  73. for param_token in token_list:
  74. # If the token list had a ';' the preference has a parameter.
  75. param_parts = [ prm.strip().strip('"') \
  76. for prm in param_token.split('=') ]
  77. param_value = param_parts[1] if len(param_parts) > 1 else None
  78. parsed_pref['parameters'][param_parts[0]] = param_value
  79. parsed_hdr[prefer_name] = parsed_pref
  80. return parsed_hdr
  81. def split_uuid(uuid):
  82. '''
  83. Split a UID into pairtree segments. This mimics FCREPO4 behavior.
  84. :param str uuid: UUID to split.
  85. :rtype: str
  86. '''
  87. path = '{}/{}/{}/{}/{}'.format(uuid[:2], uuid[2:4],
  88. uuid[4:6], uuid[6:8], uuid)
  89. return path
  90. def rel_uri_to_urn(uri, uid):
  91. """
  92. Convert a URIRef with a relative location (e.g. ``<>``) to an URN.
  93. :param URIRef uri: The URI to convert.
  94. :param str uid: Resource UID that the URI should be relative to.
  95. :return: Converted URN if the input is relative, otherwise the unchanged
  96. URI.
  97. :rtype: URIRef
  98. """
  99. # FIXME This only accounts for empty URIs, not all relative URIs.
  100. return nsc['fcres'][uid] if str(uri) == '' else uri
  101. #return URIRef(
  102. # re.sub('<#([^>]+)>', f'<{base_uri}#\\1>', str(uri))
  103. # .replace('<>', f'<{base_uri}>'))
  104. def rel_uri_to_urn_string(string, uid):
  105. """
  106. Convert relative URIs in a SPARQL or RDF string to internal URNs.
  107. :param str string: Input string.
  108. :param str uid Resource UID to build the base URN from.
  109. :rtype: str
  110. :return: Modified string.
  111. """
  112. urn = str(nsc['fcres'][uid])
  113. return (
  114. re.sub('<#([^>]+)>', f'<{urn}#\\1>', string).replace('<>', f'<{urn}>')
  115. )
  116. class RequestUtils:
  117. """
  118. Utilities that require access to an HTTP request context.
  119. Initialize this within a Flask request context.
  120. """
  121. def __init__(self):
  122. from flask import g
  123. self.webroot = g.webroot
  124. def uid_to_uri(self, uid):
  125. '''Convert a UID to a URI.
  126. :rtype: rdflib.URIRef
  127. '''
  128. return URIRef(self.webroot + uid)
  129. def uri_to_uid(self, uri):
  130. '''Convert an absolute URI (internal or external) to a UID.
  131. :rtype: str
  132. '''
  133. if uri.startswith(nsc['fcres']):
  134. return str(uri).replace(nsc['fcres'], '')
  135. else:
  136. return '/' + str(uri).replace(self.webroot, '').strip('/')
  137. def localize_uri_string(self, s):
  138. '''Convert URIs into URNs in a string using the application base URI.
  139. :param str: s Input string.
  140. :rtype: str
  141. '''
  142. if s.strip('/') == self.webroot:
  143. return str(ROOT_RSRC_URI)
  144. else:
  145. return s.rstrip('/').replace(
  146. self.webroot, str(nsc['fcres']))
  147. def localize_term(self, uri):
  148. '''
  149. Localize an individual term.
  150. :param rdflib.URIRef: urn Input URI.
  151. :rtype: rdflib.URIRef
  152. '''
  153. return URIRef(self.localize_uri_string(str(uri)))
  154. def localize_triple(self, trp):
  155. '''
  156. Localize terms in a triple.
  157. :param tuple(rdflib.URIRef) trp: The triple to be converted
  158. :rtype: tuple(rdflib.URIRef)
  159. '''
  160. s, p, o = trp
  161. if s.startswith(self.webroot):
  162. s = self.localize_term(s)
  163. if o.startswith(self.webroot):
  164. o = self.localize_term(o)
  165. return s, p, o
  166. def localize_graph(self, gr):
  167. '''
  168. Localize a graph.
  169. '''
  170. l_id = self.localize_term(gr.identifier)
  171. l_gr = Graph(identifier=l_id)
  172. for trp in gr:
  173. l_gr.add(self.localize_triple(trp))
  174. return l_gr
  175. def localize_payload(self, data):
  176. '''
  177. Localize an RDF stream with domain-specific URIs.
  178. :param bytes data: Binary RDF data.
  179. :rtype: bytes
  180. '''
  181. return data.replace(
  182. (self.webroot + '/').encode('utf-8'),
  183. (nsc['fcres'] + '/').encode('utf-8')
  184. ).replace(
  185. self.webroot.encode('utf-8'),
  186. (nsc['fcres'] + '/').encode('utf-8')
  187. )
  188. def localize_ext_str(self, s, urn):
  189. '''
  190. Convert global URIs to local in a SPARQL or RDF string.
  191. Also replace empty URIs (`<>`) with a fixed local URN and take care
  192. of fragments and relative URIs.
  193. This is a 3-pass replacement. First, global URIs whose webroot matches
  194. the application ones are replaced with internal URIs. Then, relative
  195. URIs are converted to absolute using the internal URI as the base;
  196. finally, the root node is appropriately addressed.
  197. '''
  198. esc_webroot = self.webroot.replace('/', '\\/')
  199. #loc_ptn = r'<({}\/?)?(.*?)?(\?.*?)?(#.*?)?>'.format(esc_webroot)
  200. loc_ptn1 = r'<{}\/?(.*?)>'.format(esc_webroot)
  201. loc_sub1 = '<{}/\\1>'.format(nsc['fcres'])
  202. s1 = re.sub(loc_ptn1, loc_sub1, s)
  203. loc_ptn2 = r'<([#?].*?)?>'
  204. loc_sub2 = '<{}\\1>'.format(urn)
  205. s2 = re.sub(loc_ptn2, loc_sub2, s1)
  206. loc_ptn3 = r'<{}([#?].*?)?>'.format(nsc['fcres'])
  207. loc_sub3 = '<{}\\1>'.format(ROOT_RSRC_URI)
  208. s3 = re.sub(loc_ptn3, loc_sub3, s2)
  209. return s3
  210. def globalize_string(self, s):
  211. '''Convert URNs into URIs in a string using the application base URI.
  212. :param string s: Input string.
  213. :rtype: string
  214. '''
  215. return s.replace(str(nsc['fcres']), self.webroot)
  216. def globalize_term(self, urn):
  217. '''
  218. Convert an URN into an URI using the application base URI.
  219. :param rdflib.URIRef urn: Input URN.
  220. :rtype: rdflib.URIRef
  221. '''
  222. return URIRef(self.globalize_string(str(urn)))
  223. def globalize_triple(self, trp):
  224. '''
  225. Globalize terms in a triple.
  226. :param tuple(rdflib.URIRef) trp: The triple to be converted
  227. :rtype: tuple(rdflib.URIRef)
  228. '''
  229. s, p, o = trp
  230. if s.startswith(nsc['fcres']):
  231. s = self.globalize_term(s)
  232. if o.startswith(nsc['fcres']):
  233. o = self.globalize_term(o)
  234. return s, p, o
  235. def globalize_imr(self, imr):
  236. '''
  237. Globalize an Imr.
  238. :rtype: rdflib.Graph
  239. '''
  240. g_gr = Graph(identifier=self.globalize_term(imr.uri))
  241. for trp in imr:
  242. g_gr.add(self.globalize_triple(trp))
  243. return g_gr
  244. def globalize_graph(self, gr):
  245. '''
  246. Globalize a graph.
  247. '''
  248. g_id = self.globalize_term(gr.identifier)
  249. g_gr = Graph(identifier=g_id)
  250. for trp in gr:
  251. g_gr.add(self.globalize_triple(trp))
  252. return g_gr
  253. def globalize_rsrc(self, rsrc):
  254. '''
  255. Globalize a resource.
  256. '''
  257. gr = rsrc.graph
  258. urn = rsrc.identifier
  259. global_gr = self.globalize_graph(gr)
  260. global_uri = self.globalize_term(urn)
  261. return global_gr.resource(global_uri)