toolbox.py

import logging
import pickle

from collections import defaultdict
from hashlib import sha1

from flask import g
from rdflib.term import URIRef

from lakesuperior.dictionaries.namespaces import ns_collection as nsc


class Toolbox:
    '''
    Utility class to translate and generate strings and other objects.
    '''

    _logger = logging.getLogger(__name__)

    ROOT_NODE_URN = nsc['fcsystem'].root
    def replace_term_domain(self, term, search, replace):
        '''
        Replace the domain of a term.

        @param term (URIRef) The term (URI) to change.
        @param search (string) Domain string to replace.
        @param replace (string) Domain string to use for replacement.

        @return URIRef
        '''
        s = str(term)
        if s.startswith(search):
            s = s.replace(search, replace)

        return URIRef(s)
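    # Example (sketch, not part of the original module; values are
    # hypothetical): swapping the domain of a term.
    #
    #     Toolbox().replace_term_domain(
    #             URIRef('http://example.org/res/1'),
    #             'http://example.org', 'https://repo.local')
    #     # URIRef('https://repo.local/res/1')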
    def uuid_to_uri(self, uuid):
        '''Convert a UUID to a URI.

        @return URIRef
        '''
        uri = '{}/{}'.format(g.webroot, uuid) if uuid else g.webroot

        return URIRef(uri)

    def uri_to_uuid(self, uri):
        '''Convert an absolute URI (internal or external) to a UUID.

        @return string
        '''
        if uri == self.ROOT_NODE_URN:
            return None
        elif uri.startswith(nsc['fcres']):
            return str(uri).replace(nsc['fcres'], '')
        else:
            return str(uri).replace(g.webroot, '').strip('/')
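    # Example (sketch, not part of the original module): a minimal UUID/URI
    # round trip, assuming a Flask application context in which `g.webroot`
    # is set to a hypothetical repository base URL.
    #
    #     from flask import Flask, g
    #     app = Flask(__name__)
    #     with app.app_context():
    #         g.webroot = 'http://localhost:8000/ldp'
    #         tb = Toolbox()
    #         uri = tb.uuid_to_uri('8c9a1547')
    #         # uri == URIRef('http://localhost:8000/ldp/8c9a1547')
    #         tb.uri_to_uuid(uri)
    #         # '8c9a1547'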
    def localize_string(self, s):
        '''Convert URIs into URNs in a string using the application base URI.

        @param string s Input string.

        @return string
        '''
        if s.strip('/') == g.webroot:
            return str(self.ROOT_NODE_URN)
        else:
            return s.strip('/').replace(g.webroot + '/', str(nsc['fcres']))

    def localize_term(self, uri):
        '''
        Convert a URI into a URN.

        @param rdflib.term.URIRef uri Input URI.

        @return rdflib.term.URIRef
        '''
        return URIRef(self.localize_string(str(uri)))
    def localize_graph(self, gr):
        '''
        Localize a graph.
        '''
        q = '''
            CONSTRUCT {{ ?s ?p ?o . }} WHERE {{
              {{
                ?s ?p ?o .
                FILTER (
                  STRSTARTS(str(?s), "{0}")
                  ||
                  STRSTARTS(str(?o), "{0}")
                  ||
                  STRSTARTS(str(?s), "{0}/")
                  ||
                  STRSTARTS(str(?o), "{0}/")
                ) .
              }}
            }}'''.format(g.webroot)
        flt_gr = gr.query(q)

        for t in flt_gr:
            local_s = self.localize_term(t[0])
            local_o = self.localize_term(t[2]) \
                    if isinstance(t[2], URIRef) \
                    else t[2]
            gr.remove(t)
            gr.add((local_s, t[1], local_o))

        return gr
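    # Example (sketch, hypothetical values): with `g.webroot` set to
    # 'http://localhost:8000/ldp', a triple such as
    #
    #     <http://localhost:8000/ldp/8c9a1547> dc:title "A title" .
    #
    # is replaced in the graph by one whose subject uses the internal
    # nsc['fcres'] namespace instead of the public base URL; literal objects
    # are left untouched.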
    def globalize_string(self, s):
        '''Convert URNs into URIs in a string using the application base URI.

        @param string s Input string.

        @return string
        '''
        return s.replace(str(nsc['fcres']), g.webroot + '/')

    def globalize_term(self, urn):
        '''
        Convert a URN into a URI using the application base URI.

        @param rdflib.term.URIRef urn Input URN.

        @return rdflib.term.URIRef
        '''
        if urn == self.ROOT_NODE_URN:
            urn = nsc['fcres']

        return URIRef(self.globalize_string(str(urn)))
    def globalize_graph(self, gr):
        '''
        Globalize a graph.
        '''
        q = '''
            CONSTRUCT {{ ?s ?p ?o . }} WHERE {{
              {{
                ?s ?p ?o .
                FILTER (
                  STRSTARTS(str(?s), "{0}")
                  ||
                  STRSTARTS(str(?o), "{0}")
                  ||
                  STRSTARTS(str(?s), "{1}")
                  ||
                  STRSTARTS(str(?o), "{1}")
                ) .
              }}
            }}'''.format(nsc['fcres'], self.ROOT_NODE_URN)
        flt_gr = gr.query(q)

        for t in flt_gr:
            global_s = self.globalize_term(t[0])
            global_o = self.globalize_term(t[2]) \
                    if isinstance(t[2], URIRef) \
                    else t[2]
            gr.remove(t)
            gr.add((global_s, t[1], global_o))

        return gr

    def globalize_rsrc(self, rsrc):
        '''
        Globalize a resource.
        '''
        gr = rsrc.graph
        urn = rsrc.identifier

        global_gr = self.globalize_graph(gr)
        global_uri = self.globalize_term(urn)

        return global_gr.resource(global_uri)
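    # Example (sketch): globalizing a resource is the inverse of the
    # localization above, at the rdflib.resource.Resource level. Assuming the
    # same hypothetical app context as in the earlier sketches:
    #
    #     rsrc = gr.resource(nsc['fcres']['8c9a1547'])
    #     global_rsrc = Toolbox().globalize_rsrc(rsrc)
    #     global_rsrc.identifier
    #     # URIRef('http://localhost:8000/ldp/8c9a1547')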
    def parse_rfc7240(self, h_str):
        '''
        Parse a `Prefer` header as per https://tools.ietf.org/html/rfc7240

        The `cgi.parse_header` standard method does not work with all possible
        use cases for this header.

        @param h_str (string) The header(s) as a comma-separated list of
        Prefer statements, excluding the `Prefer: ` token.
        '''
        parsed_hdr = defaultdict(dict)

        # Split up headers by comma.
        hdr_list = [x.strip() for x in h_str.split(',')]
        for hdr in hdr_list:
            parsed_pref = defaultdict(dict)
            # Split up tokens by semicolon.
            token_list = [token.strip() for token in hdr.split(';')]
            prefer_token = token_list.pop(0).split('=')
            prefer_name = prefer_token[0]
            # If the preference has a '=', it has a value, else none.
            if len(prefer_token) > 1:
                parsed_pref['value'] = prefer_token[1].strip('"')

            for param_token in token_list:
                # If the token list had a ';', the preference has parameters.
                self._logger.debug('Param token: {}'.format(param_token))
                param_parts = [prm.strip().strip('"')
                        for prm in param_token.split('=')]
                param_value = param_parts[1] if len(param_parts) > 1 else None
                parsed_pref['parameters'][param_parts[0]] = param_value

            parsed_hdr[prefer_name] = parsed_pref

        return parsed_hdr
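    # Example (sketch, hypothetical header value): parsing a typical LDP
    # `Prefer` header. Output is shown as plain dicts for readability; the
    # method actually returns nested defaultdicts.
    #
    #     Toolbox().parse_rfc7240(
    #         'return=representation; '
    #         'include="http://www.w3.org/ns/ldp#PreferMinimalContainer", '
    #         'handling=lenient')
    #     # {'return': {
    #     #      'value': 'representation',
    #     #      'parameters': {
    #     #          'include': 'http://www.w3.org/ns/ldp#PreferMinimalContainer'}},
    #     #  'handling': {'value': 'lenient'}}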
    def rdf_cksum(self, gr):
        '''
        Generate a checksum for a graph.

        This is not straightforward because a graph is derived from an
        unordered data structure (RDF).

        This method orders the graph by subject, predicate and object, then
        pickles the resulting list and checksums the pickle.

        N.B. The context of the triples is ignored, so isomorphic graphs would
        have the same checksum regardless of the context(s) they are found in.

        @TODO This can be later reworked to use a custom hashing algorithm.

        @param rdflib.Graph gr The graph to be hashed.

        @return string SHA1 checksum.
        '''
        # Remove the messageDigest property, which very likely reflects the
        # previous state of the resource. `None` acts as a wildcard here.
        gr.remove((None, nsc['premis'].messageDigest, None))

        ord_gr = sorted(list(gr), key=lambda x: (x[0], x[1], x[2]))
        cksum = sha1(pickle.dumps(ord_gr)).hexdigest()

        return cksum
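    # Example (sketch): two graphs holding the same triples produce the same
    # checksum regardless of insertion order. The subject URI and literals
    # below are illustrative only.
    #
    #     from rdflib import Graph, Literal
    #     from rdflib.namespace import RDFS
    #     s = URIRef('urn:test:s')
    #     gr1, gr2 = Graph(), Graph()
    #     gr1.add((s, RDFS.label, Literal('a')))
    #     gr1.add((s, RDFS.comment, Literal('b')))
    #     gr2.add((s, RDFS.comment, Literal('b')))
    #     gr2.add((s, RDFS.label, Literal('a')))
    #     Toolbox().rdf_cksum(gr1) == Toolbox().rdf_cksum(gr2)
    #     # True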
    def split_uuid(self, uuid):
        '''
        Split a UUID into pairtree segments. This mimics FCREPO4 behavior.
        '''
        path = '{}/{}/{}/{}/{}'.format(uuid[:2], uuid[2:4],
                uuid[4:6], uuid[6:8], uuid)

        return path
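    # Example (sketch, hypothetical UUID): the pairtree path consists of the
    # first four 2-character segments of the UUID followed by the full UUID.
    #
    #     Toolbox().split_uuid('b473bee0-4718-4c9e-a364-5fa40a15e89c')
    #     # 'b4/73/be/e0/b473bee0-4718-4c9e-a364-5fa40a15e89c'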