toolbox.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. import logging
  2. import pickle
  3. from collections import defaultdict
  4. from hashlib import sha1
  5. from flask import g
  6. from rdflib.term import URIRef, Variable
  7. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  class Toolbox:
      '''
      Utility class to translate and generate strings and other objects.

      Most methods convert between "global" URIs, which are rooted at the
      web root of the current request (`flask.g.webroot`), and "local"
      URNs, which live in the `fcres` namespace (with a dedicated URN,
      `ROOT_NODE_URN`, for the repository root).
      '''

      _logger = logging.getLogger(__name__)

      # URN used internally to identify the root node.
      ROOT_NODE_URN = nsc['fcsystem'].root


      def uuid_to_uri(self, uuid):
          '''Convert a UUID to a URI.

          A falsy UUID (`None` or an empty string) maps to the web root.

          @param uuid (string) The resource UUID.

          @return URIRef
          '''
          uri = '{}/{}'.format(g.webroot, uuid) if uuid else g.webroot

          return URIRef(uri)


      def uri_to_uuid(self, uri):
          '''Convert an absolute URI (internal or external) to a UUID.

          @param uri (rdflib.term.URIRef) Either an internal URN or a URI
          under the web root.

          @return string The UUID, or `None` if the URI is the root node URN.
          '''
          if uri == self.ROOT_NODE_URN:
              return None

          uri_str = str(uri)
          local_prefix = str(nsc['fcres'])
          if uri_str.startswith(local_prefix):
              # Cut off the leading namespace only: a plain `replace` would
              # also eat any later occurrence of the same string in the UUID.
              return uri_str[len(local_prefix):]

          return uri_str.replace(g.webroot, '').strip('/')


      def localize_string(self, s):
          '''Convert URIs into URNs in a string using the application base URI.

          Leading and trailing slashes are stripped from the input before it
          is compared with, or searched for, the web root.

          @param string s Input string.

          @return string
          '''
          stripped = s.strip('/')
          if stripped == g.webroot:
              return str(self.ROOT_NODE_URN)

          return stripped.replace(g.webroot + '/', str(nsc['fcres']))


      def localize_term(self, uri):
          '''
          Convert an URI into an URN.

          @param rdflib.term.URIRef uri Input URI.

          @return rdflib.term.URIRef
          '''
          return URIRef(self.localize_string(str(uri)))


      def localize_graph(self, gr):
          '''
          Localize a graph.

          Every triple whose subject or object starts with the web root is
          replaced, in place, by a triple using the corresponding URNs.

          @param rdflib.Graph gr The graph to be localized. It is modified in
          place.

          @return rdflib.Graph The same graph that was passed in.
          '''
          q = '''
          CONSTRUCT {{ ?s ?p ?o . }} WHERE {{
            {{
              ?s ?p ?o .
              FILTER (
                STRSTARTS(str(?s), "{0}")
                ||
                STRSTARTS(str(?o), "{0}")
                ||
                STRSTARTS(str(?s), "{0}/")
                ||
                STRSTARTS(str(?o), "{0}/")
              ) .
            }}
          }}'''.format(g.webroot)

          return self._convert_graph_terms(
                  gr, gr.query(q), self.localize_term)


      def globalize_string(self, s):
          '''Convert URNs into URIs in a string using the application base URI.

          @param string s Input string.

          @return string
          '''
          return s.replace(str(nsc['fcres']), g.webroot + '/')


      def globalize_term(self, urn):
          '''
          Convert an URN into an URI using the application base URI.

          @param rdflib.term.URIRef urn Input URN.

          @return rdflib.term.URIRef
          '''
          # The root node has its own URN; map it to the bare `fcres`
          # namespace so that it is globalized like any other resource.
          if urn == self.ROOT_NODE_URN:
              urn = nsc['fcres']

          return URIRef(self.globalize_string(str(urn)))


      def globalize_graph(self, gr):
          '''
          Globalize a graph.

          Every triple whose subject or object starts with the `fcres`
          namespace or with the root node URN is replaced, in place, by a
          triple using the corresponding URIs.

          @param rdflib.Graph gr The graph to be globalized. It is modified in
          place.

          @return rdflib.Graph The same graph that was passed in.
          '''
          q = '''
          CONSTRUCT {{ ?s ?p ?o . }} WHERE {{
            {{
              ?s ?p ?o .
              FILTER (
                STRSTARTS(str(?s), "{0}")
                ||
                STRSTARTS(str(?o), "{0}")
                ||
                STRSTARTS(str(?s), "{1}")
                ||
                STRSTARTS(str(?o), "{1}")
              ) .
            }}
          }}'''.format(nsc['fcres'], self.ROOT_NODE_URN)

          return self._convert_graph_terms(
                  gr, gr.query(q), self.globalize_term)


      def _convert_graph_terms(self, gr, triples, convert):
          '''
          Swap each given triple in a graph for a converted copy.

          Only `URIRef` subjects and objects are passed through `convert`.
          Any other term (blank node, literal) is carried over untouched, so
          that e.g. a blank node subject is never turned into a URI.

          @param rdflib.Graph gr The graph to be modified in place.
          @param iterable triples The triples of `gr` to be converted.
          @param callable convert Function mapping a URIRef to a URIRef.

          @return rdflib.Graph The same graph that was passed in.
          '''
          for s, p, o in triples:
              new_s = convert(s) if isinstance(s, URIRef) else s
              new_o = convert(o) if isinstance(o, URIRef) else o
              gr.remove((s, p, o))
              gr.add((new_s, p, new_o))

          return gr


      def globalize_rsrc(self, rsrc):
          '''
          Globalize a resource.

          @param rdflib.resource.Resource rsrc The resource to be globalized.
          Its underlying graph is globalized in place.

          @return rdflib.resource.Resource A resource built from the
          globalized graph and identified by the globalized URI.
          '''
          global_gr = self.globalize_graph(rsrc.graph)
          global_uri = self.globalize_term(rsrc.identifier)

          return global_gr.resource(global_uri)


      def parse_rfc7240(self, h_str):
          '''
          Parse `Prefer` header as per https://tools.ietf.org/html/rfc7240

          The `cgi.parse_header` standard method does not work with all
          possible use cases for this header.

          N.B. Commas and semicolons inside quoted strings are not supported:
          they are always treated as separators.

          @param h_str (string) The header(s) as a comma-separated list of
          Prefer statements, excluding the `Prefer: ` token.

          @return defaultdict A mapping of preference names to dicts. Each
          dict has a `value` key if the preference has a value, and a
          `parameters` key (a dict of parameter names to values, or to `None`
          for valueless parameters) if the preference has parameters.
          '''
          parsed_hdr = defaultdict(dict)

          # Split up headers by comma
          for hdr in h_str.split(','):
              # Split up tokens by semicolon
              token_list = [token.strip() for token in hdr.split(';')]

              # Split on the first '=' only, so that values that contain
              # '=' themselves (e.g. URIs with a query string) are kept whole.
              name, has_value, value = token_list[0].partition('=')
              prefer_name = name.strip()
              if not prefer_name:
                  # Empty header or stray comma: nothing to record.
                  continue

              parsed_pref = defaultdict(dict)
              # If preference has a '=', it has a value, else none.
              if has_value:
                  parsed_pref['value'] = value.strip().strip('"')

              # Any further token is a parameter of the preference.
              for param_token in token_list[1:]:
                  self._logger.debug('Param token: {}'.format(param_token))
                  p_name, p_has_value, p_value = param_token.partition('=')
                  param_value = (
                          p_value.strip().strip('"') if p_has_value else None)
                  param_name = p_name.strip().strip('"')
                  parsed_pref['parameters'][param_name] = param_value

              parsed_hdr[prefer_name] = parsed_pref

          return parsed_hdr


      def rdf_cksum(self, gr):
          '''
          Generate a checksum for a graph.

          This is not straightforward because a graph is derived from an
          unordered data structure (RDF).

          What this method does is ordering the graph by subject, predicate,
          object, then creating a pickle string and a checksum of it.

          N.B. The context of the triples is ignored, so isomorphic graphs
          would have the same checksum regardless of the context(s) they are
          found in.

          N.B. The input graph is not modified.

          @TODO This can be later reworked to use a custom hashing algorithm.

          @param rdflib.Graph gr The graph to be hashed.

          @return string SHA1 checksum.
          '''
          # Leave out the messageDigest property, which very likely reflects
          # the previous state of the resource. This is done by filtering
          # rather than with `gr.remove()`: rdflib only treats `None` as a
          # wildcard in triple patterns, so a pattern made of `Variable`
          # terms matches nothing; filtering also keeps the caller's graph
          # intact.
          digest_pred = nsc['premis'].messageDigest
          ord_gr = sorted(t for t in gr if t[1] != digest_pred)

          return sha1(pickle.dumps(ord_gr)).hexdigest()


      def split_uuid(self, uuid):
          '''
          Split a UUID into pairtree segments. This mimics FCREPO4 behavior.

          The first four segments are the first four pairs of characters of
          the UUID; the last segment is the whole UUID.

          @param uuid (string) The UUID to be split.

          @return string The slash-separated path.
          '''
          path = '{}/{}/{}/{}/{}'.format(
                  uuid[:2], uuid[2:4], uuid[4:6], uuid[6:8], uuid)

          return path