toolbox.py

import logging
import pickle

from collections import defaultdict
from hashlib import sha1

from flask import request, g
from rdflib.term import Literal, URIRef, Variable

from lakesuperior.dictionaries.namespaces import ns_collection as nsc


class Toolbox:
    '''
    Utility class to translate and generate strings and other objects.
    '''

    _logger = logging.getLogger(__name__)

    ROOT_NODE_URN = nsc['fcsystem'].root

    def __init__(self):
        '''
        Set the base URL for the requests. This class has to be instantiated
        within a request context.
        '''
        self.base_url = request.host_url + g.url_prefix

    def uuid_to_uri(self, uuid):
        '''Convert a UUID to a URI.
        @return URIRef
        '''
        uri = '{}/{}'.format(self.base_url, uuid) if uuid else self.base_url

        return URIRef(uri)

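    # Illustrative sketch, assuming a Flask request context in which
    # `self.base_url` resolves to e.g. 'http://localhost:8000/ldp' (hostname,
    # port, path prefix and UUID below are all hypothetical):
    #
    #   tb = Toolbox()
    #   tb.uuid_to_uri('8c9a1bd3')  # -> URIRef('http://localhost:8000/ldp/8c9a1bd3')
    #   tb.uuid_to_uri(None)        # -> URIRef('http://localhost:8000/ldp')
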
    def uri_to_uuid(self, uri):
        '''Convert an absolute URI (internal or external) to a UUID.
        @return string
        '''
        if uri == self.ROOT_NODE_URN:
            return None
        elif uri.startswith(nsc['fcres']):
            return str(uri).replace(nsc['fcres'], '')
        else:
            return str(uri).replace(self.base_url, '').strip('/')

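    # Illustrative sketch, using the same hypothetical base URL as above: both
    # the external URI and the internal `fcres` URN of a resource map back to
    # the same UUID string, while the root node maps to None.
    #
    #   tb.uri_to_uuid(URIRef('http://localhost:8000/ldp/8c9a1bd3'))  # -> '8c9a1bd3'
    #   tb.uri_to_uuid(nsc['fcres']['8c9a1bd3'])                      # -> '8c9a1bd3'
    #   tb.uri_to_uuid(tb.ROOT_NODE_URN)                              # -> None
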
    def localize_string(self, s):
        '''Convert URIs into URNs in a string using the application base URI.
        @param string s Input string.
        @return string
        '''
        if s.strip('/') == self.base_url:
            return str(self.ROOT_NODE_URN)
        else:
            return s.strip('/').replace(self.base_url+'/', str(nsc['fcres']))

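    # Illustrative sketch (hypothetical base URL and UUID): the base URL
    # prefix of a client-facing URI string is swapped for the `fcres`
    # namespace, while the bare base URL maps to the root node URN.
    #
    #   tb.localize_string('http://localhost:8000/ldp/8c9a1bd3/')
    #   # -> str(nsc['fcres']) + '8c9a1bd3'
    #   tb.localize_string('http://localhost:8000/ldp/')
    #   # -> str(tb.ROOT_NODE_URN)
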
    def localize_term(self, uri):
        '''
        Convert a URI into a URN.
        @param rdflib.term.URIRef uri Input URI.
        @return rdflib.term.URIRef
        '''
        return URIRef(self.localize_string(str(uri)))

    def localize_graph(self, g):
        '''
        Localize a graph.
        '''
        q = '''
        CONSTRUCT {{ ?s ?p ?o . }} WHERE {{
          {{
            ?s ?p ?o .
            FILTER (
              STRSTARTS(str(?s), "{0}")
              ||
              STRSTARTS(str(?o), "{0}")
              ||
              STRSTARTS(str(?s), "{0}/")
              ||
              STRSTARTS(str(?o), "{0}/")
            ) .
          }}
        }}'''.format(self.base_url)

        flt_g = g.query(q)

        for t in flt_g:
            local_s = self.localize_term(t[0])
            local_o = self.localize_term(t[2]) \
                    if isinstance(t[2], URIRef) \
                    else t[2]
            g.remove(t)
            g.add((local_s, t[1], local_o))

        return g

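    # Illustrative sketch: a graph whose subjects or objects use the request
    # base URL is rewritten in place so those terms become `fcres` URNs. The
    # UUID and predicate below are hypothetical.
    #
    #   from rdflib import Graph
    #   gr = Graph()
    #   gr.add((URIRef(tb.base_url + '/8c9a1bd3'),
    #           URIRef('http://purl.org/dc/terms/isPartOf'),
    #           URIRef(tb.base_url)))
    #   gr = tb.localize_graph(gr)
    #   # The subject is now a URN in the fcres namespace; the object, being
    #   # the bare base URL, is now the root node URN.
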
    def globalize_string(self, s):
        '''Convert URNs into URIs in a string using the application base URI.
        @param string s Input string.
        @return string
        '''
        return s.replace(str(nsc['fcres']), self.base_url + '/')

    def globalize_term(self, urn):
        '''
        Convert a URN into a URI using the application base URI.
        @param rdflib.term.URIRef urn Input URN.
        @return rdflib.term.URIRef
        '''
        if urn == self.ROOT_NODE_URN:
            urn = nsc['fcres']

        return URIRef(self.globalize_string(str(urn)))

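    # Illustrative sketch, inverse of the localization above (hypothetical
    # UUID and base URL):
    #
    #   tb.globalize_term(nsc['fcres']['8c9a1bd3'])
    #   # -> URIRef('http://localhost:8000/ldp/8c9a1bd3')
    #   tb.globalize_term(tb.ROOT_NODE_URN)
    #   # -> URIRef('http://localhost:8000/ldp/')
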
    def globalize_graph(self, g):
        '''
        Globalize a graph.
        '''
        q = '''
        CONSTRUCT {{ ?s ?p ?o . }} WHERE {{
          {{
            ?s ?p ?o .
            FILTER (
              STRSTARTS(str(?s), "{0}")
              ||
              STRSTARTS(str(?o), "{0}")
              ||
              STRSTARTS(str(?s), "{1}")
              ||
              STRSTARTS(str(?o), "{1}")
            ) .
          }}
        }}'''.format(nsc['fcres'], self.ROOT_NODE_URN)

        flt_g = g.query(q)

        for t in flt_g:
            global_s = self.globalize_term(t[0])
            global_o = self.globalize_term(t[2]) \
                    if isinstance(t[2], URIRef) \
                    else t[2]
            g.remove(t)
            g.add((global_s, t[1], global_o))

        return g

    def globalize_rsrc(self, rsrc):
        '''
        Globalize a resource.
        '''
        g = rsrc.graph
        urn = rsrc.identifier

        global_g = self.globalize_graph(g)
        global_uri = self.globalize_term(urn)

        return global_g.resource(global_uri)

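    # Illustrative sketch: an rdflib Resource keyed by its internal URN is
    # turned into one keyed by its client-facing URI, with its whole graph
    # globalized alongside (hypothetical UUID).
    #
    #   rsrc = gr.resource(nsc['fcres']['8c9a1bd3'])
    #   global_rsrc = tb.globalize_rsrc(rsrc)
    #   global_rsrc.identifier  # -> URIRef under tb.base_url
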
    def parse_rfc7240(self, h_str):
        '''
        Parse `Prefer` header as per https://tools.ietf.org/html/rfc7240
        The `cgi.parse_header` standard method does not work with all possible
        use cases for this header.
        @param h_str (string) The header(s) as a comma-separated list of Prefer
        statements, excluding the `Prefer: ` token.
        '''
        parsed_hdr = defaultdict(dict)

        # Split up headers by comma.
        hdr_list = [ x.strip() for x in h_str.split(',') ]
        for hdr in hdr_list:
            parsed_pref = defaultdict(dict)
            # Split up tokens by semicolon.
            token_list = [ token.strip() for token in hdr.split(';') ]
            prefer_token = token_list.pop(0).split('=')
            prefer_name = prefer_token[0]
            # If the preference has a '=', it has a value, else none.
            if len(prefer_token) > 1:
                parsed_pref['value'] = prefer_token[1].strip('"')

            for param_token in token_list:
                # If the token list had a ';', the preference has parameters.
                self._logger.debug('Param token: {}'.format(param_token))
                param_parts = [ prm.strip().strip('"') \
                        for prm in param_token.split('=') ]
                param_value = param_parts[1] if len(param_parts) > 1 else None
                parsed_pref['parameters'][param_parts[0]] = param_value

            parsed_hdr[prefer_name] = parsed_pref

        return parsed_hdr

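    # Illustrative sketch of the parsed structure, using an LDP-style Prefer
    # header value (the header value itself is only an example):
    #
    #   prefs = tb.parse_rfc7240(
    #           'return=representation; '
    #           'include="http://www.w3.org/ns/ldp#PreferMinimalContainer", '
    #           'handling=lenient')
    #   prefs['return']['value']       # -> 'representation'
    #   prefs['return']['parameters']  # -> {'include':
    #                                  #     'http://www.w3.org/ns/ldp#PreferMinimalContainer'}
    #   prefs['handling']['value']     # -> 'lenient'
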
    def rdf_cksum(self, g):
        '''
        Generate a checksum for a graph.
        This is not straightforward because a graph is derived from an
        unordered data structure (RDF).
        What this method does is order the graph by subject, predicate and
        object, then create a pickle string and a checksum of it.
        N.B. The context of the triples is ignored, so isomorphic graphs would
        have the same checksum regardless of the context(s) they are found in.
        @TODO This can be later reworked to use a custom hashing algorithm.
        @param rdflib.Graph g The graph to be hashed.
        @return string SHA1 checksum.
        '''
        # Remove the messageDigest property, which very likely reflects the
        # previous state of the resource. `None` acts as a wildcard for any
        # subject and any object in rdflib.
        g.remove((None, nsc['premis'].messageDigest, None))

        ord_g = sorted(list(g), key=lambda x : (x[0], x[1], x[2]))

        return sha1(pickle.dumps(ord_g)).hexdigest()

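    # Illustrative sketch of the intended property: two graphs holding the
    # same triples, added in a different order, yield the same checksum,
    # because the triples are sorted before being pickled and hashed.
    #
    #   from rdflib import Graph
    #   g1, g2 = Graph(), Graph()
    #   t1 = (URIRef('urn:x:s'), URIRef('urn:x:p'), Literal('a'))
    #   t2 = (URIRef('urn:x:s'), URIRef('urn:x:p'), Literal('b'))
    #   g1.add(t1); g1.add(t2)
    #   g2.add(t2); g2.add(t1)
    #   tb.rdf_cksum(g1) == tb.rdf_cksum(g2)  # -> True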