|
@@ -1,11 +1,10 @@
|
|
import logging
|
|
import logging
|
|
-import pickle
|
|
+import os
|
|
import re
|
|
import re
|
|
|
|
|
|
from collections import defaultdict
|
|
from collections import defaultdict
|
|
from hashlib import sha1
|
|
from hashlib import sha1
|
|
|
|
|
|
-from flask import g
|
|
|
|
from rdflib import Graph
|
|
from rdflib import Graph
|
|
from rdflib.term import URIRef, Variable
|
|
from rdflib.term import URIRef, Variable
|
|
|
|
|
|
@@ -15,26 +14,124 @@ from lakesuperior.globals import ROOT_RSRC_URI
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
+__doc__ = ''' Utility to translate and generate strings and other objects. '''
|
|
|
|
|
|
-class Toolbox:
|
|
+
|
|
|
|
+def fsize_fmt(num, suffix='b'):
|
|
|
|
+ """
|
|
|
|
+ Format an integer into 1024-block file size format.
|
|
|
|
+
|
|
|
|
+ Adapted from Python 2 code on
|
|
|
|
+ https://stackoverflow.com/a/1094933/3758232
|
|
|
|
+
|
|
|
|
+ :param int num: Size value in bytes.
|
|
|
|
+ :param str suffix: Suffix label (defaults to ``b``).
|
|
|
|
+
|
|
|
|
+ :rtype: str
|
|
|
|
+ :return: Formatted size to largest fitting unit.
|
|
|
|
+ """
|
|
|
|
+ for unit in ['','K','M','G','T','P','E','Z']:
|
|
|
|
+ if abs(num) < 1024.0:
|
|
|
|
+ return f'{num:3.1f} {unit}{suffix}'
|
|
|
|
+ num /= 1024.0
|
|
|
|
+ return f'{num:.1f} Y{suffix}'
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def get_tree_size(path, follow_symlinks=True):
|
|
|
|
+ """
|
|
|
|
+ Return total size of files in given path and subdirs.
|
|
|
|
+
|
|
|
|
+ Ripped from https://www.python.org/dev/peps/pep-0471/
|
|
|
|
+ """
|
|
|
|
+ total = 0
|
|
|
|
+ for entry in os.scandir(path):
|
|
|
|
+ if entry.is_dir(follow_symlinks=follow_symlinks):
|
|
|
|
+ total += get_tree_size(entry.path)
|
|
|
|
+ else:
|
|
|
|
+ total += entry.stat(
|
|
|
|
+ follow_symlinks=follow_symlinks
|
|
|
|
+ ).st_size
|
|
|
|
+
|
|
|
|
+ return total
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def replace_term_domain(term, search, replace):
|
|
|
|
+ '''
|
|
|
|
+ Replace the domain of a term.
|
|
|
|
+
|
|
|
|
+ :param rdflib.URIRef term: The term (URI) to change.
|
|
|
|
+ :param str search: Domain string to replace.
|
|
|
|
+ :param str replace: Domain string to use for replacement.
|
|
|
|
+
|
|
|
|
+ :rtype: rdflib.URIRef
|
|
'''
|
|
'''
|
|
- Utility class to translate and generate strings and other objects.
|
|
+ s = str(term)
|
|
|
|
+ if s.startswith(search):
|
|
|
|
+ s = s.replace(search, replace)
|
|
|
|
+
|
|
|
|
+ return URIRef(s)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def parse_rfc7240(h_str):
|
|
'''
|
|
'''
|
|
- def replace_term_domain(self, term, search, replace):
|
|
+ Parse ``Prefer`` header as per https://tools.ietf.org/html/rfc7240
|
|
- '''
|
|
|
|
- Replace the domain of a term.
|
|
|
|
|
|
|
|
- :param rdflib.URIRef term: The term (URI) to change.
|
|
+ The ``cgi.parse_header`` standard method does not work with all
|
|
- :param str search: Domain string to replace.
|
|
+ possible use cases for this header.
|
|
- :param str replace: Domain string to use for replacement.
|
|
+
|
|
|
|
+ :param str h_str: The header(s) as a comma-separated list of Prefer
|
|
|
|
+ statements, excluding the ``Prefer:`` token.
|
|
|
|
+ '''
|
|
|
|
+ parsed_hdr = defaultdict(dict)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ hdr_list = [ x.strip() for x in h_str.split(',') ]
|
|
|
|
+ for hdr in hdr_list:
|
|
|
|
+ parsed_pref = defaultdict(dict)
|
|
|
|
+
|
|
|
|
+ token_list = [ token.strip() for token in hdr.split(';') ]
|
|
|
|
+ prefer_token = token_list.pop(0).split('=')
|
|
|
|
+ prefer_name = prefer_token[0]
|
|
|
|
+
|
|
|
|
+ if len(prefer_token)>1:
|
|
|
|
+ parsed_pref['value'] = prefer_token[1].strip('"')
|
|
|
|
+
|
|
|
|
+ for param_token in token_list:
|
|
|
|
+
|
|
|
|
+ param_parts = [ prm.strip().strip('"') \
|
|
|
|
+ for prm in param_token.split('=') ]
|
|
|
|
+ param_value = param_parts[1] if len(param_parts) > 1 else None
|
|
|
|
+ parsed_pref['parameters'][param_parts[0]] = param_value
|
|
|
|
+
|
|
|
|
+ parsed_hdr[prefer_name] = parsed_pref
|
|
|
|
+
|
|
|
|
+ return parsed_hdr
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def split_uuid(uuid):
|
|
|
|
+ '''
|
|
|
|
+ Split a UID into pairtree segments. This mimics FCREPO4 behavior.
|
|
|
|
+
|
|
|
|
+ :param str uuid: UUID to split.
|
|
|
|
+
|
|
|
|
+ :rtype: str
|
|
|
|
+ '''
|
|
|
|
+ path = '{}/{}/{}/{}/{}'.format(uuid[:2], uuid[2:4],
|
|
|
|
+ uuid[4:6], uuid[6:8], uuid)
|
|
|
|
+
|
|
|
|
+ return path
|
|
|
|
|
|
- :rtype: rdflib.URIRef
|
|
|
|
- '''
|
|
|
|
- s = str(term)
|
|
|
|
- if s.startswith(search):
|
|
|
|
- s = s.replace(search, replace)
|
|
|
|
|
|
|
|
- return URIRef(s)
|
|
+
|
|
|
|
+class RequestUtils:
|
|
|
|
+ """
|
|
|
|
+ Utilities that require access to an HTTP request context.
|
|
|
|
+
|
|
|
|
+ Initialize this within a Flask request context.
|
|
|
|
+ """
|
|
|
|
+ def __init__(self):
|
|
|
|
+ from flask import g
|
|
|
|
+ self.webroot = g.webroot
|
|
|
|
|
|
|
|
|
|
def uid_to_uri(self, uid):
|
|
def uid_to_uri(self, uid):
|
|
@@ -42,7 +139,7 @@ class Toolbox:
|
|
|
|
|
|
:rtype: rdflib.URIRef
|
|
:rtype: rdflib.URIRef
|
|
'''
|
|
'''
|
|
- return URIRef(g.webroot + uid)
|
|
+ return URIRef(self.webroot + uid)
|
|
|
|
|
|
|
|
|
|
def uri_to_uid(self, uri):
|
|
def uri_to_uid(self, uri):
|
|
@@ -53,7 +150,7 @@ class Toolbox:
|
|
if uri.startswith(nsc['fcres']):
|
|
if uri.startswith(nsc['fcres']):
|
|
return str(uri).replace(nsc['fcres'], '')
|
|
return str(uri).replace(nsc['fcres'], '')
|
|
else:
|
|
else:
|
|
- return '/' + str(uri).replace(g.webroot, '').strip('/')
|
|
+ return '/' + str(uri).replace(self.webroot, '').strip('/')
|
|
|
|
|
|
|
|
|
|
def localize_uri_string(self, s):
|
|
def localize_uri_string(self, s):
|
|
@@ -63,11 +160,11 @@ class Toolbox:
|
|
|
|
|
|
:rtype: str
|
|
:rtype: str
|
|
'''
|
|
'''
|
|
- if s.strip('/') == g.webroot:
|
|
+ if s.strip('/') == self.webroot:
|
|
return str(ROOT_RSRC_URI)
|
|
return str(ROOT_RSRC_URI)
|
|
else:
|
|
else:
|
|
return s.rstrip('/').replace(
|
|
return s.rstrip('/').replace(
|
|
- g.webroot, str(nsc['fcres']))
|
|
+ self.webroot, str(nsc['fcres']))
|
|
|
|
|
|
|
|
|
|
def localize_term(self, uri):
|
|
def localize_term(self, uri):
|
|
@@ -90,9 +187,9 @@ class Toolbox:
|
|
:rtype: tuple(rdflib.URIRef)
|
|
:rtype: tuple(rdflib.URIRef)
|
|
'''
|
|
'''
|
|
s, p, o = trp
|
|
s, p, o = trp
|
|
- if s.startswith(g.webroot):
|
|
+ if s.startswith(self.webroot):
|
|
s = self.localize_term(s)
|
|
s = self.localize_term(s)
|
|
- if o.startswith(g.webroot):
|
|
+ if o.startswith(self.webroot):
|
|
o = self.localize_term(o)
|
|
o = self.localize_term(o)
|
|
|
|
|
|
return s, p, o
|
|
return s, p, o
|
|
@@ -119,10 +216,10 @@ class Toolbox:
|
|
:rtype: bytes
|
|
:rtype: bytes
|
|
'''
|
|
'''
|
|
return data.replace(
|
|
return data.replace(
|
|
- (g.webroot + '/').encode('utf-8'),
|
|
+ (self.webroot + '/').encode('utf-8'),
|
|
(nsc['fcres'] + '/').encode('utf-8')
|
|
(nsc['fcres'] + '/').encode('utf-8')
|
|
).replace(
|
|
).replace(
|
|
- g.webroot.encode('utf-8'),
|
|
+ self.webroot.encode('utf-8'),
|
|
(nsc['fcres'] + '/').encode('utf-8')
|
|
(nsc['fcres'] + '/').encode('utf-8')
|
|
)
|
|
)
|
|
|
|
|
|
@@ -139,7 +236,7 @@ class Toolbox:
|
|
URIs are converted to absolute using the internal URI as the base;
|
|
URIs are converted to absolute using the internal URI as the base;
|
|
finally, the root node is appropriately addressed.
|
|
finally, the root node is appropriately addressed.
|
|
'''
|
|
'''
|
|
- esc_webroot = g.webroot.replace('/', '\\/')
|
|
+ esc_webroot = self.webroot.replace('/', '\\/')
|
|
|
|
|
|
loc_ptn1 = r'<{}\/?(.*?)>'.format(esc_webroot)
|
|
loc_ptn1 = r'<{}\/?(.*?)>'.format(esc_webroot)
|
|
loc_sub1 = '<{}/\\1>'.format(nsc['fcres'])
|
|
loc_sub1 = '<{}/\\1>'.format(nsc['fcres'])
|
|
@@ -163,7 +260,7 @@ class Toolbox:
|
|
|
|
|
|
:rtype: string
|
|
:rtype: string
|
|
'''
|
|
'''
|
|
- return s.replace(str(nsc['fcres']), g.webroot)
|
|
+ return s.replace(str(nsc['fcres']), self.webroot)
|
|
|
|
|
|
|
|
|
|
def globalize_term(self, urn):
|
|
def globalize_term(self, urn):
|
|
@@ -231,52 +328,3 @@ class Toolbox:
|
|
|
|
|
|
return global_gr.resource(global_uri)
|
|
return global_gr.resource(global_uri)
|
|
|
|
|
|
-
|
|
|
|
- def parse_rfc7240(self, h_str):
|
|
|
|
- '''
|
|
|
|
- Parse ``Prefer`` header as per https://tools.ietf.org/html/rfc7240
|
|
|
|
-
|
|
|
|
- The ``cgi.parse_header`` standard method does not work with all
|
|
|
|
- possible use cases for this header.
|
|
|
|
-
|
|
|
|
- :param str h_str: The header(s) as a comma-separated list of Prefer
|
|
|
|
- statements, excluding the ``Prefer:`` token.
|
|
|
|
- '''
|
|
|
|
- parsed_hdr = defaultdict(dict)
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- hdr_list = [ x.strip() for x in h_str.split(',') ]
|
|
|
|
- for hdr in hdr_list:
|
|
|
|
- parsed_pref = defaultdict(dict)
|
|
|
|
-
|
|
|
|
- token_list = [ token.strip() for token in hdr.split(';') ]
|
|
|
|
- prefer_token = token_list.pop(0).split('=')
|
|
|
|
- prefer_name = prefer_token[0]
|
|
|
|
-
|
|
|
|
- if len(prefer_token)>1:
|
|
|
|
- parsed_pref['value'] = prefer_token[1].strip('"')
|
|
|
|
-
|
|
|
|
- for param_token in token_list:
|
|
|
|
-
|
|
|
|
- param_parts = [ prm.strip().strip('"') \
|
|
|
|
- for prm in param_token.split('=') ]
|
|
|
|
- param_value = param_parts[1] if len(param_parts) > 1 else None
|
|
|
|
- parsed_pref['parameters'][param_parts[0]] = param_value
|
|
|
|
-
|
|
|
|
- parsed_hdr[prefer_name] = parsed_pref
|
|
|
|
-
|
|
|
|
- return parsed_hdr
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- def split_uuid(self, uuid):
|
|
|
|
- '''
|
|
|
|
- Split a UID into pairtree segments. This mimics FCREPO4 behavior.
|
|
|
|
-
|
|
|
|
- :param str uuid: UUID to split.
|
|
|
|
-
|
|
|
|
- :rtype: str
|
|
|
|
- '''
|
|
|
|
- path = '{}/{}/{}/{}/{}'.format(uuid[:2], uuid[2:4],
|
|
|
|
- uuid[4:6], uuid[6:8], uuid)
|
|
|
|
-
|
|
|
|
- return path
|
|
|