소스 검색

Utils shakeup:

* Move toolbox under utils
* Separate request context-dependent method in a class
* Add file utils
* Fix stats page
Stefano Cossu 5 년 전
부모
커밋
c0fb232549

+ 8 - 2
lakesuperior/api/admin.py

@@ -27,8 +27,14 @@ def stats():
     """
     import lakesuperior.env_setup
     with env.app_globals.rdf_store.txn_ctx():
-        repo_stats = {'rsrc_stats': env.app_globals.rdfly.count_rsrc()}
-        repo_stats['store_stats'] = env.app_globals.rdf_store.stats()
+        repo_stats = {
+            'rsrc_stats': env.app_globals.rdfly.count_rsrc(),
+            'store_stats': env.app_globals.rdf_store.stats(),
+            'nonrdf_stats': {
+                'ct': env.app_globals.nonrdfly.file_ct,
+                'size': env.app_globals.nonrdfly.store_size,
+            },
+        }
 
     return repo_stats
 

+ 1 - 19
lakesuperior/endpoints/admin.py

@@ -5,6 +5,7 @@ from flask import Blueprint, jsonify, render_template
 from lakesuperior.api import admin as admin_api
 from lakesuperior.exceptions import (
     ChecksumValidationError, ResourceNotExistsError, TombstoneError)
+from lakesuperior.util.toolbox import fsize_fmt
 
 
 # Admin interface and REST API.
@@ -18,25 +19,6 @@ def stats():
     """
     Get repository statistics.
     """
-    def fsize_fmt(num, suffix='b'):
-        """
-        Format an integer into 1024-block file size format.
-
-        Adapted from Python 2 code on
-        https://stackoverflow.com/a/1094933/3758232
-
-        :param int num: Size value in bytes.
-        :param str suffix: Suffix label (defaults to ``b``).
-
-        :rtype: str
-        :return: Formatted size to largest fitting unit.
-        """
-        for unit in ['','K','M','G','T','P','E','Z']:
-            if abs(num) < 1024.0:
-                return f'{num:3.1f} {unit}{suffix}'
-            num /= 1024.0
-        return f'{num:.1f} Y{suffix}'
-
     repo_stats = admin_api.stats()
 
     return render_template(

+ 7 - 6
lakesuperior/endpoints/ldp.py

@@ -29,7 +29,8 @@ from lakesuperior.model.ldp.ldp_factory import LdpFactory
 from lakesuperior.model.ldp.ldp_nr import LdpNr
 from lakesuperior.model.ldp.ldp_rs import LdpRs
 from lakesuperior.model.ldp.ldpr import Ldpr
-from lakesuperior.toolbox import Toolbox
+from lakesuperior.util import toolbox
+from lakesuperior.util.toolbox import RequestUtils
 
 
 DEFAULT_RDF_MIMETYPE = 'text/turtle'
@@ -112,7 +113,7 @@ def log_request_start():
 
 @ldp.before_request
 def instantiate_req_vars():
-    g.tbox = Toolbox()
+    g.tbox = RequestUtils()
 
 
 @ldp.after_request
@@ -161,7 +162,7 @@ def get_resource(uid, out_fmt=None):
     # Then, business as usual.
     # Evaluate which representation is requested.
     if 'prefer' in request.headers:
-        prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
+        prefer = toolbox.parse_rfc7240(request.headers['prefer'])
         logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
         if 'return' in prefer:
             repr_options = parse_repr_options(prefer['return'])
@@ -445,7 +446,7 @@ def delete_resource(uid):
     headers = std_headers.copy()
 
     if 'prefer' in request.headers:
-        prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
+        prefer = toolbox.parse_rfc7240(request.headers['prefer'])
         leave_tstone = 'no-tombstone' not in prefer
     else:
         leave_tstone = True
@@ -608,13 +609,13 @@ def set_post_put_params():
     """
     handling = 'strict'
     if 'prefer' in request.headers:
-        prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
+        prefer = toolbox.parse_rfc7240(request.headers['prefer'])
         logger.debug('Parsed Prefer header: {}'.format(prefer))
         if 'handling' in prefer:
             handling = prefer['handling']['value']
 
     try:
-        disposition = g.tbox.parse_rfc7240(
+        disposition = toolbox.parse_rfc7240(
                 request.headers['content-disposition'])
     except KeyError:
         disposition = None

+ 0 - 1
lakesuperior/endpoints/query.py

@@ -10,7 +10,6 @@ from lakesuperior import env
 from lakesuperior.api import query as query_api
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
-from lakesuperior.toolbox import Toolbox
 
 # Query endpoint. raw SPARQL queries exposing the underlying layout can be made
 # available. Also convenience methods that allow simple lookups based on simple

+ 9 - 30
lakesuperior/endpoints/templates/stats.html

@@ -4,38 +4,17 @@
 {% block title %}System Statistics{% endblock %}
 {% block content %}
     <h2>Repository</h2>
-    <p>Current resources: <strong>{{ '{:,}'.format(rsrc_stats['main']) }}</strong></p>
+    <p>Current resource count: <strong>{{ '{:,}'.format(rsrc_stats['main']) }}</strong></p>
     <p>Historic snapshots: <strong>{{ '{:,}'.format(rsrc_stats['hist']) }}</strong></p>
-    <p>Triples: <strong>{{ '{:,}'.format(store_stats['num_triples']) }}</strong></p>
-    <h2>LMDB Store</h2>
-    <p>Overall size on disk: <strong>{{ fsize_fmt(
-        store_stats['idx_db_size'] + store_stats['data_db_size']
-    )}}</strong></p>
-    <h3>Data</h3>
-    <p>Size on disk: <strong>{{ fsize_fmt(store_stats['data_db_size']) }}</strong></p>
+    <p>Total triples: <strong>{{ '{:,}'.format(store_stats['num_triples']) }}</strong></p>
+    <h2>Binary Store</h2>
+    <p>Current file count: <strong>{{ '{:,}'.format(nonrdf_stats['ct']) }}</strong></p>
+    <p>Current size on disk: <strong>{{ fsize_fmt(nonrdf_stats['size']) }}</strong></p>
+    <h2>RDF Store</h2>
+    <p>Overall size on disk: <strong>{{ fsize_fmt(store_stats['store_size'])}}</strong></p>
+    <h3>LMDB Store Details</h3>
     <p>Refer to the <a href="http://lmdb.readthedocs.io/en/release/#lmdb.Environment.stat">LMDB API documentation</a> for details about the parameters below.</p>
-    {% for db_label, db in store_stats['data_db_stats'].items() %}
-    <h4>{{ db_label }}</h4>
-    <table class="table table-striped">
-        <thead>
-            <tr>
-                <td>Property</td>
-                <td>Value</td>
-            </tr>
-        </thead>
-        <tbody>
-        {% for p, v in db.items() | sort %}
-            <tr>
-                <td>{{ p }}</td>
-                <td>{{ v }}</td>
-            </tr>
-        {% endfor %}
-        </tbody>
-    </table>
-    {% endfor %}
-    <h3>Indices</h3>
-    <p>Size on disk: <strong>{{ fsize_fmt(store_stats['idx_db_size']) }}</strong></p>
-    {% for db_label, db in store_stats['idx_db_stats'].items() %}
+    {% for db_label, db in store_stats['db_stats'].items() %}
     <h4>{{ db_label }}</h4>
     <table class="table table-striped">
         <thead>

+ 2 - 3
lakesuperior/model/ldp/ldpr.py

@@ -29,7 +29,7 @@ from lakesuperior.exceptions import (
     ServerManagedTermError, TombstoneError)
 from lakesuperior.model.rdf.graph import Graph
 from lakesuperior.store.ldp_rs.rsrc_centric_layout import VERS_CONT_LABEL
-from lakesuperior.toolbox import Toolbox
+from lakesuperior.util.toolbox import replace_term_domain
 
 
 rdfly = env.app_globals.rdfly
@@ -165,7 +165,6 @@ class Ldpr(metaclass=ABCMeta):
         self.uri = nsc['fcres'][uid]
         # @FIXME Not ideal, should separate app-context dependent functions in
         # a different toolbox.
-        self.tbox = Toolbox()
 
         self.provided_imr = provided_imr
 
@@ -552,7 +551,7 @@ class Ldpr(metaclass=ABCMeta):
                 pass
             else:
                 ver_add_gr.add((
-                    self.tbox.replace_term_domain(t[0], self.uri, ver_uri),
+                    replace_term_domain(t[0], self.uri, ver_uri),
                     t[1], t[2]))
 
         rdfly.modify_rsrc(ver_uid, add_trp=ver_add_gr)

+ 13 - 0
lakesuperior/store/ldp_nr/base_non_rdf_layout.py

@@ -1,7 +1,9 @@
 import logging
+import os
 
 from abc import ABCMeta, abstractmethod
 
+from lakesuperior.util.toolbox import get_tree_size
 
 logger = logging.getLogger(__name__)
 
@@ -23,6 +25,17 @@ class BaseNonRdfLayout(metaclass=ABCMeta):
         self.root = config['location']
 
 
+    @property
+    def store_size(self):
+        """Calculate the store size on disk."""
+        return get_tree_size(self.root)
+
+
+    @property
+    def file_ct(self):
+        """Count the files found under the store root."""
+        return sum([len(files) for r, d, files in os.walk(self.root)])
+
     ## INTERFACE METHODS ##
 
     @abstractmethod

+ 2 - 0
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx

@@ -8,6 +8,7 @@ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
 
 from lakesuperior.store.base_lmdb_store import (
         KeyExistsError, KeyNotFoundError, LmdbError)
+from lakesuperior.util.toolbox import get_tree_size
 
 from libc.stdlib cimport malloc, free
 
@@ -150,6 +151,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         Gather statistics about the database."""
         st = self._stats()
         st['num_triples'] = st['db_stats']['spo:c']['ms_entries']
+        st['store_size'] = get_tree_size(self.env_path)
 
         return st
 

+ 1 - 1
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -25,6 +25,7 @@ from lakesuperior.globals import ROOT_RSRC_URI
 from lakesuperior.exceptions import (InvalidResourceError,
         ResourceNotExistsError, TombstoneError, PathSegmentError)
 from lakesuperior.model.rdf.graph import Graph
+from lakesuperior.util.toolbox import get_tree_size
 
 
 META_GR_URI = nsc['fcsystem']['meta']
@@ -197,7 +198,6 @@ class RsrcCentricLayout:
         return self._attr_routes
 
 
-
     def bootstrap(self):
         """
         Delete all graphs and insert the basic triples.

+ 9 - 0
lakesuperior/util/benchmark.py

@@ -31,6 +31,7 @@ logging.disable(logging.WARN)
 
 
 @click.command()
+
 @click.option(
     '--mode', '-m', default=def_mode,
     help=(
@@ -40,6 +41,7 @@ logging.disable(logging.WARN)
         f'Default: {def_endpoint}'
     )
 )
+
 @click.option(
     '--endpoint', '-e', default=def_endpoint,
     help=(
@@ -47,18 +49,22 @@ logging.disable(logging.WARN)
         f'Default: {def_endpoint}'
     )
 )
+
 @click.option(
     '--count', '-c', default=def_ct,
     help='Number of resources to ingest. Default: {def_ct}')
+
 @click.option(
     '--parent', '-p', default=def_parent,
     help='Path to the container resource under which the new resources will be '
         'created. It must begin with a slash (`/`) character. '
         f'Default: {def_parent}')
+
 @click.option(
     '--delete-container', '-d', is_flag=True,
     help='Delete container resource and its children if already existing. By '
     'default, the container is not deleted and new resources are added to it.')
+
 @click.option(
     '--method', '-X', default='put',
     help=(
@@ -66,14 +72,17 @@ logging.disable(logging.WARN)
         'Default: PUT'
     )
 )
+
 @click.option(
     '--graph-size', '-s', default=def_gr_size,
     help=f'Number of triples in each graph. Default: {def_gr_size}')
+
 @click.option(
     '--resource-type', '-t', default='r',
     help='Type of resources to ingest. One of `r` (only LDP-RS, i.e. RDF), '
     '`n` (only  LDP-NR, i.e. binaries), or `b` (50/50% of both). '
     'Default: r')
+
 @click.option(
     '--plot', '-P', is_flag=True, help='Plot a graph of ingest timings. '
     'The graph figure is displayed on screen with basic manipulation and save '

+ 123 - 75
lakesuperior/toolbox.py → lakesuperior/util/toolbox.py

@@ -1,11 +1,10 @@
 import logging
-import pickle
+import os
 import re
 
 from collections import defaultdict
 from hashlib import sha1
 
-from flask import g
 from rdflib import Graph
 from rdflib.term import URIRef, Variable
 
@@ -15,26 +14,124 @@ from lakesuperior.globals import ROOT_RSRC_URI
 
 logger = logging.getLogger(__name__)
 
+__doc__ = ''' Utility to translate and generate strings and other objects. '''
 
-class Toolbox:
+
+def fsize_fmt(num, suffix='b'):
+    """
+    Format an integer into 1024-block file size format.
+
+    Adapted from Python 2 code on
+    https://stackoverflow.com/a/1094933/3758232
+
+    :param int num: Size value in bytes.
+    :param str suffix: Suffix label (defaults to ``b``).
+
+    :rtype: str
+    :return: Formatted size to largest fitting unit.
+    """
+    for unit in ['','K','M','G','T','P','E','Z']:
+        if abs(num) < 1024.0:
+            return f'{num:3.1f} {unit}{suffix}'
+        num /= 1024.0
+    return f'{num:.1f} Y{suffix}'
+
+
+def get_tree_size(path, follow_symlinks=True):
+    """
+    Return total size of files in given path and subdirs.
+
+    Ripped from https://www.python.org/dev/peps/pep-0471/
+    """
+    total = 0
+    for entry in os.scandir(path):
+        if entry.is_dir(follow_symlinks=follow_symlinks):
+            total += get_tree_size(entry.path)
+        else:
+            total += entry.stat(
+                follow_symlinks=follow_symlinks
+            ).st_size
+
+    return total
+
+
+def replace_term_domain(term, search, replace):
+    '''
+    Replace the domain of a term.
+
+    :param rdflib.URIRef term: The term (URI) to change.
+    :param str search: Domain string to replace.
+    :param str replace: Domain string to use for replacement.
+
+    :rtype: rdflib.URIRef
     '''
-    Utility class to translate and generate strings and other objects.
+    s = str(term)
+    if s.startswith(search):
+        s = s.replace(search, replace)
+
+    return URIRef(s)
+
+
+def parse_rfc7240(h_str):
     '''
-    def replace_term_domain(self, term, search, replace):
-        '''
-        Replace the domain of a term.
+    Parse ``Prefer`` header as per https://tools.ietf.org/html/rfc7240
 
-        :param rdflib.URIRef term: The term (URI) to change.
-        :param str search: Domain string to replace.
-        :param str replace: Domain string to use for replacement.
+    The ``cgi.parse_header`` standard method does not work with all
+    possible use cases for this header.
+
+    :param str h_str: The header(s) as a comma-separated list of Prefer
+        statements, excluding the ``Prefer:`` token.
+    '''
+    parsed_hdr = defaultdict(dict)
+
+    # Split up headers by comma
+    hdr_list = [ x.strip() for x in h_str.split(',') ]
+    for hdr in hdr_list:
+        parsed_pref = defaultdict(dict)
+        # Split up tokens by semicolon
+        token_list = [ token.strip() for token in hdr.split(';') ]
+        prefer_token = token_list.pop(0).split('=')
+        prefer_name = prefer_token[0]
+        # If preference has a '=', it has a value, else none.
+        if len(prefer_token)>1:
+            parsed_pref['value'] = prefer_token[1].strip('"')
+
+        for param_token in token_list:
+            # If the token list had a ';' the preference has a parameter.
+            param_parts = [ prm.strip().strip('"') \
+                    for prm in param_token.split('=') ]
+            param_value = param_parts[1] if len(param_parts) > 1 else None
+            parsed_pref['parameters'][param_parts[0]] = param_value
+
+        parsed_hdr[prefer_name] = parsed_pref
+
+    return parsed_hdr
+
+
+def split_uuid(uuid):
+    '''
+    Split a UID into pairtree segments. This mimics FCREPO4 behavior.
+
+    :param str uuid: UUID to split.
+
+    :rtype: str
+    '''
+    path = '{}/{}/{}/{}/{}'.format(uuid[:2], uuid[2:4],
+            uuid[4:6], uuid[6:8], uuid)
+
+    return path
 
-        :rtype: rdflib.URIRef
-        '''
-        s = str(term)
-        if s.startswith(search):
-            s = s.replace(search, replace)
 
-        return URIRef(s)
+
+class RequestUtils:
+    """
+    Utilities that require access to an HTTP request context.
+
+    Initialize this within a Flask request context.
+    """
+    def __init__(self):
+        from flask import g
+        self.webroot = g.webroot
 
 
     def uid_to_uri(self, uid):
@@ -42,7 +139,7 @@ class Toolbox:
 
         :rtype: rdflib.URIRef
         '''
-        return URIRef(g.webroot + uid)
+        return URIRef(self.webroot + uid)
 
 
     def uri_to_uid(self, uri):
@@ -53,7 +150,7 @@ class Toolbox:
         if uri.startswith(nsc['fcres']):
             return str(uri).replace(nsc['fcres'], '')
         else:
-            return '/' + str(uri).replace(g.webroot, '').strip('/')
+            return '/' + str(uri).replace(self.webroot, '').strip('/')
 
 
     def localize_uri_string(self, s):
@@ -63,11 +160,11 @@ class Toolbox:
 
         :rtype: str
         '''
-        if s.strip('/') == g.webroot:
+        if s.strip('/') == self.webroot:
             return str(ROOT_RSRC_URI)
         else:
             return s.rstrip('/').replace(
-                    g.webroot, str(nsc['fcres']))
+                    self.webroot, str(nsc['fcres']))
 
 
     def localize_term(self, uri):
@@ -90,9 +187,9 @@ class Toolbox:
         :rtype: tuple(rdflib.URIRef)
         '''
         s, p, o = trp
-        if s.startswith(g.webroot):
+        if s.startswith(self.webroot):
             s = self.localize_term(s)
-        if o.startswith(g.webroot):
+        if o.startswith(self.webroot):
             o = self.localize_term(o)
 
         return s, p, o
@@ -119,10 +216,10 @@ class Toolbox:
         :rtype: bytes
         '''
         return data.replace(
-            (g.webroot + '/').encode('utf-8'),
+            (self.webroot + '/').encode('utf-8'),
             (nsc['fcres'] + '/').encode('utf-8')
         ).replace(
-            g.webroot.encode('utf-8'),
+            self.webroot.encode('utf-8'),
             (nsc['fcres'] + '/').encode('utf-8')
         )
 
@@ -139,7 +236,7 @@ class Toolbox:
         URIs are converted to absolute using the internal URI as the base;
         finally, the root node is appropriately addressed.
         '''
-        esc_webroot = g.webroot.replace('/', '\\/')
+        esc_webroot = self.webroot.replace('/', '\\/')
         #loc_ptn = r'<({}\/?)?(.*?)?(\?.*?)?(#.*?)?>'.format(esc_webroot)
         loc_ptn1 = r'<{}\/?(.*?)>'.format(esc_webroot)
         loc_sub1 = '<{}/\\1>'.format(nsc['fcres'])
@@ -163,7 +260,7 @@ class Toolbox:
 
         :rtype: string
         '''
-        return s.replace(str(nsc['fcres']), g.webroot)
+        return s.replace(str(nsc['fcres']), self.webroot)
 
 
     def globalize_term(self, urn):
@@ -231,52 +328,3 @@ class Toolbox:
 
         return global_gr.resource(global_uri)
 
-
-    def parse_rfc7240(self, h_str):
-        '''
-        Parse ``Prefer`` header as per https://tools.ietf.org/html/rfc7240
-
-        The ``cgi.parse_header`` standard method does not work with all
-        possible use cases for this header.
-
-        :param str h_str: The header(s) as a comma-separated list of Prefer
-            statements, excluding the ``Prefer:`` token.
-        '''
-        parsed_hdr = defaultdict(dict)
-
-        # Split up headers by comma
-        hdr_list = [ x.strip() for x in h_str.split(',') ]
-        for hdr in hdr_list:
-            parsed_pref = defaultdict(dict)
-            # Split up tokens by semicolon
-            token_list = [ token.strip() for token in hdr.split(';') ]
-            prefer_token = token_list.pop(0).split('=')
-            prefer_name = prefer_token[0]
-            # If preference has a '=', it has a value, else none.
-            if len(prefer_token)>1:
-                parsed_pref['value'] = prefer_token[1].strip('"')
-
-            for param_token in token_list:
-                # If the token list had a ';' the preference has a parameter.
-                param_parts = [ prm.strip().strip('"') \
-                        for prm in param_token.split('=') ]
-                param_value = param_parts[1] if len(param_parts) > 1 else None
-                parsed_pref['parameters'][param_parts[0]] = param_value
-
-            parsed_hdr[prefer_name] = parsed_pref
-
-        return parsed_hdr
-
-
-    def split_uuid(self, uuid):
-        '''
-        Split a UID into pairtree segments. This mimics FCREPO4 behavior.
-
-        :param str uuid: UUID to split.
-
-        :rtype: str
-        '''
-        path = '{}/{}/{}/{}/{}'.format(uuid[:2], uuid[2:4],
-                uuid[4:6], uuid[6:8], uuid)
-
-        return path