Browse Source

Add report output for check_refint.

Stefano Cossu 6 years ago
parent
commit
84fa563ec5

+ 25 - 20
conftest.py

@@ -4,43 +4,48 @@ from os import makedirs, path
 from shutil import rmtree
 from tempfile import gettempdir
 
-from lakesuperior import env_setup, env
-from lakesuperior.app import create_app
+from lakesuperior import env
+from lakesuperior.config_parser import parse_config
+from lakesuperior.globals import AppGlobals
 from lakesuperior.util.generators import random_image
 
+
+# Override data directory locations.
+config = parse_config()
+data_dir = path.join(gettempdir(), 'lsup_test', 'data')
+config['application']['data_dir'] = data_dir
+config['application']['store']['ldp_nr']['location'] = (
+        path.join(data_dir, 'ldpnr_store'))
+config['application']['store']['ldp_rs']['location'] = (
+        path.join(data_dir, 'ldprs_store'))
+
+env.app_globals = AppGlobals(config)
+from lakesuperior.app import create_app
+
+
 @pytest.fixture(scope='module')
 def app():
-    # Override data directory locations.
-    data_dir = path.join(gettempdir(), 'lsup_test', 'data')
-    makedirs(data_dir, exist_ok=True)
-    env.app_globals.config['application']['data_dir'] = data_dir
-    env.app_globals.config['application']['store']['ldp_nr']['location'] = (
-            path.join(data_dir, 'ldpnr_store'))
-    env.app_globals.config['application']['store']['ldp_rs']['location'] = (
-            path.join(data_dir, 'ldprs_store'))
     app = create_app(env.app_globals.config['application'])
 
     yield app
 
-    # TODO improve this by using tempfile.TemporaryDirectory as a context
-    # manager.
-    print('Removing fixture data directory.')
-    rmtree(data_dir)
-
 
 @pytest.fixture(scope='module')
 def db(app):
     '''
     Set up and tear down test triplestore.
     '''
-    rdfly = env.app_globals.rdfly
-    rdfly.bootstrap()
+    makedirs(data_dir, exist_ok=True)
+    env.app_globals.rdfly.bootstrap()
     env.app_globals.nonrdfly.bootstrap()
+    print('Initialized data store.')
 
-    yield rdfly
+    yield env.app_globals.rdfly
 
-    print('Tearing down fixture graph store.')
-    rdfly.store.destroy(rdfly.store.path)
+    # TODO improve this by using tempfile.TemporaryDirectory as a context
+    # manager.
+    print('Removing fixture data directory.')
+    rmtree(data_dir)
 
 
 @pytest.fixture

+ 2 - 7
lakesuperior/api/admin.py

@@ -2,7 +2,6 @@ import logging
 
 from lakesuperior import env
 from lakesuperior.config_parser import parse_config
-from lakesuperior.globals import AppGlobals
 from lakesuperior.migrator import Migrator
 from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
@@ -47,16 +46,12 @@ def migrate(src, dest, start_pts=None, list_file=None, **kwargs):
     return Migrator(src, dest, **kwargs).migrate(start_pts, list_file)
 
 
-def integrity_check(config_dir=None):
+def integrity_check():
     """
     Check integrity of the data set.
 
     At the moment this is limited to referential integrity. Other checks can
     be added and triggered by different argument flags.
     """
-    if config_dir:
-        env.app_globals = AppGlobals(parse_config(config_dir))
-    else:
-        import lakesuperior.env_setup
     with TxnManager(env.app_globals.rdfly.store):
-        return { t for t in env.app_globals.rdfly.find_refint_violations()}
+        return set(env.app_globals.rdfly.find_refint_violations())

+ 1 - 1
lakesuperior/env_setup.py

@@ -7,7 +7,7 @@ Default configuration.
 
 Import this module to initialize the configuration for a production setup::
 
-    >>> from lakesuperior import env_setup
+    >>> import lakesuperior.env_setup
 
 Will load the default configuration.
 """

+ 36 - 14
lakesuperior/lsup_admin.py

@@ -1,13 +1,18 @@
 import click
 import click_log
+import csv
 import json
 import logging
-import os
 import sys
 
+from os import getcwd, path
+
+import arrow
+
 from lakesuperior import env
 from lakesuperior.api import admin as admin_api
 from lakesuperior.config_parser import config
+from lakesuperior.globals import AppGlobals
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
 __doc__="""
@@ -23,12 +28,6 @@ for a list of tools and options.
 logger = logging.getLogger(__name__)
 click_log.basic_config(logger)
 
-#report = logging.getLogger('report')
-#report_formatter = logging.Formatter('"%(asctime)s",%(message)s')
-#report_fpath = '{}/lsup-report-{}'.format(
-#        env.config['application']['data_dir'],
-#        arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss.S'))
-#report_handler = logging.FileHandler(report_fpath)
 
 @click.group()
 def admin():
@@ -69,7 +68,8 @@ def bootstrap():
     click.echo('Initializing binary store at {}'.format(nonrdfly.root))
     nonrdfly.bootstrap()
     click.echo('Binary store initialized.')
-    click.echo('Repository successfully set up. Go to town.')
+    click.echo('\nRepository successfully set up. Go to town.')
+    click.echo('If the HTTP server is running, it must be restarted.')
 
 
 @click.command()
@@ -117,19 +117,41 @@ def check_refint(config_folder=None, output=None):
     resources. For repositories set up with the `referential_integrity` option
     (the default), this is a pre-condition for a consistent data set.
 
-    Note: this check is run regardless of whether the repository enforces
+    If inconsistencies are found, a report is generated in CSV format with the
+    following columns: `s`, `p`, `o` (respectively the terms of the
+    triple containing the dangling relationship) and `missing` which
+    indicates which term is the missing URI (currently always set to `o`).
+
+    Note: this check can be run regardless of whether the repository enforces
     referential integrity.
     """
-    check_results = admin_api.integrity_check(config_folder)
+    if config_folder:
+        from lakesuperior.config_parser import parse_config
+        env.app_globals = AppGlobals(parse_config(config_folder))
+    else:
+        import lakesuperior.env_setup
+
+    check_results = admin_api.integrity_check()
+
     click.echo('Integrity check results:')
     if len(check_results):
         click.echo(click.style('Inconsistencies found!', fg='red', bold=True))
-        click.echo('Missing object in the following triples:')
-        for trp in check_results:
-            click.echo(' '.join([str(t) for t in trp[0]]))
+        if not output:
+            output = path.join(getcwd(), 'refint_report-{}.csv'.format(
+                arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss.S')))
+        elif not output.endswith('.csv'):
+            output += '.csv'
+
+        with open(output, 'w', newline='') as fh:
+            writer = csv.writer(fh)
+            writer.writerow(('s', 'p', 'o', 'missing'))
+            for trp in check_results:
+                # ``o`` is always hardcoded for now.
+                writer.writerow([t.n3() for t in trp[0]] + ['o'])
+
+        click.echo('Report generated at {}'.format(output))
     else:
         click.echo(click.style('Clean. ', fg='green', bold=True)
-                + 'No inconsistency found.')
+                + 'No inconsistency found. No report generated.')
 
 
 @click.command()

+ 1 - 0
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -557,6 +557,7 @@ class RsrcCentricLayout:
         :rtype: set
         :return: Triples referencing a repository URI that is not a resource.
         """
+        #import pdb; pdb.set_trace()
         for i, obj in enumerate(self.store.all_terms('o'), start=1):
             if (
                     isinstance(obj, URIRef)