Browse Source

Add report output for check_refint.

Stefano Cossu 7 years ago
parent
commit
84fa563ec5

+ 25 - 20
conftest.py

@@ -4,43 +4,48 @@ from os import makedirs, path
 from shutil import rmtree
 from shutil import rmtree
 from tempfile import gettempdir
 from tempfile import gettempdir
 
 
-from lakesuperior import env_setup, env
-from lakesuperior.app import create_app
+from lakesuperior import env
+from lakesuperior.config_parser import parse_config
+from lakesuperior.globals import AppGlobals
 from lakesuperior.util.generators import random_image
 from lakesuperior.util.generators import random_image
 
 
+
+# Override data directory locations.
+config = parse_config()
+data_dir = path.join(gettempdir(), 'lsup_test', 'data')
+config['application']['data_dir'] = data_dir
+config['application']['store']['ldp_nr']['location'] = (
+        path.join(data_dir, 'ldpnr_store'))
+config['application']['store']['ldp_rs']['location'] = (
+        path.join(data_dir, 'ldprs_store'))
+
+env.app_globals = AppGlobals(config)
+from lakesuperior.app import create_app
+
+
 @pytest.fixture(scope='module')
 @pytest.fixture(scope='module')
 def app():
 def app():
-    # Override data directory locations.
-    data_dir = path.join(gettempdir(), 'lsup_test', 'data')
-    makedirs(data_dir, exist_ok=True)
-    env.app_globals.config['application']['data_dir'] = data_dir
-    env.app_globals.config['application']['store']['ldp_nr']['location'] = (
-            path.join(data_dir, 'ldpnr_store'))
-    env.app_globals.config['application']['store']['ldp_rs']['location'] = (
-            path.join(data_dir, 'ldprs_store'))
     app = create_app(env.app_globals.config['application'])
     app = create_app(env.app_globals.config['application'])
 
 
     yield app
     yield app
 
 
-    # TODO improve this by using tempfile.TemporaryDirectory as a context
-    # manager.
-    print('Removing fixture data directory.')
-    rmtree(data_dir)
-
 
 
 @pytest.fixture(scope='module')
 @pytest.fixture(scope='module')
 def db(app):
 def db(app):
     '''
     '''
     Set up and tear down test triplestore.
     Set up and tear down test triplestore.
     '''
     '''
-    rdfly = env.app_globals.rdfly
-    rdfly.bootstrap()
+    makedirs(data_dir, exist_ok=True)
+    env.app_globals.rdfly.bootstrap()
     env.app_globals.nonrdfly.bootstrap()
     env.app_globals.nonrdfly.bootstrap()
+    print('Initialized data store.')
 
 
-    yield rdfly
+    yield env.app_globals.rdfly
 
 
-    print('Tearing down fixture graph store.')
-    rdfly.store.destroy(rdfly.store.path)
+    # TODO improve this by using tempfile.TemporaryDirectory as a context
+    # manager.
+    print('Removing fixture data directory.')
+    rmtree(data_dir)
 
 
 
 
 @pytest.fixture
 @pytest.fixture

+ 2 - 7
lakesuperior/api/admin.py

@@ -2,7 +2,6 @@ import logging
 
 
 from lakesuperior import env
 from lakesuperior import env
 from lakesuperior.config_parser import parse_config
 from lakesuperior.config_parser import parse_config
-from lakesuperior.globals import AppGlobals
 from lakesuperior.migrator import Migrator
 from lakesuperior.migrator import Migrator
 from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout
 from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
@@ -47,16 +46,12 @@ def migrate(src, dest, start_pts=None, list_file=None, **kwargs):
     return Migrator(src, dest, **kwargs).migrate(start_pts, list_file)
     return Migrator(src, dest, **kwargs).migrate(start_pts, list_file)
 
 
 
 
-def integrity_check(config_dir=None):
+def integrity_check():
     """
     """
     Check integrity of the data set.
     Check integrity of the data set.
 
 
     At the moment this is limited to referential integrity. Other checks can
     At the moment this is limited to referential integrity. Other checks can
     be added and triggered by different argument flags.
     be added and triggered by different argument flags.
     """
     """
-    if config_dir:
-        env.app_globals = AppGlobals(parse_config(config_dir))
-    else:
-        import lakesuperior.env_setup
     with TxnManager(env.app_globals.rdfly.store):
     with TxnManager(env.app_globals.rdfly.store):
-        return { t for t in env.app_globals.rdfly.find_refint_violations()}
+        return set(env.app_globals.rdfly.find_refint_violations())

+ 1 - 1
lakesuperior/env_setup.py

@@ -7,7 +7,7 @@ Default configuration.
 
 
 Import this module to initialize the configuration for a production setup::
 Import this module to initialize the configuration for a production setup::
 
 
-    >>> from lakesuperior import env_setup
+    >>> import lakesuperior.env_setup
 
 
 Will load the default configuration.
 Will load the default configuration.
 """
 """

+ 36 - 14
lakesuperior/lsup_admin.py

@@ -1,13 +1,18 @@
 import click
 import click
 import click_log
 import click_log
+import csv
 import json
 import json
 import logging
 import logging
-import os
 import sys
 import sys
 
 
+from os import getcwd, path
+
+import arrow
+
 from lakesuperior import env
 from lakesuperior import env
 from lakesuperior.api import admin as admin_api
 from lakesuperior.api import admin as admin_api
 from lakesuperior.config_parser import config
 from lakesuperior.config_parser import config
+from lakesuperior.globals import AppGlobals
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
 
 __doc__="""
 __doc__="""
@@ -23,12 +28,6 @@ for a list of tools and options.
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
 click_log.basic_config(logger)
 click_log.basic_config(logger)
 
 
-#report = logging.getLogger('report')
-#report_formatter = logging.Formatter('"%(asctime)s",%(message)s')
-#report_fpath = '{}/lsup-report-{}'.format(
-#        env.config['application']['data_dir'],
-#        arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss.S'))
-#report_handler = logging.FileHandler(report_fpath)
 
 
 @click.group()
 @click.group()
 def admin():
 def admin():
@@ -69,7 +68,8 @@ def bootstrap():
     click.echo('Initializing binary store at {}'.format(nonrdfly.root))
     click.echo('Initializing binary store at {}'.format(nonrdfly.root))
     nonrdfly.bootstrap()
     nonrdfly.bootstrap()
     click.echo('Binary store initialized.')
     click.echo('Binary store initialized.')
-    click.echo('Repository successfully set up. Go to town.')
+    click.echo('\nRepository successfully set up. Go to town.')
+    click.echo('If the HTTP server is running, it must be restarted.')
 
 
 
 
 @click.command()
 @click.command()
@@ -117,19 +117,41 @@ def check_refint(config_folder=None, output=None):
     resources. For repositories set up with the `referential_integrity` option
     resources. For repositories set up with the `referential_integrity` option
     (the default), this is a pre-condition for a consistent data set.
     (the default), this is a pre-condition for a consistent data set.
 
 
-    Note: this check is run regardless of whether the repository enforces
+    If inconsistencies are found, a report is generated in CSV format with the
+    following columns: `s`, `p`, `o` (respectively the terms of the
+    triple containing the dangling relationship) and `missing` which
+    indicates which term is the missing URI (currently always set to `o`).
+
+    Note: this check can be run regardless of whether the repository enforces
     referential integrity.
     referential integrity.
     """
     """
-    check_results = admin_api.integrity_check(config_folder)
+    if config_folder:  # Custom configuration location given on the command line.
+        from lakesuperior.config_parser import parse_config  # Not imported at module level here.
+        env.app_globals = AppGlobals(parse_config(config_folder))  # Was `config_dir`: undefined name.
+    else:
+        import lakesuperior.env_setup  # Side-effect import: loads the default configuration.
+    check_results = admin_api.integrity_check()  # Relies on env.app_globals set up above.
+
     click.echo('Integrity check results:')
     click.echo('Integrity check results:')
     if len(check_results):
     if len(check_results):
         click.echo(click.style('Inconsistencies found!', fg='red', bold=True))
         click.echo(click.style('Inconsistencies found!', fg='red', bold=True))
-        click.echo('Missing object in the following triples:')
-        for trp in check_results:
-            click.echo(' '.join([str(t) for t in trp[0]]))
+        if not output:
+            output = path.join(getcwd(), 'refint_report-{}.csv'.format(
+                arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss.S')))
+        elif not output.endswith('.csv'):
+            output += '.csv'
+
+        with open(output, 'w', newline='') as fh:
+            writer = csv.writer(fh)
+            writer.writerow(('s', 'p', 'o', 'missing'))
+            for trp in check_results:
+                # ``o`` is always hardcoded for now.
+                writer.writerow([t.n3() for t in trp[0]] + ['o'])
+
+        click.echo('Report generated at {}'.format(output))
     else:
     else:
         click.echo(click.style('Clean. ', fg='green', bold=True)
         click.echo(click.style('Clean. ', fg='green', bold=True)
-                + 'No inconsistency found.')
+                + 'No inconsistency found. No report generated.')
 
 
 
 
 @click.command()
 @click.command()

+ 1 - 0
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -557,6 +557,7 @@ class RsrcCentricLayout:
         :rtype: set
         :rtype: set
         :return: Triples referencing a repository URI that is not a resource.
         :return: Triples referencing a repository URI that is not a resource.
         """
         """
+        #import pdb; pdb.set_trace()
         for i, obj in enumerate(self.store.all_terms('o'), start=1):
         for i, obj in enumerate(self.store.all_terms('o'), start=1):
             if (
             if (
                     isinstance(obj, URIRef)
                     isinstance(obj, URIRef)