Browse Source

Referential integrity check.

Stefano Cossu 7 years ago
parent
commit
16560083d6

+ 20 - 1
lakesuperior/api/admin.py

@@ -1,6 +1,8 @@
 import logging
 import logging
 
 
+from lakesuperior.config_parser import parse_config
 from lakesuperior.env import env
 from lakesuperior.env import env
+from lakesuperior.globals import AppGlobals
 from lakesuperior.migrator import Migrator
 from lakesuperior.migrator import Migrator
 from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout
 from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
@@ -18,7 +20,8 @@ def stats():
     """
     """
     Get repository statistics.
     Get repository statistics.
 
 
-    @return dict Store statistics, resource statistics.
+    :rtype: dict
+    :return: Store statistics, resource statistics.
     """
     """
     import lakesuperior.env_setup
     import lakesuperior.env_setup
     repo_stats = {'rsrc_stats': env.app_globals.rdfly.count_rsrc()}
     repo_stats = {'rsrc_stats': env.app_globals.rdfly.count_rsrc()}
@@ -42,3 +45,19 @@ def migrate(src, dest, start_pts=None, list_file=None, **kwargs):
         start_pts = ('/',)
         start_pts = ('/',)
 
 
     return Migrator(src, dest, **kwargs).migrate(start_pts, list_file)
     return Migrator(src, dest, **kwargs).migrate(start_pts, list_file)
+
+
+def integrity_check(config_dir=None):
+    """
+    Check integrity of the data set.
+
+    At the moment this is limited to referential integrity. Other checks can
+    be added and triggered by different argument flags.
+    """
+    if config_dir:
+        env.config = parse_config(config_dir)[0]
+        env.app_globals = AppGlobals(env.config)
+    else:
+        import lakesuperior.env_setup
+    with TxnManager(env.app_globals.rdfly.store):
+        return { t for t in env.app_globals.rdfly.find_refint_violations()}

+ 24 - 0
lakesuperior/store/ldp_rs/lmdb_store.py

@@ -595,6 +595,30 @@ class LmdbStore(Store):
                 yield self._from_key(spok), contexts
                 yield self._from_key(spok), contexts
 
 
 
 
+    def all_terms(self, term_type):
+        """
+        Return all terms of a type (``s``, ``p``, or ``o``) in the store.
+
+        :param str term_type: one of ``s``, ``p`` or ``o``.
+
+        :rtype: Iterator(rdflib.term.Identifier)
+        :return: Iterator of all terms.
+        :raise ValueError: if the term type is not one of the expected values.
+        """
+        if term_type == 's':
+            idx_label = 's:po'
+        elif term_type == 'p':
+            idx_label = 'p:so'
+        elif term_type == 'o':
+            idx_label = 'o:sp'
+        else:
+            raise ValueError('Term type must be \'s\', \'p\' or \'o\'.')
+
+        with self.cur(idx_label) as cur:
+            for key in cur.iternext_nodup():
+                yield self._from_key(key)[0]
+
+
     def bind(self, prefix, namespace):
     def bind(self, prefix, namespace):
         '''
         '''
         Bind a prefix to a namespace.
         Bind a prefix to a namespace.

+ 22 - 0
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -556,6 +556,28 @@ class RsrcCentricLayout:
         return str(uri).replace(nsc['fcres'], '')
         return str(uri).replace(nsc['fcres'], '')
 
 
 
 
+    def find_refint_violations(self):
+        """
+        Find all referential integrity violations.
+
+        This method looks for dangling relationships within a repository by
+        checking the objects of each triple; if the object is an in-repo
+        resource reference, and no resource with that URI results to be in the
+        repo, that triple is reported.
+
+        :rtype: set
+        :return: Triples referencing a repository URI that is not a resource.
+        """
+        for obj in self.store.all_terms('o'):
+            if (
+                    isinstance(obj, URIRef)
+                    and str(obj).startswith(nsc['fcres'])
+                    and not self.ask_rsrc_exists(self.uri_to_uid(obj))):
+                print('Object not found: {}'.format(obj))
+                for trp in self.store.triples((None, None, obj)):
+                    yield trp
+
+
     ## PROTECTED MEMBERS ##
     ## PROTECTED MEMBERS ##
 
 
     def _check_rsrc_status(self, rsrc):
     def _check_rsrc_status(self, rsrc):

+ 22 - 6
lsup-admin

@@ -85,17 +85,33 @@ def check_fixity(uid):
     pass
     pass
 
 
 
 
+@click.option(
+    '--config-folder', '-c', default=None, help='Alternative configuration '
+    'folder to look up. If not set, the location set in the environment or '
+    'the default configuration is used.')
 @click.command()
 @click.command()
-def check_refint():
-    '''
-    [STUB] Check referential integrity.
+def check_refint(config_folder=None):
+    """
+    Check referential integrity.
 
 
     This command scans the graph store to verify that all references to
     This command scans the graph store to verify that all references to
     resources within the repository are effectively pointing to existing
     resources within the repository are effectively pointing to existing
-    resources. For repositories set up with the `referencial_integrity` option
+    resources. For repositories set up with the `referential_integrity` option
     (the default), this is a pre-condition for a consistent data set.
     (the default), this is a pre-condition for a consistent data set.
-    '''
-    pass
+
+    Note: this check is run regardless of whether the repository enforces
+    referential integrity.
+    """
+    check_results = admin_api.integrity_check(config_folder)
+    click.echo('Integrity check results:')
+    if len(check_results):
+        click.echo(click.style('Inconsistencies found!', fg='red', bold=True))
+        click.echo('Missing object in the following triples:')
+        for trp in check_results:
+            click.echo(' '.join([str(t) for t in trp[0]]))
+    else:
+        click.echo(click.style('Clean. ', fg='green', bold=True)
+                + 'No inconsistency found.')
 
 
 
 
 @click.command()
 @click.command()