Переглянути джерело

Referential integrity check.

Stefano Cossu 7 роки тому
батько
коміт
a436acdaac

+ 10 - 0
etc.skeleton/application.yml

@@ -25,6 +25,16 @@ store:
         # Store layout. This corresponds to a sub-class of the
         # `lakesuperior.store_layouts.rdf.base_rdf_layout/BaseRdfLayout`.
         layout: simple_layout
+        # Whether to check if the object of a client-provided triple is the URI
+        # of a repository-managed resource and verify if that exists.
+        # If set to `none`, properties are allowed to point to resources in
+        # the repository that do not exist. Also, if a resource is deleted,
+        # inbound relationships may not be cleaned up.
+        # This can be one of `none`, `lenient` or `strict`. `none` does not
+        # check for referential integrity. `lenient` quietly drops a
+        # user-provided triple if its object violates referential integrity.
+        # `strict` raises an exception.
+        referential_integrity: lenient
         webroot: http://localhost:9999/namespace/fcrepo/
         query_ep: sparql
         update_ep: sparql

+ 17 - 0
lakesuperior/exceptions.py

@@ -71,6 +71,23 @@ class ServerManagedTermError(RuntimeError):
                 .format(self.term_name, ' , '.join(self.terms))
 
 
+
+class RefIntViolationError(RuntimeError):
+    '''
+    Raised when a provided data set has a link to a non-existing repository
+    resource. With some setups this is handled silently, with a strict setting
+    it raises this exception that should return a 412 HTTP code.
+    '''
+    def __init__(self, o):
+        self.o = o
+
+    def __str__(self):
+        return 'Resource {} does not exist in repository. Linking to it '\
+            'constitutes an integrity violation under the current setup.'\
+            .format(self.o)
+
+
+
 class SingleSubjectError(RuntimeError):
     '''
     Raised when a SPARQL-Update query or a RDF payload for a PUT contain

+ 19 - 0
lakesuperior/model/ldp_rs.py

@@ -2,6 +2,7 @@ from copy import deepcopy
 
 import arrow
 
+from flask import request
 from rdflib import Graph
 from rdflib.resource import Resource
 from rdflib.namespace import RDF, XSD
@@ -100,6 +101,9 @@ class LdpRs(Ldpr):
                 self.urn)
         self._add_srv_mgd_triples(create=True)
         self._ensure_single_subject_rdf(self.provided_imr.graph)
+        cnf = self.rdfly.conf['referential_integrity']
+        if cnf != 'none':
+            self._check_ref_int(cnf)
 
         if create_only:
             res = self.rdfly.create_rsrc(self.provided_imr)
@@ -229,6 +233,21 @@ class LdpRs(Ldpr):
                 return SingleSubjectError(s, self.uri)
 
 
+    def _check_ref_int(self, config):
+        g = self.provided_imr.graph
+
+        for o in g.objects():
+            if isinstance(o, URIRef) and str(o).startswith(request.host_url) \
+                    and not self.rdfly.ask_rsrc_exists(o):
+                if config == 'strict':
+                    raise RefIntViolationError(o)
+                else:
+                    self._logger.info(
+                            'Removing link to non-existent repo resource: {}'
+                            .format(o))
+                    g.remove((None, None, o))
+
+
 class Ldpc(LdpRs):
     '''LDPC (LDP Container).'''
 

+ 3 - 3
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -67,11 +67,11 @@ class BaseRdfLayout(metaclass=ABCMeta):
     RES_CREATED = '_created_'
     RES_UPDATED = '_updated_'
 
-    _conf = config['application']['store']['ldp_rs']
+    conf = config['application']['store']['ldp_rs']
     _logger = logging.getLogger(__name__)
 
-    query_ep = _conf['webroot'] + _conf['query_ep']
-    update_ep = _conf['webroot'] + _conf['update_ep']
+    query_ep = conf['webroot'] + conf['query_ep']
+    update_ep = conf['webroot'] + conf['update_ep']
 
 
     ## MAGIC METHODS ##