Procházet zdrojové kódy

Implement soft-delete and purge.

Stefano Cossu před 7 roky
rodič
revize
13401860d2

+ 15 - 4
doc/notes/fcrepo4_deltas.md

@@ -83,7 +83,7 @@ in LAKEsuperior in a 404.
 In both above cases, PUTting into `rest/a` yields a 409, POSTing to it results
 in a 201.
 
-## Non-mandatory slug in version POST
+### Non-mandatory slug in version POST
 
 FCREPO requires a `Slug` header to POST to `fcr:versions` to create a new
 version.
@@ -170,7 +170,18 @@ while leaving the other server-managed triples when retrieving a resource:
 
 The default behavior is including all children URIs.
 
-### Optional deletion without leaving tombstone
+### Soft-delete and purge
 
-In LAKEsuperior, setting the `Prefer:no-tombstone` header option allows to
-delete a resource without leaving a tombstone.
+In FCREPO4, deleting a resource leaves a tombstone and removes all traces of
+the previous resource.
+
+In LAKEsuperior, a normal DELETE creates a new version snapshot of the resource
+and puts a tombstone in its place. The resource versions are still available
+in the `fcr:versions` location. The resource can be "resurrected" by
+issuing a POST to its tombstone. This will result in a `201`.
+
+If a tombstone is deleted, the resource and its versions are completely deleted
+(purged).
+
+Moreover, setting the `Prefer:no-tombstone` header option on DELETE deletes a
+resource and its versions directly, without leaving a tombstone.

+ 21 - 4
lakesuperior/endpoints/ldp.py

@@ -194,7 +194,7 @@ def get_version_info(uuid):
     Get version info (`fcr:versions`).
     '''
     try:
-        rsp = Ldpr(uuid).version_info
+        rsp = Ldpr(uuid).get_version_info()
     except ResourceNotExistsError as e:
         return str(e), 404
     except InvalidResourceError as e:
@@ -343,7 +343,16 @@ def patch_resource_metadata(uuid):
 @ldp.route('/<path:uuid>', methods=['DELETE'])
 def delete_resource(uuid):
     '''
-    Delete a resource.
+    Delete a resource and optionally leave a tombstone.
+
+    This behaves differently from FCREPO. A tombstone indicates that the
+    resource is no longer available at its current location, but its historic
+    snapshots still are. Also, deleting a resource with a tombstone creates
+    one more version snapshot of the resource prior to being deleted.
+
+    In order to completely wipe out all traces of a resource, the tombstone
+    must be deleted as well, or the `Prefer:no-tombstone` header can be used.
+    The latter will purge the resource immediately.
     '''
     headers = std_headers
 
@@ -374,7 +383,8 @@ def tombstone(uuid):
     '''
     Handle all tombstone operations.
 
-    The only allowed method is DELETE; any other verb will return a 405.
+    The only allowed methods are POST and DELETE; any other verb will return a
+    405.
     '''
     logger.debug('Deleting tombstone for {}.'.format(uuid))
     rsrc = Ldpr(uuid)
@@ -383,10 +393,17 @@ def tombstone(uuid):
     except TombstoneError as e:
         if request.method == 'DELETE':
             if e.uuid == uuid:
-                rsrc.delete_tombstone()
+                rsrc.purge()
                 return '', 204
             else:
                 return _tombstone_response(e, uuid)
+        elif request.method == 'POST':
+            if e.uuid == uuid:
+                rsrc_uri = rsrc.resurrect()
+                headers = {'Location' : rsrc_uri}
+                return rsrc_uri, 201, headers
+            else:
+                return _tombstone_response(e, uuid)
         else:
             return 'Method Not Allowed.', 405
     except ResourceNotExistsError as e:

+ 5 - 5
lakesuperior/exceptions.py

@@ -146,8 +146,8 @@ class TombstoneError(RuntimeError):
         self.ts = ts
 
     def __str__(self):
-        return 'Discovered tombstone resource at /{}, departed: {}'.format(
-                self.uuid, self.ts)
-
-
-
+        # Return one string: a tuple here makes str() raise TypeError.
+        return (
+            'Discovered tombstone resource at /{}, departed: {}\n'
+            'To resurrect that resource, send a POST request to it.'
+        ).format(self.uuid, self.ts)

+ 138 - 96
lakesuperior/model/ldpr.py

@@ -396,9 +396,29 @@ class Ldpr(metaclass=ABCMeta):
         '''
         Return version metadata (`fcr:versions`).
         '''
-        rsp = self.rdfly.get_version_info(self.urn)
+        if not hasattr(self, '_version_info'):
+            self._version_info = self.rdfly.get_version_info(self.urn)
 
-        return g.tbox.globalize_graph(rsp)
+        return self._version_info
+
+
+    @property
+    def versions(self):
+        '''
+        Return the set of version URIs linked via `fcrepo:hasVersion`.
+        '''
+        return set(self.version_info[self.urn : nsc['fcrepo'].hasVersion :])
+
+
+    @property
+    def version_uids(self):
+        '''
+        Return the set of version UIDs (relative to their parent resource).
+        '''
+        return set(self.version_info[
+            self.urn
+            : nsc['fcrepo'].hasVersion / nsc['fcrepo'].hasVersionLabel
+            :])
 
 
     @property
@@ -409,16 +429,6 @@ class Ldpr(metaclass=ABCMeta):
             return self.rdfly.ask_rsrc_exists(self.urn)
 
 
-    #@property
-    #def has_versions(self):
-    #    '''
-    #    Whether if a current resource has versions.
-
-    #    @return boolean
-    #    '''
-    #    return bool(self.imr.value(nsc['fcrepo'].hasVersions, any=False))
-
-
     @property
     def types(self):
         '''All RDF types.
@@ -521,31 +531,41 @@ class Ldpr(metaclass=ABCMeta):
         children = self.imr[nsc['ldp'].contains * '+'] \
                 if delete_children else []
 
-        ret = self._delete_rsrc(inbound, leave_tstone)
-
-        for child_uri in children:
-            child_rsrc = Ldpr.outbound_inst(
-                g.tbox.uri_to_uuid(child_uri.identifier),
-                repr_opts={'incl_children' : False})
-            child_rsrc._delete_rsrc(inbound, leave_tstone,
-                    tstone_pointer=self.urn)
+        if leave_tstone:
+            ret = self._bury_rsrc(inbound)
+        else:
+            ret = self._purge_rsrc(inbound)
+
+            for child_uri in children:
+                child_rsrc = Ldpr.outbound_inst(
+                    g.tbox.uri_to_uuid(child_uri.identifier),
+                    repr_opts={'incl_children' : False})
+                if leave_tstone:
+                    child_rsrc._bury_rsrc(inbound, tstone_pointer=self.urn)
+                else:
+                    child_rsrc._purge_rsrc(inbound)
 
         return ret
 
 
     @atomic
-    def delete_tombstone(self):
+    def purge(self, inbound=True):
         '''
-        Delete a tombstone.
+        Delete a tombstone and all historic snapshots.
 
         N.B. This does not trigger an event.
         '''
-        remove_trp = {
-            (self.urn, RDF.type, nsc['fcsystem'].Tombstone),
-            (self.urn, nsc['fcrepo'].created, None),
-            (None, nsc['fcsystem'].tombstone, self.urn),
-        }
-        self.rdfly.modify_dataset(remove_trp)
+        refint = current_app.config['store']['ldp_rs']['referential_integrity']
+        inbound = True if refint else inbound
+
+        return self._purge_rsrc(inbound)
+
+
+    def get_version_info(self):
+        '''
+        Get the `fcr:versions` graph, run through `g.tbox.globalize_graph`.
+        '''
+        return g.tbox.globalize_graph(self.version_info)
 
 
     def get_version(self, ver_uid):
@@ -569,55 +589,7 @@ class Ldpr(metaclass=ABCMeta):
         @param ver_uid Version ver_uid. If already existing, an exception is
         raised.
         '''
-        # Create version resource from copying the current state.
-        ver_add_gr = Graph()
-        vers_uuid = '{}/{}'.format(self.uuid, self.RES_VER_CONT_LABEL)
-        ver_uuid = '{}/{}'.format(vers_uuid, ver_uid)
-        ver_urn = nsc['fcres'][ver_uuid]
-        ver_add_gr.add((ver_urn, RDF.type, nsc['fcrepo'].Version))
-        for t in self.imr.graph:
-            if (
-                t[1] == RDF.type and t[2] in {
-                    nsc['fcrepo'].Binary,
-                    nsc['fcrepo'].Container,
-                    nsc['fcrepo'].Resource,
-                }
-            ) or (
-                t[1] in {
-                    nsc['fcrepo'].hasParent,
-                    nsc['fcrepo'].hasVersions,
-                    nsc['premis'].hasMessageDigest,
-                }
-            ):
-                pass
-            else:
-                ver_add_gr.add((
-                        g.tbox.replace_term_domain(t[0], self.urn, ver_urn),
-                        t[1], t[2]))
-
-        self.rdfly.modify_dataset(
-                add_trp=ver_add_gr, types={nsc['fcrepo'].Version})
-
-        # Add version metadata.
-        meta_add_gr = Graph()
-        meta_add_gr.add((
-            self.urn, nsc['fcrepo'].hasVersion, ver_urn))
-        meta_add_gr.add(
-                (ver_urn, nsc['fcrepo'].created, g.timestamp_term))
-        meta_add_gr.add(
-                (ver_urn, nsc['fcrepo'].hasVersionLabel, Literal(ver_uid)))
-
-        self.rdfly.modify_dataset(
-                add_trp=meta_add_gr, types={nsc['fcrepo'].Metadata})
-
-        # Update resource.
-        rsrc_add_gr = Graph()
-        rsrc_add_gr.add((
-            self.urn, nsc['fcrepo'].hasVersions, nsc['fcres'][vers_uuid]))
-
-        self._modify_rsrc(self.RES_UPDATED, add_trp=rsrc_add_gr, notify=False)
-
-        return g.tbox.uuid_to_uri(ver_uuid)
+        return g.tbox.globalize_term(self._create_rsrc_version(ver_uid))
 
 
     @atomic
@@ -702,34 +674,32 @@ class Ldpr(metaclass=ABCMeta):
         return self.RES_UPDATED
 
 
-    def _delete_rsrc(self, inbound, leave_tstone=True, tstone_pointer=None):
+    def _bury_rsrc(self, inbound, tstone_pointer=None):
         '''
         Delete a single resource and create a tombstone.
 
         @param inbound (boolean) Whether to delete the inbound relationships.
         @param tstone_pointer (URIRef) If set to a URN, this creates a pointer
         to the tombstone of the resource that used to contain the deleted
-        resource. Otherwise the delete resource becomes a tombstone.
+        resource. Otherwise the deleted resource becomes a tombstone.
         '''
         self._logger.info('Removing resource {}'.format(self.urn))
+        # Snapshot the state first so the resource can be resurrected later.
+        self.create_version(uuid4())
 
         remove_trp = self.imr.graph
         add_trp = Graph()
 
-        if leave_tstone:
-            if tstone_pointer:
-                add_trp.add((self.urn, nsc['fcsystem'].tombstone,
-                        tstone_pointer))
-            else:
-                add_trp.add((self.urn, RDF.type, nsc['fcsystem'].Tombstone))
-                add_trp.add((self.urn, nsc['fcrepo'].created, g.timestamp_term))
+        if tstone_pointer:
+            add_trp.add((self.urn, nsc['fcsystem'].tombstone,
+                    tstone_pointer))
         else:
-            self._logger.info('NOT leaving tombstone.')
+            add_trp.add((self.urn, RDF.type, nsc['fcsystem'].Tombstone))
+            add_trp.add((self.urn, nsc['fcrepo'].created, g.timestamp_term))
 
         self._modify_rsrc(self.RES_DELETED, remove_trp, add_trp)
 
         if inbound:
-            remove_trp = set()
             for ib_rsrc_uri in self.imr.graph.subjects(None, self.urn):
                 remove_trp = {(ib_rsrc_uri, None, self.urn)}
                 Ldpr(ib_rsrc_uri)._modify_rsrc(self.RES_UPDATED, remove_trp)
@@ -737,16 +707,87 @@ class Ldpr(metaclass=ABCMeta):
         return self.RES_DELETED
 
 
-    def _create_version_container(self):
+    def _purge_rsrc(self, inbound):
         '''
-        Create the relationship with `fcr:versions` the first time a version is
-        created.
+        Remove all traces of a resource and versions.
         '''
-        add_gr = Graph()
-        add_gr.add((self.urn, nsc['fcrepo'].hasVersions,
-                URIRef(str(self.urn) + '/fcr:versions')))
+        self._logger.info('Purging resource {}'.format(self.urn))
+
+        # Remove resource itself. NOTE(review): `self.versions` is only read
+        # after this removal; confirm the snapshots are still discoverable.
+        self.rdfly.modify_dataset({(self.urn, None, None)}, types=None)
+
+        # Remove snapshots.
+        for snap_urn in self.versions:
+            remove_trp = {
+                (snap_urn, None, None),
+                (None, None, snap_urn),
+            }
+            self.rdfly.modify_dataset(remove_trp, types={})
+
+        # Remove inbound references.
+        if inbound:
+            for ib_rsrc_uri in self.imr.graph.subjects(None, self.urn):
+                remove_trp = {(ib_rsrc_uri, None, self.urn)}
+                Ldpr(ib_rsrc_uri)._modify_rsrc(self.RES_UPDATED, remove_trp)
+
+        # @TODO This could be a different event type.
+        return self.RES_DELETED
+
+
+    def _create_rsrc_version(self, ver_uid):
+        '''
+        Snapshot the current state as version `ver_uid`; return its URN.
+        '''
+        # Copy the current state to the version URN, minus the terms below.
+        ver_add_gr = Graph()
+        vers_uuid = '{}/{}'.format(self.uuid, self.RES_VER_CONT_LABEL)
+        ver_uuid = '{}/{}'.format(vers_uuid, ver_uid)
+        ver_urn = nsc['fcres'][ver_uuid]
+        ver_add_gr.add((ver_urn, RDF.type, nsc['fcrepo'].Version))
+        for t in self.imr.graph:
+            if (
+                t[1] == RDF.type and t[2] in {
+                    nsc['fcrepo'].Binary,
+                    nsc['fcrepo'].Container,
+                    nsc['fcrepo'].Resource,
+                }
+            ) or (
+                t[1] in {
+                    nsc['fcrepo'].hasParent,
+                    nsc['fcrepo'].hasVersions,
+                    nsc['premis'].hasMessageDigest,
+                }
+            ):
+                pass
+            else:
+                ver_add_gr.add((
+                        g.tbox.replace_term_domain(t[0], self.urn, ver_urn),
+                        t[1], t[2]))
+
+        self.rdfly.modify_dataset(
+                add_trp=ver_add_gr, types={nsc['fcrepo'].Version})
+
+        # Link the version to its resource; record its timestamp and label.
+        meta_add_gr = Graph()
+        meta_add_gr.add((
+            self.urn, nsc['fcrepo'].hasVersion, ver_urn))
+        meta_add_gr.add(
+                (ver_urn, nsc['fcrepo'].created, g.timestamp_term))
+        meta_add_gr.add(
+                (ver_urn, nsc['fcrepo'].hasVersionLabel, Literal(ver_uid)))
+
+        self.rdfly.modify_dataset(
+                add_trp=meta_add_gr, types={nsc['fcrepo'].Metadata})
+
+        # Link the resource to its version container, with notify=False.
+        rsrc_add_gr = Graph()
+        rsrc_add_gr.add((
+            self.urn, nsc['fcrepo'].hasVersions, nsc['fcres'][vers_uuid]))
+
+        self._modify_rsrc(self.RES_UPDATED, add_trp=rsrc_add_gr, notify=False)
 
-        self._modify_rsrc(self.RES_UPDATED, add_trp=add_gr)
+        return nsc['fcres'][ver_uuid]
 
 
     def _modify_rsrc(self, ev_type, remove_trp=Graph(), add_trp=Graph(),
@@ -943,7 +984,8 @@ class Ldpr(metaclass=ABCMeta):
 
         add_gr = Graph()
         add_gr.add((parent_uri, nsc['ldp'].contains, self.urn))
-        parent_rsrc = Ldpc(parent_uri, repr_opts={
+        parent_rsrc = Ldpc.outbound_inst(
+                g.tbox.uri_to_uuid(parent_uri), repr_opts={
                 'incl_children' : False}, handling='none')
         parent_rsrc._modify_rsrc(self.RES_UPDATED, add_trp=add_gr)
 

+ 2 - 0
lakesuperior/store_layouts/ldp_rs/base_rdf_layout.py

@@ -69,6 +69,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
         self.store = self._conn.store
 
+        self.UNION_GRAPH_URI = self._conn.UNION_GRAPH_URI
         self.ds = self._conn.ds
         self.ds.namespace_manager = nsm
 
@@ -149,6 +150,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
         @param types (iterable(rdflib.term.URIRef)) RDF types of the resource
         that may be relevant to the layout strategy. These can be anything
         since they are just used to inform the layout and not actually stored.
+        If this is an empty set, the merge graph is used.
         '''
         pass
 

+ 23 - 11
lakesuperior/store_layouts/ldp_rs/default_layout.py

@@ -168,8 +168,8 @@ class DefaultLayout(BaseRdfLayout):
 
         if not len(rsp):
             raise ResourceNotExistsError(
-                    urn,
-                    'No version found for this resource with the given label.')
+                urn,
+                'No version found for this resource with the given label.')
         else:
             return rsp.graph
 
@@ -184,14 +184,26 @@ class DefaultLayout(BaseRdfLayout):
         self._logger.debug('Add triples: {}'.format(pformat(
                 set(add_trp))))
 
-        if nsc['fcrepo'].Metadata in types:
-            target_gr = self.ds.graph(self.META_GRAPH_URI)
+        if not types:
+            # @FIXME This is terrible, but I can't get Fuseki to update the
+            # default graph without using a variable.
+            #target_gr = self.ds.graph(self.UNION_GRAPH_URI)
+            target_gr = {
+                self.ds.graph(self.HIST_GRAPH_URI),
+                self.ds.graph(self.META_GRAPH_URI),
+                self.ds.graph(self.MAIN_GRAPH_URI),
+            }
+        elif nsc['fcrepo'].Metadata in types:
+            target_gr = {self.ds.graph(self.META_GRAPH_URI)}
         elif nsc['fcrepo'].Version in types:
-            target_gr = self.ds.graph(self.HIST_GRAPH_URI)
+            target_gr = {self.ds.graph(self.HIST_GRAPH_URI)}
         else:
-            target_gr = self.ds.graph(self.MAIN_GRAPH_URI)
-
-        for t in remove_trp:
-            target_gr.remove(t)
-        for t in add_trp:
-            target_gr.add(t)
+            target_gr = {self.ds.graph(self.MAIN_GRAPH_URI)}
+
+        for gr in target_gr:
+            gr -= remove_trp
+            gr += add_trp
+        #for t in remove_trp:
+        #    target_gr.remove(t)
+        #for t in add_trp:
+        #    target_gr.add(t)

+ 4 - 0
lakesuperior/store_layouts/ldp_rs/graph_store_connector.py

@@ -1,6 +1,7 @@
 import logging
 
 from rdflib import Dataset
+from rdflib.term import URIRef
 from rdflib.plugins.stores.sparqlstore import SPARQLStore, SPARQLUpdateStore
 from SPARQLWrapper.Wrapper import POST
 
@@ -15,6 +16,9 @@ class GraphStoreConnector:
     be passed any configuration options.
     '''
 
+    # N.B. This is Fuseki-specific.
+    UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
+
     _logger = logging.getLogger(__name__)
 
     def __init__(self, query_ep, update_ep=None, autocommit=False):