Browse Source

Implement soft-delete and purge.

Stefano Cossu 7 years ago
parent
commit
13401860d2

+ 15 - 4
doc/notes/fcrepo4_deltas.md

@@ -83,7 +83,7 @@ in LAKEsuperior in a 404.
 In both above cases, PUTting into `rest/a` yields a 409, POSTing to it results
 In both above cases, PUTting into `rest/a` yields a 409, POSTing to it results
 in a 201.
 in a 201.
 
 
-## Non-mandatory slug in version POST
+### Non-mandatory slug in version POST
 
 
 FCREPO requires a `Slug` header to POST to `fcr:versions` to create a new
 FCREPO requires a `Slug` header to POST to `fcr:versions` to create a new
 version.
 version.
@@ -170,7 +170,18 @@ while leaving the other server-managed triples when retrieving a resource:
 
 
 The default behavior is including all children URIs.
 The default behavior is including all children URIs.
 
 
-### Optional deletion without leaving tombstone
+### Soft-delete and purge
 
 
-In LAKEsuperior, setting the `Prefer:no-tombstone` header option allows to
-delete a resource without leaving a tombstone.
+In FCREPO4, deleting a resource leaves a tombstone and removes all traces of
+the previous resource.
+
+In LAKEsuperior, a normal DELETE creates a new version snapshot of the resource
+and puts a tombstone in its place. The resource versions are still available
+in the `fcr:versions` location. The resource can be "resurrected" by
+issuing a POST to its tombstone. This will result in a `201`.
+
+If a tombstone is deleted, the resource and its versions are completely deleted
+(purged).
+
+Moreover, setting the `Prefer:no-tombstone` header option on DELETE allows
+deleting a resource and its versions directly without leaving a tombstone.

+ 21 - 4
lakesuperior/endpoints/ldp.py

@@ -194,7 +194,7 @@ def get_version_info(uuid):
     Get version info (`fcr:versions`).
     Get version info (`fcr:versions`).
     '''
     '''
     try:
     try:
-        rsp = Ldpr(uuid).version_info
+        rsp = Ldpr(uuid).get_version_info()
     except ResourceNotExistsError as e:
     except ResourceNotExistsError as e:
         return str(e), 404
         return str(e), 404
     except InvalidResourceError as e:
     except InvalidResourceError as e:
@@ -343,7 +343,16 @@ def patch_resource_metadata(uuid):
 @ldp.route('/<path:uuid>', methods=['DELETE'])
 @ldp.route('/<path:uuid>', methods=['DELETE'])
 def delete_resource(uuid):
 def delete_resource(uuid):
     '''
     '''
-    Delete a resource.
+    Delete a resource and optionally leave a tombstone.
+
+    This behaves differently from FCREPO. A tombstone indicates that the
+    resource is no longer available at its current location, but its historic
+    snapshots still are. Also, deleting a resource with a tombstone creates
+    one more version snapshot of the resource prior to being deleted.
+
+    In order to completely wipe out all traces of a resource, the tombstone
+    must be deleted as well, or the `Prefer:no-tombstone` header can be used.
+    The latter will purge the resource immediately.
     '''
     '''
     headers = std_headers
     headers = std_headers
 
 
@@ -374,7 +383,8 @@ def tombstone(uuid):
     '''
     '''
     Handle all tombstone operations.
     Handle all tombstone operations.
 
 
-    The only allowed method is DELETE; any other verb will return a 405.
+    The only allowed methods are POST and DELETE; any other verb will return a
+    405.
     '''
     '''
     logger.debug('Deleting tombstone for {}.'.format(uuid))
     logger.debug('Deleting tombstone for {}.'.format(uuid))
     rsrc = Ldpr(uuid)
     rsrc = Ldpr(uuid)
@@ -383,10 +393,17 @@ def tombstone(uuid):
     except TombstoneError as e:
     except TombstoneError as e:
         if request.method == 'DELETE':
         if request.method == 'DELETE':
             if e.uuid == uuid:
             if e.uuid == uuid:
-                rsrc.delete_tombstone()
+                rsrc.purge()
                 return '', 204
                 return '', 204
             else:
             else:
                 return _tombstone_response(e, uuid)
                 return _tombstone_response(e, uuid)
+        elif request.method == 'POST':
+            if e.uuid == uuid:
+                rsrc_uri = rsrc.resurrect()
+                headers = {'Location' : rsrc_uri}
+                return rsrc_uri, 201, headers
+            else:
+                return _tombstone_response(e, uuid)
         else:
         else:
             return 'Method Not Allowed.', 405
             return 'Method Not Allowed.', 405
     except ResourceNotExistsError as e:
     except ResourceNotExistsError as e:

+ 5 - 5
lakesuperior/exceptions.py

@@ -146,8 +146,8 @@ class TombstoneError(RuntimeError):
         self.ts = ts
         self.ts = ts
 
 
     def __str__(self):
     def __str__(self):
-        return 'Discovered tombstone resource at /{}, departed: {}'.format(
-                self.uuid, self.ts)
-
-
-
+        return (
+            'Discovered tombstone resource at /{}, departed: {}\n'.format(
+                self.uuid, self.ts),
+            'To resurrect that resource, send a POST request to it.'
+        )

+ 138 - 96
lakesuperior/model/ldpr.py

@@ -396,9 +396,29 @@ class Ldpr(metaclass=ABCMeta):
         '''
         '''
         Return version metadata (`fcr:versions`).
         Return version metadata (`fcr:versions`).
         '''
         '''
-        rsp = self.rdfly.get_version_info(self.urn)
+        if not hasattr(self, '_version_info'):
+            self._version_info = self.rdfly.get_version_info(self.urn)
 
 
-        return g.tbox.globalize_graph(rsp)
+        return self._version_info
+
+
+    @property
+    def versions(self):
+        '''
+        Return a set of version URIs.
+        '''
+        return set(self.version_info[self.urn : nsc['fcrepo'].hasVersion :])
+
+
+    @property
+    def version_uids(self):
+        '''
+        Return a set of version UIDs (relative to their parent resource).
+        '''
+        return set(self.version_info[
+            self.urn
+            : nsc['fcrepo'].hasVersion / nsc['fcrepo'].hasVersionLabel
+            :])
 
 
 
 
     @property
     @property
@@ -409,16 +429,6 @@ class Ldpr(metaclass=ABCMeta):
             return self.rdfly.ask_rsrc_exists(self.urn)
             return self.rdfly.ask_rsrc_exists(self.urn)
 
 
 
 
-    #@property
-    #def has_versions(self):
-    #    '''
-    #    Whether if a current resource has versions.
-
-    #    @return boolean
-    #    '''
-    #    return bool(self.imr.value(nsc['fcrepo'].hasVersions, any=False))
-
-
     @property
     @property
     def types(self):
     def types(self):
         '''All RDF types.
         '''All RDF types.
@@ -521,31 +531,41 @@ class Ldpr(metaclass=ABCMeta):
         children = self.imr[nsc['ldp'].contains * '+'] \
         children = self.imr[nsc['ldp'].contains * '+'] \
                 if delete_children else []
                 if delete_children else []
 
 
-        ret = self._delete_rsrc(inbound, leave_tstone)
-
-        for child_uri in children:
-            child_rsrc = Ldpr.outbound_inst(
-                g.tbox.uri_to_uuid(child_uri.identifier),
-                repr_opts={'incl_children' : False})
-            child_rsrc._delete_rsrc(inbound, leave_tstone,
-                    tstone_pointer=self.urn)
+        if leave_tstone:
+            ret = self._bury_rsrc(inbound)
+        else:
+            ret = self._purge_rsrc(inbound)
+
+            for child_uri in children:
+                child_rsrc = Ldpr.outbound_inst(
+                    g.tbox.uri_to_uuid(child_uri.identifier),
+                    repr_opts={'incl_children' : False})
+                if leave_tstone:
+                    child_rsrc._bury_rsrc(inbound, tstone_pointer=self.urn)
+                else:
+                    child_rsrc._purge_rsrc(inbound)
 
 
         return ret
         return ret
 
 
 
 
     @atomic
     @atomic
-    def delete_tombstone(self):
+    def purge(self, inbound=True):
         '''
         '''
-        Delete a tombstone.
+        Delete a tombstone and all historic snapshots.
 
 
         N.B. This does not trigger an event.
         N.B. This does not trigger an event.
         '''
         '''
-        remove_trp = {
-            (self.urn, RDF.type, nsc['fcsystem'].Tombstone),
-            (self.urn, nsc['fcrepo'].created, None),
-            (None, nsc['fcsystem'].tombstone, self.urn),
-        }
-        self.rdfly.modify_dataset(remove_trp)
+        refint = current_app.config['store']['ldp_rs']['referential_integrity']
+        inbound = True if refint else inbound
+
+        return self._purge_rsrc(inbound)
+
+
+    def get_version_info(self):
+        '''
+        Get the `fcr:versions` graph.
+        '''
+        return g.tbox.globalize_graph(self.version_info)
 
 
 
 
     def get_version(self, ver_uid):
     def get_version(self, ver_uid):
@@ -569,55 +589,7 @@ class Ldpr(metaclass=ABCMeta):
         @param ver_uid Version ver_uid. If already existing, an exception is
         @param ver_uid Version ver_uid. If already existing, an exception is
         raised.
         raised.
         '''
         '''
-        # Create version resource from copying the current state.
-        ver_add_gr = Graph()
-        vers_uuid = '{}/{}'.format(self.uuid, self.RES_VER_CONT_LABEL)
-        ver_uuid = '{}/{}'.format(vers_uuid, ver_uid)
-        ver_urn = nsc['fcres'][ver_uuid]
-        ver_add_gr.add((ver_urn, RDF.type, nsc['fcrepo'].Version))
-        for t in self.imr.graph:
-            if (
-                t[1] == RDF.type and t[2] in {
-                    nsc['fcrepo'].Binary,
-                    nsc['fcrepo'].Container,
-                    nsc['fcrepo'].Resource,
-                }
-            ) or (
-                t[1] in {
-                    nsc['fcrepo'].hasParent,
-                    nsc['fcrepo'].hasVersions,
-                    nsc['premis'].hasMessageDigest,
-                }
-            ):
-                pass
-            else:
-                ver_add_gr.add((
-                        g.tbox.replace_term_domain(t[0], self.urn, ver_urn),
-                        t[1], t[2]))
-
-        self.rdfly.modify_dataset(
-                add_trp=ver_add_gr, types={nsc['fcrepo'].Version})
-
-        # Add version metadata.
-        meta_add_gr = Graph()
-        meta_add_gr.add((
-            self.urn, nsc['fcrepo'].hasVersion, ver_urn))
-        meta_add_gr.add(
-                (ver_urn, nsc['fcrepo'].created, g.timestamp_term))
-        meta_add_gr.add(
-                (ver_urn, nsc['fcrepo'].hasVersionLabel, Literal(ver_uid)))
-
-        self.rdfly.modify_dataset(
-                add_trp=meta_add_gr, types={nsc['fcrepo'].Metadata})
-
-        # Update resource.
-        rsrc_add_gr = Graph()
-        rsrc_add_gr.add((
-            self.urn, nsc['fcrepo'].hasVersions, nsc['fcres'][vers_uuid]))
-
-        self._modify_rsrc(self.RES_UPDATED, add_trp=rsrc_add_gr, notify=False)
-
-        return g.tbox.uuid_to_uri(ver_uuid)
+        return g.tbox.globalize_term(self._create_rsrc_version(ver_uid))
 
 
 
 
     @atomic
     @atomic
@@ -702,34 +674,32 @@ class Ldpr(metaclass=ABCMeta):
         return self.RES_UPDATED
         return self.RES_UPDATED
 
 
 
 
-    def _delete_rsrc(self, inbound, leave_tstone=True, tstone_pointer=None):
+    def _bury_rsrc(self, inbound, tstone_pointer=None):
         '''
         '''
         Delete a single resource and create a tombstone.
         Delete a single resource and create a tombstone.
 
 
         @param inbound (boolean) Whether to delete the inbound relationships.
         @param inbound (boolean) Whether to delete the inbound relationships.
         @param tstone_pointer (URIRef) If set to a URN, this creates a pointer
         @param tstone_pointer (URIRef) If set to a URN, this creates a pointer
         to the tombstone of the resource that used to contain the deleted
         to the tombstone of the resource that used to contain the deleted
-        resource. Otherwise the delete resource becomes a tombstone.
+        resource. Otherwise the deleted resource becomes a tombstone.
         '''
         '''
         self._logger.info('Removing resource {}'.format(self.urn))
         self._logger.info('Removing resource {}'.format(self.urn))
+        # Create a backup snapshot for resurrection purposes.
+        self.create_version(uuid4())
 
 
         remove_trp = self.imr.graph
         remove_trp = self.imr.graph
         add_trp = Graph()
         add_trp = Graph()
 
 
-        if leave_tstone:
-            if tstone_pointer:
-                add_trp.add((self.urn, nsc['fcsystem'].tombstone,
-                        tstone_pointer))
-            else:
-                add_trp.add((self.urn, RDF.type, nsc['fcsystem'].Tombstone))
-                add_trp.add((self.urn, nsc['fcrepo'].created, g.timestamp_term))
+        if tstone_pointer:
+            add_trp.add((self.urn, nsc['fcsystem'].tombstone,
+                    tstone_pointer))
         else:
         else:
-            self._logger.info('NOT leaving tombstone.')
+            add_trp.add((self.urn, RDF.type, nsc['fcsystem'].Tombstone))
+            add_trp.add((self.urn, nsc['fcrepo'].created, g.timestamp_term))
 
 
         self._modify_rsrc(self.RES_DELETED, remove_trp, add_trp)
         self._modify_rsrc(self.RES_DELETED, remove_trp, add_trp)
 
 
         if inbound:
         if inbound:
-            remove_trp = set()
             for ib_rsrc_uri in self.imr.graph.subjects(None, self.urn):
             for ib_rsrc_uri in self.imr.graph.subjects(None, self.urn):
                 remove_trp = {(ib_rsrc_uri, None, self.urn)}
                 remove_trp = {(ib_rsrc_uri, None, self.urn)}
                 Ldpr(ib_rsrc_uri)._modify_rsrc(self.RES_UPDATED, remove_trp)
                 Ldpr(ib_rsrc_uri)._modify_rsrc(self.RES_UPDATED, remove_trp)
@@ -737,16 +707,87 @@ class Ldpr(metaclass=ABCMeta):
         return self.RES_DELETED
         return self.RES_DELETED
 
 
 
 
-    def _create_version_container(self):
+    def _purge_rsrc(self, inbound):
         '''
         '''
-        Create the relationship with `fcr:versions` the first time a version is
-        created.
+        Remove all traces of a resource and versions.
         '''
         '''
-        add_gr = Graph()
-        add_gr.add((self.urn, nsc['fcrepo'].hasVersions,
-                URIRef(str(self.urn) + '/fcr:versions')))
+        self._logger.info('Purging resource {}'.format(self.urn))
+
+        import pdb; pdb.set_trace()
+        # Remove resource itself.
+        self.rdfly.modify_dataset({(self.urn, None, None)}, types=None)
+
+        # Remove snapshots.
+        for snap_urn in self.versions:
+            remove_trp = {
+                (snap_urn, None, None),
+                (None, None, snap_urn),
+            }
+            self.rdfly.modify_dataset(remove_trp, types={})
+
+        # Remove inbound references.
+        if inbound:
+            for ib_rsrc_uri in self.imr.graph.subjects(None, self.urn):
+                remove_trp = {(ib_rsrc_uri, None, self.urn)}
+                Ldpr(ib_rsrc_uri)._modify_rsrc(self.RES_UPDATED, remove_trp)
+
+        # @TODO This could be a different event type.
+        return self.RES_DELETED
+
+
+    def _create_rsrc_version(self, ver_uid):
+        '''
+        Perform version creation and return the internal URN.
+        '''
+        # Create version resource from copying the current state.
+        ver_add_gr = Graph()
+        vers_uuid = '{}/{}'.format(self.uuid, self.RES_VER_CONT_LABEL)
+        ver_uuid = '{}/{}'.format(vers_uuid, ver_uid)
+        ver_urn = nsc['fcres'][ver_uuid]
+        ver_add_gr.add((ver_urn, RDF.type, nsc['fcrepo'].Version))
+        for t in self.imr.graph:
+            if (
+                t[1] == RDF.type and t[2] in {
+                    nsc['fcrepo'].Binary,
+                    nsc['fcrepo'].Container,
+                    nsc['fcrepo'].Resource,
+                }
+            ) or (
+                t[1] in {
+                    nsc['fcrepo'].hasParent,
+                    nsc['fcrepo'].hasVersions,
+                    nsc['premis'].hasMessageDigest,
+                }
+            ):
+                pass
+            else:
+                ver_add_gr.add((
+                        g.tbox.replace_term_domain(t[0], self.urn, ver_urn),
+                        t[1], t[2]))
+
+        self.rdfly.modify_dataset(
+                add_trp=ver_add_gr, types={nsc['fcrepo'].Version})
+
+        # Add version metadata.
+        meta_add_gr = Graph()
+        meta_add_gr.add((
+            self.urn, nsc['fcrepo'].hasVersion, ver_urn))
+        meta_add_gr.add(
+                (ver_urn, nsc['fcrepo'].created, g.timestamp_term))
+        meta_add_gr.add(
+                (ver_urn, nsc['fcrepo'].hasVersionLabel, Literal(ver_uid)))
+
+        self.rdfly.modify_dataset(
+                add_trp=meta_add_gr, types={nsc['fcrepo'].Metadata})
+
+        # Update resource.
+        rsrc_add_gr = Graph()
+        rsrc_add_gr.add((
+            self.urn, nsc['fcrepo'].hasVersions, nsc['fcres'][vers_uuid]))
+
+        self._modify_rsrc(self.RES_UPDATED, add_trp=rsrc_add_gr, notify=False)
 
 
-        self._modify_rsrc(self.RES_UPDATED, add_trp=add_gr)
+        return nsc['fcres'][ver_uuid]
 
 
 
 
     def _modify_rsrc(self, ev_type, remove_trp=Graph(), add_trp=Graph(),
     def _modify_rsrc(self, ev_type, remove_trp=Graph(), add_trp=Graph(),
@@ -943,7 +984,8 @@ class Ldpr(metaclass=ABCMeta):
 
 
         add_gr = Graph()
         add_gr = Graph()
         add_gr.add((parent_uri, nsc['ldp'].contains, self.urn))
         add_gr.add((parent_uri, nsc['ldp'].contains, self.urn))
-        parent_rsrc = Ldpc(parent_uri, repr_opts={
+        parent_rsrc = Ldpc.outbound_inst(
+                g.tbox.uri_to_uuid(parent_uri), repr_opts={
                 'incl_children' : False}, handling='none')
                 'incl_children' : False}, handling='none')
         parent_rsrc._modify_rsrc(self.RES_UPDATED, add_trp=add_gr)
         parent_rsrc._modify_rsrc(self.RES_UPDATED, add_trp=add_gr)
 
 

+ 2 - 0
lakesuperior/store_layouts/ldp_rs/base_rdf_layout.py

@@ -69,6 +69,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
 
         self.store = self._conn.store
         self.store = self._conn.store
 
 
+        self.UNION_GRAPH_URI = self._conn.UNION_GRAPH_URI
         self.ds = self._conn.ds
         self.ds = self._conn.ds
         self.ds.namespace_manager = nsm
         self.ds.namespace_manager = nsm
 
 
@@ -149,6 +150,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
         @param types (iterable(rdflib.term.URIRef)) RDF types of the resource
         @param types (iterable(rdflib.term.URIRef)) RDF types of the resource
         that may be relevant to the layout strategy. These can be anything
         that may be relevant to the layout strategy. These can be anything
         since they are just used to inform the layout and not actually stored.
         since they are just used to inform the layout and not actually stored.
+        If this is an empty set, the merge graph is used.
         '''
         '''
         pass
         pass
 
 

+ 23 - 11
lakesuperior/store_layouts/ldp_rs/default_layout.py

@@ -168,8 +168,8 @@ class DefaultLayout(BaseRdfLayout):
 
 
         if not len(rsp):
         if not len(rsp):
             raise ResourceNotExistsError(
             raise ResourceNotExistsError(
-                    urn,
-                    'No version found for this resource with the given label.')
+                urn,
+                'No version found for this resource with the given label.')
         else:
         else:
             return rsp.graph
             return rsp.graph
 
 
@@ -184,14 +184,26 @@ class DefaultLayout(BaseRdfLayout):
         self._logger.debug('Add triples: {}'.format(pformat(
         self._logger.debug('Add triples: {}'.format(pformat(
                 set(add_trp))))
                 set(add_trp))))
 
 
-        if nsc['fcrepo'].Metadata in types:
-            target_gr = self.ds.graph(self.META_GRAPH_URI)
+        if not types:
+            # @FIXME This is terrible, but I can't get Fuseki to update the
+            # default graph without using a variable.
+            #target_gr = self.ds.graph(self.UNION_GRAPH_URI)
+            target_gr = {
+                self.ds.graph(self.HIST_GRAPH_URI),
+                self.ds.graph(self.META_GRAPH_URI),
+                self.ds.graph(self.MAIN_GRAPH_URI),
+            }
+        elif nsc['fcrepo'].Metadata in types:
+            target_gr = {self.ds.graph(self.META_GRAPH_URI)}
         elif nsc['fcrepo'].Version in types:
         elif nsc['fcrepo'].Version in types:
-            target_gr = self.ds.graph(self.HIST_GRAPH_URI)
+            target_gr = {self.ds.graph(self.HIST_GRAPH_URI)}
         else:
         else:
-            target_gr = self.ds.graph(self.MAIN_GRAPH_URI)
-
-        for t in remove_trp:
-            target_gr.remove(t)
-        for t in add_trp:
-            target_gr.add(t)
+            target_gr = {self.ds.graph(self.MAIN_GRAPH_URI)}
+
+        for gr in target_gr:
+            gr -= remove_trp
+            gr += add_trp
+        #for t in remove_trp:
+        #    target_gr.remove(t)
+        #for t in add_trp:
+        #    target_gr.add(t)

+ 4 - 0
lakesuperior/store_layouts/ldp_rs/graph_store_connector.py

@@ -1,6 +1,7 @@
 import logging
 import logging
 
 
 from rdflib import Dataset
 from rdflib import Dataset
+from rdflib.term import URIRef
 from rdflib.plugins.stores.sparqlstore import SPARQLStore, SPARQLUpdateStore
 from rdflib.plugins.stores.sparqlstore import SPARQLStore, SPARQLUpdateStore
 from SPARQLWrapper.Wrapper import POST
 from SPARQLWrapper.Wrapper import POST
 
 
@@ -15,6 +16,9 @@ class GraphStoreConnector:
     be passed any configuration options.
     be passed any configuration options.
     '''
     '''
 
 
+    # N.B. This is Fuseki-specific.
+    UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
+
     _logger = logging.getLogger(__name__)
     _logger = logging.getLogger(__name__)
 
 
     def __init__(self, query_ep, update_ep=None, autocommit=False):
     def __init__(self, query_ep, update_ep=None, autocommit=False):