Forráskód Böngészése

WIP—a lot of movements to support LDP-NR.

Stefano Cossu 6 éve
szülő
commit
d9035d6b6f

+ 8 - 0
doc/notes/fcrepo4_deltas.md

@@ -55,6 +55,14 @@ the default.
 If `Prefer` is set to `handling=lenient`, all server-managed triples sent with
 the payload are ignored.
 
+## LDP-NR metadata by content negotiation
+
+FCREPO4 relies on the `/fcr:metadata` identifier to retrieve RDF metadata about
+an LDP-NR. LAKEsuperior supports this as a legacy option, but encourages the
+use of content negotiation for this purpose. Any request to an LDP-NR with an
+`Accept` header set to one of the supported RDF serialization formats will
+yield the RDF metadata of the resource instead of the binary contents.
+
 ## Asynchronous processing
 
 *TODO*

+ 96 - 46
lakesuperior/endpoints/ldp.py

@@ -3,13 +3,13 @@ import logging
 from collections import defaultdict
 from uuid import uuid4
 
-from flask import Blueprint, request
+from flask import Blueprint, request, send_file
 from werkzeug.datastructures import FileStorage
 
 from lakesuperior.exceptions import InvalidResourceError, \
         ResourceExistsError, ResourceNotExistsError, \
         InvalidResourceError, ServerManagedTermError
-from lakesuperior.model.ldp_rs import Ldpc, LdpRs
+from lakesuperior.model.ldp_rs import Ldpr, Ldpc, LdpRs
 from lakesuperior.model.ldp_nr import LdpNr
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
 from lakesuperior.util.translator import Translator
@@ -27,7 +27,7 @@ ldp = Blueprint('ldp', __name__)
 accept_patch = (
     'application/sparql-update',
 )
-accept_post_rdf = (
+accept_rdf = (
     'application/ld+json',
     'application/n-triples',
     'application/rdf+xml',
@@ -54,7 +54,7 @@ accept_post_rdf = (
 
 std_headers = {
     'Accept-Patch' : ','.join(accept_patch),
-    'Accept-Post' : ','.join(accept_post_rdf),
+    'Accept-Post' : ','.join(accept_rdf),
     #'Allow' : ','.join(allow),
 }
 
@@ -67,24 +67,20 @@ def get_resource(uuid):
     '''
     Retrieve RDF or binary content.
     '''
-    out_headers = std_headers
-
-    pref_return = defaultdict(dict)
-    if 'prefer' in request.headers:
-        prefer = Translator.parse_rfc7240(request.headers['prefer'])
-        logger.debug('Parsed Prefer header: {}'.format(prefer))
-        if 'return' in prefer:
-            pref_return = prefer['return']
+    rsrc = Ldpr.readonly_inst(uuid)
 
-    # @TODO Add conditions for LDP-NR
-    rsrc = Ldpc(uuid)
-    try:
-        out = rsrc.get(pref_return=pref_return)
-    except ResourceNotExistsError:
-        return 'Resource #{} not found.'.format(rsrc.uuid), 404
+    if isinstance(rsrc, LdpRs) or request.headers['accept'] in accept_rdf:
+        return _get_rdf(rsrc)
     else:
-        out_headers = rsrc.head()
-        return (out.graph.serialize(format='turtle'), out_headers)
+        return _get_bitstream(rsrc)
+
+
+@ldp.route('/<path:uuid>/fcr:metadata', methods=['GET'])
+def get_metadata(uuid):
+    '''
+    Retrieve RDF metadata of an LDP-NR.
+    '''
+    return _get_rdf(LdpRs(uuid))
 
 
 @ldp.route('/<path:parent>', methods=['POST'])
@@ -109,10 +105,19 @@ def post_resource(parent):
     except InvalidResourceError as e:
         return str(e), 409
 
-    try:
-        rsrc.post(data)
-    except ServerManagedTermError as e:
-        return str(e), 412
+    if cls == LdpNr:
+        try:
+            cont_disp = Translator.parse_rfc7240(
+                    request.headers['content-disposition'])
+        except KeyError:
+            cont_disp = None
+
+        rsrc.post(data, mimetype=request.content_type, disposition=cont_disp)
+    else:
+        try:
+            rsrc.post(data)
+        except ServerManagedTermError as e:
+            return str(e), 412
 
     out_headers.update({
         'Location' : rsrc.uri,
@@ -133,29 +138,40 @@ def put_resource(uuid):
 
     rsrc = cls(uuid)
 
-    logger.debug('form: {}'.format(request.form))
     # Parse headers.
     pref_handling = None
-    if 'prefer' in request.headers:
-        prefer = Translator.parse_rfc7240(request.headers['prefer'])
-        logger.debug('Parsed Prefer header: {}'.format(prefer))
-        if 'handling' in prefer:
-            pref_handling = prefer['handling']['value']
-
-    try:
-        ret = rsrc.put(
-            request.get_data().decode('utf-8'),
-            handling=pref_handling
-        )
-    except InvalidResourceError as e:
-        return str(e), 409
-    except ResourceExistsError as e:
-        return str(e), 409
-    except ServerManagedTermError as e:
-        return str(e), 412
+    if cls == LdpNr:
+        try:
+            logger.debug('Headers: {}'.format(request.headers))
+            cont_disp = Translator.parse_rfc7240(
+                    request.headers['content-disposition'])
+        except KeyError:
+            cont_disp = None
+
+        try:
+            ret = rsrc.put(data, disposition=cont_disp)
+        except InvalidResourceError as e:
+            return str(e), 409
+        except ResourceExistsError as e:
+            return str(e), 409
     else:
-        res_code = 201 if ret == BaseRdfLayout.RES_CREATED else 204
-        return '', res_code, rsp_headers
+        if 'prefer' in request.headers:
+            prefer = Translator.parse_rfc7240(request.headers['prefer'])
+            logger.debug('Parsed Prefer header: {}'.format(prefer))
+            if 'handling' in prefer:
+                pref_handling = prefer['handling']['value']
+
+        try:
+            ret = rsrc.put(data, handling=pref_handling)
+        except InvalidResourceError as e:
+            return str(e), 409
+        except ResourceExistsError as e:
+            return str(e), 409
+        except ServerManagedTermError as e:
+            return str(e), 412
+
+    res_code = 201 if ret == BaseRdfLayout.RES_CREATED else 204
+    return '', res_code, rsp_headers
 
 
 @ldp.route('/<path:uuid>', methods=['PATCH'])
@@ -194,9 +210,9 @@ def delete_resource(uuid):
 
 def class_from_req_body():
     logger.debug('Content type: {}'.format(request.mimetype))
-    #logger.debug('files: {}'.format(request.files))
+    logger.debug('files: {}'.format(request.files))
     logger.debug('stream: {}'.format(request.stream))
-    if request.mimetype in accept_post_rdf:
+    if request.mimetype in accept_rdf:
         cls = Ldpc
         # Parse out the RDF string.
         data = request.data.decode('utf-8')
@@ -219,3 +235,37 @@ def class_from_req_body():
     return cls, data
 
 
+def _get_rdf(rsrc):
+    '''
+    Get the RDF representation of a resource.
+
+    @param rsrc An in-memory resource.
+    '''
+    out_headers = std_headers
+
+    pref_return = defaultdict(dict)
+    if 'prefer' in request.headers:
+        prefer = Translator.parse_rfc7240(request.headers['prefer'])
+        logger.debug('Parsed Prefer header: {}'.format(prefer))
+        if 'return' in prefer:
+            pref_return = prefer['return']
+
+    try:
+        imr = rsrc.get('rdf', pref_return=pref_return)
+        logger.debug('GET RDF: {}'.format(imr))
+    except ResourceNotExistsError as e:
+        return str(e), 404
+    else:
+        out_headers.update(rsrc.head())
+        return (imr.graph.serialize(format='turtle'), out_headers)
+
+
+def _get_bitstream(rsrc):
+    out_headers = std_headers
+
+    # @TODO This may change in favor of more low-level handling if the file
+    # system is not local.
+    return send_file(rsrc.local_path, as_attachment=True,
+            attachment_filename=rsrc.filename)
+
+

+ 77 - 12
lakesuperior/model/ldp_nr.py

@@ -16,34 +16,99 @@ class LdpNr(Ldpr):
 
     base_types = {
         nsc['fcrepo'].Binary,
+        nsc['fcrepo'].Resource,
+        nsc['ldp'].Resource,
         nsc['ldp'].NonRDFSource,
     }
 
 
+    @property
+    def nonrdfly(self):
+        '''
+        Load non-RDF (binary) store layout.
+        '''
+        if not hasattr(self, '_nonrdfly'):
+            self._nonrdfly = __class__.load_layout('non_rdf')
+
+        return self._nonrdfly
+
+
+    @property
+    def filename(self):
+        return self.imr.value(nsc['ebucore'].filename)
+
+
+    @property
+    def local_path(self):
+        cksum_term = self.imr.value(nsc['premis'].hasMessageDigest)
+        cksum = str(cksum_term.identifier.replace('urn:sha1:',''))
+        return self.nonrdfly.local_path(cksum)
+
+
     ## LDP METHODS ##
 
-    def get(self, *args, **kwargs):
-        raise NotImplementedError()
+    def get(self, **kwargs):
+        return LdpRs(self.uuid).get(**kwargs)
 
 
-    def post(self, stream):
+    @transactional
+    def post(self, stream, mimetype=None, disposition=None):
         '''
         Create a new binary resource with a corresponding RDF representation.
 
         @param file (Stream) A Stream resource representing the uploaded file.
         '''
-        #self._logger.debug('Data: {}'.format(data[:256]))
-        metadata_rsrc = Resource(Graph(), self.urn)
+        # Persist the stream.
+        uuid = self.nonrdfly.persist(stream)
+
+        # Gather RDF metadata.
+        self._add_metadata(stream, mimetype=mimetype, disposition=disposition)
 
-        for t in self.base_types:
-            metadata_rsrc.add(RDF.type, t)
+        # Try to persist metadata. If it fails, delete the file.
+        self._logger.debug('Persisting LDP-NR triples in {}'.format(
+            self.urn))
+        try:
+            rsrc = self.rdfly.create_rsrc(self.imr)
+        except:
+            self.nonrdfly.delete(uuid)
+        else:
+            return rsrc
 
-        cksum = self.nonrdfly.persist(stream)
-        cksum_term = URIRef('urn:sha1:{}'.format(cksum))
-        metadata_rsrc.add(nsc['premis'].hasMessageDigest, cksum_term)
 
+    def put(self, stream, **kwargs):
+        return self.post(stream, **kwargs)
 
-    def put(self, data):
-        raise NotImplementedError()
 
+    ## PROTECTED METHODS ##
 
+    def _add_metadata(self, stream, mimetype='application/octet-stream',
+            disposition=None):
+        '''
+        Add all metadata for the RDF representation of the LDP-NR.
+
+        @param stream (BufferedIO) The uploaded data stream.
+        @param mimetype (string) MIME type of the uploaded file.
+        @param disposition (defaultdict) The `Content-Disposition` header
+        content, parsed through `parse_rfc7240`.
+        '''
+        # File size.
+        self._logger.debug('Data stream size: {}'.format(stream.limit))
+        self.imr.add(nsc['premis'].hasSize, Literal(stream.limit,
+                datatype=XSD.long))
+
+        # Checksum.
+        cksum_term = URIRef('urn:sha1:{}'.format(self.uuid))
+        self.imr.add(nsc['premis'].hasMessageDigest, cksum_term)
+
+        # MIME type.
+        self.imr.add(nsc['ebucore']['hasMimeType'], Literal(
+                mimetype, datatype=XSD.string))
+
+        # File name.
+        self._logger.debug('Disposition: {}'.format(disposition))
+        try:
+            self.imr.add(nsc['ebucore']['filename'], Literal(
+                    disposition['attachment']['parameters']['filename'],
+                    datatype=XSD.string))
+        except KeyError:
+            pass

+ 24 - 16
lakesuperior/model/ldp_rs.py

@@ -14,7 +14,6 @@ from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
 from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
 from lakesuperior.exceptions import ResourceNotExistsError, \
         ServerManagedTermError, SingleSubjectError
-from lakesuperior.util.digest import Digest
 from lakesuperior.util.translator import Translator
 
 class LdpRs(Ldpr):
@@ -29,10 +28,14 @@ class LdpRs(Ldpr):
     RETURN_SRV_MGD_RES_URI = nsc['fcrepo'].ServerManaged
 
     base_types = {
-        nsc['ldp'].RDFSource
+        nsc['fcrepo'].Resource,
+        nsc['ldp'].Resource,
+        nsc['ldp'].RDFSource,
     }
 
 
+    ## LDP METHODS ##
+
     def get(self, pref_return):
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
@@ -60,12 +63,7 @@ class LdpRs(Ldpr):
             if str(self.RETURN_SRV_MGD_RES_URI) in omit:
                     kwargs['incl_srv_mgd'] = False
 
-        imr = self.rdfly.out_rsrc
-
-        if not imr or not len(imr.graph):
-            raise ResourceNotExistsError(self.uri)
-
-        return Translator.globalize_rsrc(imr)
+        return Translator.globalize_rsrc(self.imr)
 
 
     @transactional
@@ -89,13 +87,17 @@ class LdpRs(Ldpr):
 
     @transactional
     @must_exist
-    def patch(self, data):
+    def patch(self, update_str):
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
+
+        Update an existing resource by applying a SPARQL-UPDATE query.
+
+        @param update_str (string) SPARQL-Update statements.
         '''
-        trp_remove, trp_add = self._sparql_delta(data)
+        delta = self._sparql_delta(update_str)
 
-        return self.rdfly.modify_rsrc(trp_remove, trp_add)
+        return self.rdfly.modify_dataset(*delta)
 
 
     ## PROTECTED METHODS ##
@@ -112,8 +114,8 @@ class LdpRs(Ldpr):
         how to handle provided server-managed triples. If `strict` is selected,
         any server-managed triple  included in the input RDF will trigger an
         exception. If `lenient`, server-managed triples are ignored.
-        @param create_only (boolean) Whether the operation is a create-only one (i.e.
-        POST) or a create-or-update one (i.e. PUT).
+        @param create_only (boolean) Whether the operation is a create-only
+        one (i.e. POST) or a create-or-update one (i.e. PUT).
         '''
         g = Graph().parse(data=data, format=format, publicID=self.urn)
 
@@ -134,6 +136,10 @@ class LdpRs(Ldpr):
     def _check_mgd_terms(self, g, handling='strict'):
         '''
         Check whether server-managed terms are in a RDF payload.
+
+        @param handling (string) One of `strict` (the default) or `lenient`.
+        `strict` raises an error if a server-managed term is in the graph.
+        `lenient` removes all server-managed triples encountered.
         '''
         offending_subjects = set(g.subjects()) & srv_mgd_subjects
         if offending_subjects:
@@ -194,7 +200,8 @@ class LdpRs(Ldpr):
         '''
         Calculate the delta obtained by a SPARQL Update operation.
 
-        This does a couple of extra things:
+        This is a critical component of the SPARQL query process and does a
+        couple of things:
 
         1. It ensures that no resources outside of the subject of the request
         are modified (e.g. by variable subjects)
@@ -209,10 +216,11 @@ class LdpRs(Ldpr):
         cause any change in the updated resource, no error is raised.
 
         @return tuple Remove and add triples. These can be used with
-        `BaseStoreLayout.update_resource`.
+        `BaseStoreLayout.update_resource` and/or recorded as separate events in
+        a provenance tracking system.
         '''
 
-        pre_g = self.rdfly.extract_imr().graph
+        pre_g = self.imr.graph
 
         post_g = deepcopy(pre_g)
         post_g.update(q)

+ 78 - 33
lakesuperior/model/ldpr.py

@@ -28,11 +28,11 @@ def transactional(fn):
     def wrapper(self, *args, **kwargs):
         try:
             ret = fn(self, *args, **kwargs)
-            print('Committing transaction.')
+            self._logger.info('Committing transaction.')
             self.rdfly.store.commit()
             return ret
         except:
-            print('Rolling back transaction.')
+            self._logger.warn('Rolling back transaction.')
             self.rdfly.store.rollback()
             raise
 
@@ -118,16 +118,14 @@ class Ldpr(metaclass=ABCMeta):
         self._urn = nsc['fcres'][uuid] if self.uuid is not None \
                 else BaseRdfLayout.ROOT_NODE_URN
 
-        self.rdfly = __class__.load_layout('rdf', self._urn)
-        self.nonrdfly = __class__.load_layout('non_rdf')
-
 
 
     @property
     def urn(self):
         '''
         The internal URI (URN) for the resource as stored in the triplestore.
-        This is a URN that needs to be converted to a global URI for the REST
+
+        This is a URN that needs to be converted to a global URI for the LDP
         API.
 
         @return rdflib.URIRef
@@ -144,34 +142,74 @@ class Ldpr(metaclass=ABCMeta):
         return Translator.uuid_to_uri(self.uuid)
 
 
+    @property
+    def rdfly(self):
+        '''
+        Load RDF store layout.
+        '''
+        if not hasattr(self, '_rdfly'):
+            self._rdfly = __class__.load_layout('rdf')
+
+        return self._rdfly
+
+
     @property
     def rsrc(self):
         '''
-        The RDFLib resource representing this LDPR. This is a copy of the
-        stored data if present, and what gets passed to most methods of the
-        store layout methods.
+        The RDFLib resource representing this LDPR. This is a live
+        representation of the stored data if present.
 
         @return rdflib.resource.Resource
         '''
         if not hasattr(self, '_rsrc'):
-            self._rsrc = self.rdfly.rsrc
+            self._rsrc = self.rdfly.rsrc(self.urn)
 
         return self._rsrc
 
 
+    @property
+    def imr(self):
+        '''
+        Extract an in-memory resource for harmless manipulation and output.
+
+        If the resource is not stored (yet), initialize a new IMR with basic
+        triples.
+
+        @return rdflib.resource.Resource
+        '''
+        if not hasattr(self, '_imr'):
+            if not self.is_stored:
+                self._imr = Resource(Graph(), self.urn)
+                for t in self.base_types:
+                    self.imr.add(RDF.type, t)
+            else:
+                self._imr = self.rdfly.extract_imr(self.urn)
+
+        return self._imr
+
+
+    @imr.deleter
+    def imr(self):
+        '''
+        Delete in-memory buffered resource.
+        '''
+        delattr(self, '_imr')
+
+
     @property
     def is_stored(self):
-        return self.rdfly.ask_rsrc_exists()
+        return self.rdfly.ask_rsrc_exists(self.urn)
 
 
     @property
     def types(self):
         '''All RDF types.
 
-        @return generator
+        @return set(rdflib.term.URIRef)
         '''
         if not hasattr(self, '_types'):
             self._types = set(self.rsrc[RDF.type])
+
         return self._types
 
 
@@ -186,6 +224,7 @@ class Ldpr(metaclass=ABCMeta):
             for t in self.types:
                 if t.qname()[:4] == 'ldp:':
                     self._ldp_types.add(t)
+
         return self._ldp_types
 
 
@@ -245,18 +284,18 @@ class Ldpr(metaclass=ABCMeta):
         layout to be loaded.
         @param uuid (string) UUID of the base resource. For RDF layouts only.
         '''
-        layout_name = getattr(cls, '{}_store_layout'.format(type))
+        layout_cls = getattr(cls, '{}_store_layout'.format(type))
         store_mod = import_module('lakesuperior.store_layouts.{0}.{1}'.format(
-                type, layout_name))
-        layout_cls = getattr(store_mod, Translator.camelcase(layout_name))
+                type, layout_cls))
+        layout_cls = getattr(store_mod, Translator.camelcase(layout_cls))
 
-        return layout_cls(uuid) if type=='rdf' else layout_cls()
+        return layout_cls()
 
 
     @classmethod
     def readonly_inst(cls, uuid):
         '''
-        Fatory method that creates and returns an instance of an LDPR subclass
+        Factory method that creates and returns an instance of an LDPR subclass
         based on information that needs to be queried from the underlying
         graph store.
 
@@ -264,16 +303,23 @@ class Ldpr(metaclass=ABCMeta):
 
         @param uuid UUID of the instance.
         '''
-        rdfly = cls.load_rdf_layout(cls, uuid)
-        rdf_types = rdfly.rsrc[nsc['res'][uuid] : RDF.type]
+        rdfly = cls.load_layout('rdf')
+        imr_urn = nsc['fcres'][uuid] if uuid else rdfly.ROOT_NODE_URN
+        imr = rdfly.extract_imr(imr_urn, minimal=True)
+        rdf_types = imr.objects(RDF.type)
 
         for t in rdf_types:
-            if t == cls.LDP_NR_TYPE:
+            cls._logger.debug('Checking RDF type: {}'.format(t.identifier))
+            if t.identifier == cls.LDP_NR_TYPE:
+                from lakesuperior.model.ldp_nr import LdpNr
+                cls._logger.info('Resource is a LDP-NR.')
                 return LdpNr(uuid)
-            if t == cls.LDP_RS_TYPE:
+            if t.identifier == cls.LDP_RS_TYPE:
+                from lakesuperior.model.ldp_rs import LdpRs
+                cls._logger.info('Resource is a LDP-RS.')
                 return LdpRs(uuid)
-            else:
-                raise ResourceNotExistsError(uuid)
+
+        raise ResourceNotExistsError(uuid)
 
 
     @classmethod
@@ -289,15 +335,16 @@ class Ldpr(metaclass=ABCMeta):
         if not slug and not parent_uuid:
             return cls(str(uuid4()))
 
-        rdfly = cls.load_rdf_layout()
-        parent_imr = rdfly.extract_imr(nsc['fcres'][parent_uuid])
+        rdfly = cls.load_layout('rdf')
+
+        parent_imr_urn = nsc['fcres'][parent_uuid] if parent_uuid \
+                else rdfly.ROOT_NODE_URN
+        parent_imr = rdfly.extract_imr(parent_imr_urn, minimal=True)
+        if not len(parent_imr.graph):
+            raise ResourceNotExistsError(parent_uuid)
 
         # Set prefix.
         if parent_uuid:
-            parent_exists = rdfly.ask_rsrc_exists(parent_imr.identifier)
-            if not parent_exists:
-                raise ResourceNotExistsError(parent_uuid)
-
             parent_types = { t.identifier for t in \
                     parent_imr.objects(RDF.type) }
             cls._logger.debug('Parent types: {}'.format(
@@ -328,16 +375,14 @@ class Ldpr(metaclass=ABCMeta):
         '''
         Return values for the headers.
         '''
-        out_rsrc = self.rdfly.out_rsrc
-
         out_headers = defaultdict(list)
 
-        digest = out_rsrc.value(nsc['premis'].hasMessageDigest)
+        digest = self.imr.value(nsc['premis'].hasMessageDigest)
         if digest:
             etag = digest.identifier.split(':')[-1]
             out_headers['ETag'] = 'W/"{}"'.format(etag),
 
-        last_updated_term = out_rsrc.value(nsc['fcrepo'].lastModified)
+        last_updated_term = self.imr.value(nsc['fcrepo'].lastModified)
         if last_updated_term:
             out_headers['Last-Modified'] = arrow.get(last_updated_term)\
                 .format('ddd, D MMM YYYY HH:mm:ss Z')

+ 21 - 1
lakesuperior/store_layouts/non_rdf/base_non_rdf_layout.py

@@ -8,6 +8,10 @@ from lakesuperior.config_parser import config
 class BaseNonRdfLayout(metaclass=ABCMeta):
     '''
     Abstract class for setting the non-RDF (bitstream) store layout.
+
+    Different layouts can be created by implementing all the abstract methods
+    of this class. A non-RDF layout is not necessarily restricted to a
+    traditional filesystem—e.g. a layout persisting to HDFS can be written too.
     '''
 
     _conf = config['application']['store']['ldp_nr']
@@ -24,8 +28,24 @@ class BaseNonRdfLayout(metaclass=ABCMeta):
     ## INTERFACE METHODS ##
 
     @abstractmethod
-    def persist(self, file):
+    def persist(self, stream):
         '''
         Store the stream in the designated persistence layer for this layout.
         '''
         pass
+
+
+    @abstractmethod
+    def delete(self, id):
+        '''
+        Delete a stream by its identifier (i.e. checksum).
+        '''
+        pass
+
+
+    @abstractmethod
+    def local_path(self, uuid):
+        '''
+        Return the local path of a file.
+        '''
+        pass

+ 20 - 14
lakesuperior/store_layouts/non_rdf/default_layout.py

@@ -1,7 +1,6 @@
 import os
 
 from hashlib import sha1
-from shutil import copyfileobj
 from uuid import uuid4
 
 from lakesuperior.store_layouts.non_rdf.base_non_rdf_layout import \
@@ -30,8 +29,6 @@ class DefaultLayout(BaseNonRdfLayout):
         tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
         try:
             with open(tmp_file, 'wb') as f:
-                #if hasattr(stream, 'seek'):
-                #    stream.seek(0)
                 self._logger.debug('Writing temp file to {}.'.format(tmp_file))
 
                 hash = sha1()
@@ -47,40 +44,49 @@ class DefaultLayout(BaseNonRdfLayout):
             raise
 
         # Move temp file to final destination.
-
-        digest = hash.hexdigest()
-        dst = self._path(digest)
+        uuid = hash.hexdigest()
+        dst = self.local_path(uuid)
         self._logger.debug('Saving file to disk: {}'.format(dst))
         if not os.access(os.path.dirname(dst), os.X_OK):
             os.makedirs(os.path.dirname(dst))
+
         # If the file exists already, don't bother rewriting it.
         if os.path.exists(dst):
-            self._logger.info('File exists on {}. Not overwriting.'.format(dst))
+            self._logger.info(
+                    'File exists on {}. Not overwriting.'.format(dst))
             os.unlink(tmp_file)
         else:
             os.rename(tmp_file, dst)
 
-        return digest
+        return uuid
+
+
+    def delete(self, uuid):
+        '''
+        See BaseNonRdfLayout.delete.
+        '''
+        os.unlink(self.local_path(uuid))
 
 
     ## PROTECTED METHODS ##
 
-    def _path(self, digest):
+    def local_path(self, uuid):
         '''
         Generate the resource path splitting the resource checksum according to
         configuration parameters.
 
-        @param digest (string) The resource digest.
+        @param uuid (string) The resource UUID. This corresponds to the content
+        checksum.
         '''
-        self._logger.debug('Generating path from digest: {}'.format(digest))
+        self._logger.debug('Generating path from uuid: {}'.format(uuid))
         bl = self._conf['pairtree_branch_length']
         bc = self._conf['pairtree_branches']
-        term = len(digest) if bc==0 else min(bc*bl, len(digest))
+        term = len(uuid) if bc==0 else min(bc*bl, len(uuid))
 
-        path = [ digest[i:i+bl] for i in range(0, term, bl) ]
+        path = [ uuid[i:i+bl] for i in range(0, term, bl) ]
 
         if bc > 0:
-            path.append(digest[term:])
+            path.append(uuid[term:])
         path.insert(0, self.root)
 
         return '/'.join(path)

+ 40 - 65
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -11,20 +11,21 @@ from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
 from lakesuperior.config_parser import config
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
+from lakesuperior.exceptions import ResourceNotExistsError
 
 
-def needs_rsrc(fn):
-    '''
-    Decorator for methods that cannot be called without `self.rsrc` set.
-    '''
-    def wrapper(self, *args, **kwargs):
-        if not hasattr(self, 'rsrc') or self.rsrc is None:
-            raise TypeError(
-                'This method must be called by an instance with `rsrc` set.')
-
-        return fn(self, *args, **kwargs)
-
-    return wrapper
+#def needs_rsrc(fn):
+#    '''
+#    Decorator for methods that cannot be called without `self.rsrc` set.
+#    '''
+#    def wrapper(self, *args, **kwargs):
+#        if not hasattr(self, 'rsrc') or self.rsrc is None:
+#            raise TypeError(
+#                'This method must be called by an instance with `rsrc` set.')
+#
+#        return fn(self, *args, **kwargs)
+#
+#    return wrapper
 
 
 
@@ -75,7 +76,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
     ## MAGIC METHODS ##
 
-    def __init__(self, urn=None):
+    def __init__(self):
         '''Initialize the graph store and a layout.
 
         NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
@@ -84,20 +85,9 @@ class BaseRdfLayout(metaclass=ABCMeta):
         this only in the (currently) unreleased 2.2 branch. It works with Jena,
         but other considerations would have to be made (e.g. Jena has no REST
         API for handling transactions).
-
-        In a more advanced development phase it could be possible to extend the
-        SPARQLUpdateStore class to add non-standard interaction with specific
-        SPARQL implementations in order to support ACID features provided
-        by them; e.g. Blazegraph's RESTful transaction handling methods.
-
-        The layout can be initialized with a URN to make resource-centric
-        operations simpler. However, for generic queries, urn can be None and
-        no `self.rsrc` is assigned. In this case, some methods (the ones
-        decorated by `@needs_rsrc`) will not be available.
         '''
         self.ds = Dataset(self.store, default_union=True)
         self.ds.namespace_manager = nsm
-        self._base_urn = urn
 
 
     @property
@@ -112,32 +102,16 @@ class BaseRdfLayout(metaclass=ABCMeta):
         return self._store
 
 
-    @property
-    def base_urn(self):
-        '''
-        The base URN for the current resource being handled.
-
-        This value is only here for convenience. It does not preclude one from
-        using an instance of this class with more than one subject.
-        '''
-        return self._base_urn
-
-
-    @property
-    def rsrc(self):
+    def rsrc(self, urn):
         '''
         Reference to a live data set that can be updated. This exposes the
         whole underlying triplestore structure and is used to update a
         resource.
         '''
-        if self.base_urn is None:
-            return None
-        return self.ds.resource(self.base_urn)
+        return self.ds.resource(urn)
 
 
-    @property
-    @needs_rsrc
-    def out_rsrc(self):
+    def out_rsrc(self, urn):
         '''
         Graph obtained by querying the triplestore and adding any abstraction
         and filtering to make up a graph that can be used for read-only,
@@ -146,7 +120,9 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
         @return rdflib.resource.Resource
         '''
-        return self.extract_imr()
+        imr = self.extract_imr(urn)
+        if not len(imr.graph):
+            raise ResourceNotExistsError
 
 
 
@@ -182,19 +158,18 @@ class BaseRdfLayout(metaclass=ABCMeta):
         return self.ds.query(q, initBindings=initBindings, initNs=nsc)
 
 
-    @needs_rsrc
-    def create_or_replace_rsrc(self, g):
+    def create_or_replace_rsrc(self, imr):
         '''Create a resource graph in the main graph if it does not exist.
 
         If it exists, replace the existing one retaining the creation date.
         '''
-        if self.ask_rsrc_exists():
+        if self.ask_rsrc_exists(imr.identifier):
             self._logger.info(
                     'Resource {} exists. Removing all outbound triples.'
-                    .format(self.rsrc.identifier))
-            return self.replace_rsrc(g)
+                    .format(imr.identifier))
+            return self.replace_rsrc(imr)
         else:
-            return self.create_rsrc(g)
+            return self.create_rsrc(imr)
 
 
     ## INTERFACE METHODS ##
@@ -203,7 +178,8 @@ class BaseRdfLayout(metaclass=ABCMeta):
     # implement.
 
     @abstractmethod
-    def extract_imr(self, uri=None, graph=None, inbound=False):
+    def extract_imr(self, uri, graph=None, minimal=False,
+            incl_inbound=False, embed_children=False, incl_srv_mgd=True):
         '''
         Extract an in-memory resource based on the copy of a graph on a subject.
 
@@ -218,13 +194,12 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
 
     @abstractmethod
-    def ask_rsrc_exists(self, uri=None):
+    def ask_rsrc_exists(self, urn):
         '''
-        Ask if a resource exists (is stored) in the graph store.
+        Ask if a resource is stored in the graph store.
 
-        @param uri (rdflib.term.URIRef) If this is provided, this method
-        will look for the specified resource. Otherwise, it will look for the
-        default resource. If this latter is not specified, the result is False.
+        @param urn (rdflib.term.URIRef) The internal URN of the resource to be
+        queried.
 
         @return boolean
         '''
@@ -232,8 +207,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
 
     @abstractmethod
-    @needs_rsrc
-    def create_rsrc(self, urn, data, commit=True):
+    def create_rsrc(self, imr):
         '''Create a resource graph in the main graph.
 
         If the resource exists, raise an exception.
@@ -242,8 +216,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
 
     @abstractmethod
-    @needs_rsrc
-    def replace_rsrc(self, g):
+    def replace_rsrc(self, imr):
         '''Replace a resource, i.e. delete all the triples and re-add the
         ones provided.
 
@@ -254,10 +227,13 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
 
     @abstractmethod
-    @needs_rsrc
-    def modify_rsrc(self, remove, add):
+    def modify_dataset(self, remove_trp, add_trp):
         '''
-        Adds and/or removes triples from a graph.
+        Adds and/or removes triples from the graph.
+
+        NOTE: This is not specific to a resource. The LDP layer is responsible
+        for checking that all the +/- triples are referring to the intended
+        subject(s).
 
         @param remove (rdflib.Graph) Triples to be removed.
         @param add (rdflib.Graph) Triples to be added.
@@ -266,8 +242,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
 
     @abstractmethod
-    @needs_rsrc
-    def delete_rsrc(self, urn, commit=True):
+    def delete_rsrc(self, urn, inbound=True):
         pass
 
 

+ 25 - 29
lakesuperior/store_layouts/rdf/simple_layout.py

@@ -12,8 +12,8 @@ from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
         srv_mgd_predicates, srv_mgd_types
-from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout, \
-        needs_rsrc
+from lakesuperior.exceptions import InvalidResourceError
+from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
 from lakesuperior.util.translator import Translator
 
 
@@ -29,14 +29,12 @@ class SimpleLayout(BaseRdfLayout):
     for (possible) improved speed and reduced storage.
     '''
 
-    def extract_imr(self, uri=None, graph=None, minimal=False,
+    def extract_imr(self, uri, graph=None, minimal=False,
             incl_inbound=False, embed_children=False, incl_srv_mgd=True):
         '''
         See base_rdf_layout.extract_imr.
         '''
-        uri = uri or self.base_urn
-
-        inbound_qry = '\n?s1 ?p1 {}'.format(self.base_urn.n3()) \
+        inbound_qry = '\n?s1 ?p1 {}'.format(uri.n3()) \
                 if incl_inbound else ''
         embed_children_qry = '''
         OPTIONAL {{
@@ -62,6 +60,8 @@ class SimpleLayout(BaseRdfLayout):
             g = Graph()
         else:
             g = qres.graph
+            # @FIXME This can be expensive with many children. Move this into
+            # the query string.
             if not incl_srv_mgd:
                 self._logger.info('Removing server managed triples.')
                 for p in srv_mgd_predicates:
@@ -74,39 +74,35 @@ class SimpleLayout(BaseRdfLayout):
         return Resource(g, uri)
 
 
-    def ask_rsrc_exists(self, uri=None):
+    def ask_rsrc_exists(self, urn):
         '''
         See base_rdf_layout.ask_rsrc_exists.
         '''
-        if not uri:
-            if self.rsrc is not None:
-                uri = self.rsrc.identifier
-            else:
-                return False
-
-        self._logger.info('Checking if resource exists: {}'.format(uri))
-        return (uri, Variable('p'), Variable('o')) in self.ds
+        self._logger.info('Checking if resource exists: {}'.format(urn))
+        return (urn, Variable('p'), Variable('o')) in self.ds
 
 
-    @needs_rsrc
     def create_rsrc(self, imr):
         '''
         See base_rdf_layout.create_rsrc.
         '''
-        for s, p, o in imr.graph:
-            self.ds.add((s, p, o))
+        self.ds |= imr.graph
 
         return self.RES_CREATED
 
 
-    @needs_rsrc
     def replace_rsrc(self, imr):
         '''
         See base_rdf_layout.replace_rsrc.
         '''
+        # @TODO Move this to LDP.
+        rsrc = self.rsrc(imr.identifier)
         # Delete all triples but keep creation date and creator.
-        created = self.rsrc.value(nsc['fcrepo'].created)
-        created_by = self.rsrc.value(nsc['fcrepo'].createdBy)
+        created = rsrc.value(nsc['fcrepo'].created)
+        created_by = rsrc.value(nsc['fcrepo'].createdBy)
+
+        if not created or not created_by:
+            raise InvalidResourceError(urn)
 
         imr.set(nsc['fcrepo'].created, created)
         imr.set(nsc['fcrepo'].createdBy, created_by)
@@ -115,14 +111,11 @@ class SimpleLayout(BaseRdfLayout):
         self.delete_rsrc()
 
         self.ds |= imr.graph
-        #for s, p, o in imr.graph:
-        #    self.ds.add((s, p, o))
 
         return self.RES_UPDATED
 
 
-    @needs_rsrc
-    def modify_rsrc(self, remove_trp, add_trp):
+    def modify_dataset(self, remove_trp, add_trp):
         '''
         See base_rdf_layout.update_rsrc.
         '''
@@ -135,16 +128,19 @@ class SimpleLayout(BaseRdfLayout):
         #    self.rsrc.add(t[0], t[1])
 
 
-    def delete_rsrc(self, inbound=True):
+    def delete_rsrc(self, urn, inbound=True):
         '''
         Delete a resource. If `inbound` is specified, delete all inbound
         relationships as well (this is the default).
         '''
-        print('Removing resource {}.'.format(self.rsrc.identifier))
+        rsrc = self.rsrc(urn)
+
+        print('Removing resource {}.'.format(rsrc.identifier))
 
-        self.rsrc.remove(Variable('p'))
+        rsrc.remove(Variable('p'))
+        # @TODO Remove children recursively
         if inbound:
             self.ds.remove(
-                    (Variable('s'), Variable('p'), self.rsrc.identifier))
+                    (Variable('s'), Variable('p'), rsrc.identifier))
 
 

+ 1 - 0
requirements.txt

@@ -10,6 +10,7 @@ isodate==0.5.4
 itsdangerous==0.24
 Jinja2==2.9.6
 MarkupSafe==1.0
+numpy==1.13.3
 py==1.4.34
 pyparsing==2.2.0
 pytest==3.2.2