Bladeren bron

Complete LDP-NR integration and restructure methods and parameters further.

Stefano Cossu 7 jaren geleden
bovenliggende
commit
3c726da92d

+ 31 - 34
lakesuperior/endpoints/ldp.py

@@ -7,8 +7,7 @@ from flask import Blueprint, request, send_file
 from werkzeug.datastructures import FileStorage
 
 from lakesuperior.exceptions import InvalidResourceError, \
-        ResourceExistsError, ResourceNotExistsError, \
-        InvalidResourceError, ServerManagedTermError
+        ResourceExistsError, ResourceNotExistsError, ServerManagedTermError
 from lakesuperior.model.ldp_rs import Ldpr, Ldpc, LdpRs
 from lakesuperior.model.ldp_nr import LdpNr
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
@@ -63,16 +62,39 @@ std_headers = {
 @ldp.route('/<path:uuid>', methods=['GET'])
 @ldp.route('/', defaults={'uuid': None}, methods=['GET'],
         strict_slashes=False)
-def get_resource(uuid):
+def get_resource(uuid, force_rdf=False):
     '''
     Retrieve RDF or binary content.
+
+    @param uuid (string) UUID of resource to retrieve.
+    @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
+    a LDP-NR. This is not available in the API but is used e.g. by the
+    `*/fcr:metadata` endpoint. The default is False.
     '''
-    rsrc = Ldpr.readonly_inst(uuid)
+    out_headers = std_headers
+    repr_options = defaultdict(dict)
+    if 'prefer' in request.headers:
+        prefer = Translator.parse_rfc7240(request.headers['prefer'])
+        logger.debug('Parsed Prefer header: {}'.format(prefer))
+        if 'return' in prefer:
+            repr_options = prefer['return']
 
-    if isinstance(rsrc, LdpRs) or request.headers['accept'] in accept_rdf:
-        return _get_rdf(rsrc)
+    try:
+        rsrc = Ldpr.readonly_inst(uuid, repr_options)
+    except ResourceNotExistsError as e:
+        return str(e), 404
     else:
-        return _get_bitstream(rsrc)
+        logger.debug('Resource URN in the route: {}'.format(rsrc._urn))
+        x = rsrc.imr
+        logger.debug('IMR options in the route: {}'.format(rsrc._imr_options))
+        out_headers.update(rsrc.head())
+        if isinstance(rsrc, LdpRs) \
+                or request.headers['accept'] in accept_rdf \
+                or force_rdf:
+            return (rsrc.imr.graph.serialize(format='turtle'), out_headers)
+        else:
+            return send_file(rsrc.local_path, as_attachment=True,
+                    attachment_filename=rsrc.filename)
 
 
 @ldp.route('/<path:uuid>/fcr:metadata', methods=['GET'])
@@ -80,7 +102,7 @@ def get_metadata(uuid):
     '''
     Retrieve RDF metadata of a LDP-NR.
     '''
-    return _get_rdf(LdpRs(uuid))
+    return get_resource(uuid, force_rdf=True)
 
 
 @ldp.route('/<path:parent>', methods=['POST'])
@@ -212,7 +234,7 @@ def class_from_req_body():
     logger.debug('Content type: {}'.format(request.mimetype))
     logger.debug('files: {}'.format(request.files))
     logger.debug('stream: {}'.format(request.stream))
-    if request.mimetype in accept_rdf:
+    if  not request.mimetype or request.mimetype in accept_rdf:
         cls = Ldpc
         # Parse out the RDF string.
         data = request.data.decode('utf-8')
@@ -235,31 +257,6 @@ def class_from_req_body():
     return cls, data
 
 
-def _get_rdf(rsrc):
-    '''
-    Get the RDF representation of a resource.
-
-    @param rsrc An in-memory resource.
-    '''
-    out_headers = std_headers
-
-    pref_return = defaultdict(dict)
-    if 'prefer' in request.headers:
-        prefer = Translator.parse_rfc7240(request.headers['prefer'])
-        logger.debug('Parsed Prefer header: {}'.format(prefer))
-        if 'return' in prefer:
-            pref_return = prefer['return']
-
-    try:
-        imr = rsrc.get('rdf', pref_return=pref_return)
-        logger.debug('GET RDF: {}'.format(imr))
-    except ResourceNotExistsError as e:
-        return str(e), 404
-    else:
-        out_headers.update(rsrc.head())
-        return (imr.graph.serialize(format='turtle'), out_headers)
-
-
 def _get_bitstream(rsrc):
     out_headers = std_headers
 

+ 2 - 1
lakesuperior/model/ldp_nr.py

@@ -6,6 +6,7 @@ from rdflib.term import URIRef, Literal, Variable
 from lakesuperior.config_parser import config
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
+from lakesuperior.model.ldp_rs import LdpRs
 from lakesuperior.util.digest import Digest
 
 class LdpNr(Ldpr):
@@ -93,7 +94,7 @@ class LdpNr(Ldpr):
         '''
         # File size.
         self._logger.debug('Data stream size: {}'.format(stream.limit))
-        self.imr.add(nsc['premis'].hasSize, Literal(stream.limit,
+        self.stored_or_new_imr.add(nsc['premis'].hasSize, Literal(stream.limit,
                 datatype=XSD.long))
 
         # Checksum.

+ 2 - 27
lakesuperior/model/ldp_rs.py

@@ -14,6 +14,7 @@ from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
 from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
 from lakesuperior.exceptions import ResourceNotExistsError, \
         ServerManagedTermError, SingleSubjectError
+from lakesuperior.util.digest import Digest
 from lakesuperior.util.translator import Translator
 
 class LdpRs(Ldpr):
@@ -23,9 +24,6 @@ class LdpRs(Ldpr):
     '''
 
     DEFAULT_USER = Literal('BypassAdmin')
-    RETURN_CHILD_RES_URI = nsc['fcrepo'].EmbedResources
-    RETURN_INBOUND_REF_URI = nsc['fcrepo'].InboundReferences
-    RETURN_SRV_MGD_RES_URI = nsc['fcrepo'].ServerManaged
 
     base_types = {
         nsc['fcrepo'].Resource,
@@ -36,33 +34,10 @@ class LdpRs(Ldpr):
 
     ## LDP METHODS ##
 
-    def get(self, pref_return):
+    def get(self, repr_opts):
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
         '''
-        kwargs = {}
-
-        minimal = embed_children = incl_inbound = False
-        kwargs['incl_srv_mgd'] = True
-
-        if 'value' in pref_return and pref_return['value'] == 'minimal':
-            kwargs['minimal'] = True
-        else:
-            include = pref_return['parameters']['include'].split(' ') \
-                    if 'include' in pref_return['parameters'] else []
-            omit = pref_return['parameters']['omit'].split(' ') \
-                    if 'omit' in pref_return['parameters'] else []
-
-            self._logger.debug('Include: {}'.format(include))
-            self._logger.debug('Omit: {}'.format(omit))
-
-            if str(self.RETURN_INBOUND_REF_URI) in include:
-                    kwargs['incl_inbound'] = True
-            if str(self.RETURN_CHILD_RES_URI) in omit:
-                    kwargs['embed_chldren'] = False
-            if str(self.RETURN_SRV_MGD_RES_URI) in omit:
-                    kwargs['incl_srv_mgd'] = False
-
         return Translator.globalize_rsrc(self.imr)
 
 

+ 69 - 13
lakesuperior/model/ldpr.py

@@ -95,6 +95,9 @@ class Ldpr(metaclass=ABCMeta):
     FCREPO_PTREE_TYPE = nsc['fcrepo'].Pairtree
     LDP_NR_TYPE = nsc['ldp'].NonRDFSource
     LDP_RS_TYPE = nsc['ldp'].RDFSource
+    RETURN_CHILD_RES_URI = nsc['fcrepo'].EmbedResources
+    RETURN_INBOUND_REF_URI = nsc['fcrepo'].InboundReferences
+    RETURN_SRV_MGD_RES_URI = nsc['fcrepo'].ServerManaged
 
     _logger = logging.getLogger(__name__)
 
@@ -103,7 +106,7 @@ class Ldpr(metaclass=ABCMeta):
 
     ## MAGIC METHODS ##
 
-    def __init__(self, uuid):
+    def __init__(self, uuid, retr_opts={}):
         '''Instantiate an in-memory LDP resource that can be loaded from and
         persisted to storage.
 
@@ -118,6 +121,7 @@ class Ldpr(metaclass=ABCMeta):
         self._urn = nsc['fcres'][uuid] if self.uuid is not None \
                 else BaseRdfLayout.ROOT_NODE_URN
 
+        self._set_imr_options(retr_opts)
 
 
     @property
@@ -169,6 +173,24 @@ class Ldpr(metaclass=ABCMeta):
 
     @property
     def imr(self):
+        '''
+        Extract an in-memory resource from the graph store.
+
+        If the resource is not stored (yet), a `ResourceNotExistsError` is
+        raised.
+
+        @return rdflib.resource.Resource
+        '''
+        if not hasattr(self, '_imr'):
+            self._logger.debug('IMR options: {}'.format(self._imr_options))
+            options = dict(self._imr_options, strict=True)
+            self._imr = self.rdfly.extract_imr(self.urn, **options)
+
+        return self._imr
+
+
+    @property
+    def stored_or_new_imr(self):
         '''
         Extract an in-memory resource for harmless manipulation and output.
 
@@ -178,12 +200,13 @@ class Ldpr(metaclass=ABCMeta):
         @return rdflib.resource.Resource
         '''
         if not hasattr(self, '_imr'):
-            if not self.is_stored:
+            options = dict(self._imr_options, strict=True)
+            try:
+                self._imr = self.rdfly.extract_imr(self.urn, **options)
+            except ResourceNotExistsError:
                 self._imr = Resource(Graph(), self.urn)
                 for t in self.base_types:
                     self.imr.add(RDF.type, t)
-            else:
-                self._imr = self.rdfly.extract_imr(self.urn)
 
         return self._imr
 
@@ -293,7 +316,7 @@ class Ldpr(metaclass=ABCMeta):
 
 
     @classmethod
-    def readonly_inst(cls, uuid):
+    def readonly_inst(cls, uuid, repr_opts=None):
         '''
         Factory method that creates and returns an instance of an LDPR subclass
         based on information that needs to be queried from the underlying
@@ -305,7 +328,7 @@ class Ldpr(metaclass=ABCMeta):
         '''
         rdfly = cls.load_layout('rdf')
         imr_urn = nsc['fcres'][uuid] if uuid else rdfly.ROOT_NODE_URN
-        imr = rdfly.extract_imr(imr_urn, minimal=True)
+        imr = rdfly.extract_imr(imr_urn, **repr_opts)
         rdf_types = imr.objects(RDF.type)
 
         for t in rdf_types:
@@ -377,6 +400,7 @@ class Ldpr(metaclass=ABCMeta):
         '''
         out_headers = defaultdict(list)
 
+        self._logger.debug('IMR options in head(): {}'.format(self._imr_options))
         digest = self.imr.value(nsc['premis'].hasMessageDigest)
         if digest:
             etag = digest.identifier.split(':')[-1]
@@ -498,17 +522,49 @@ class Ldpr(metaclass=ABCMeta):
         between a and a/b and between a/b and a/b/c in order to maintain the
         `containment chain.
         '''
-        g = Graph()
-        g.add((uri, RDF.type, nsc['ldp'].Container))
-        g.add((uri, RDF.type, nsc['ldp'].BasicContainer))
-        g.add((uri, RDF.type, nsc['ldp'].RDFSource))
-        g.add((uri, nsc['fcrepo'].contains, child_uri))
+        imr = Resource(Graph(), uri)
+        imr.add(RDF.type, nsc['ldp'].Container)
+        imr.add(RDF.type, nsc['ldp'].BasicContainer)
+        imr.add(RDF.type, nsc['ldp'].RDFSource)
+        imr.add(nsc['fcrepo'].contains, child_uri)
 
         # If the path segment is just below root
         if '/' not in str(uri):
-            g.add((nsc['fcsystem'].root, nsc['fcrepo'].contains, uri))
+            imr.graph.add((nsc['fcsystem'].root, nsc['fcrepo'].contains, uri))
+
+        self.rdfly.create_rsrc(imr)
+
+
+    def _set_imr_options(self, repr_opts):
+        '''
+        Set options to retrieve IMR.
+
+        Ideally, IMR retrieval is done once per request, so all the options
+        are set once in the `imr()` property.
+
+        @param repr_opts (dict): Options parsed from `Prefer` header.
+        '''
+        self._imr_options = {}
+
+        minimal = embed_children = incl_inbound = False
+        self._imr_options['incl_srv_mgd'] = True
+
+        if 'value' in repr_opts and repr_opts['value'] == 'minimal':
+            self._imr_options['minimal'] = True
+        elif 'parameters' in repr_opts:
+            include = repr_opts['parameters']['include'].split(' ') \
+                    if 'include' in repr_opts['parameters'] else []
+            omit = repr_opts['parameters']['omit'].split(' ') \
+                    if 'omit' in repr_opts['parameters'] else []
 
-        self.rdfly.create_rsrc(g)
+            self._logger.debug('Include: {}'.format(include))
+            self._logger.debug('Omit: {}'.format(omit))
 
+            if str(self.RETURN_INBOUND_REF_URI) in include:
+                    self._imr_options['incl_inbound'] = True
+            if str(self.RETURN_CHILD_RES_URI) in omit:
+                    self._imr_options['embed_chldren'] = False
+            if str(self.RETURN_SRV_MGD_RES_URI) in omit:
+                    self._imr_options['incl_srv_mgd'] = False
 
 

+ 4 - 5
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -178,15 +178,14 @@ class BaseRdfLayout(metaclass=ABCMeta):
     # implement.
 
     @abstractmethod
-    def extract_imr(self, uri, graph=None, minimal=False,
-            incl_inbound=False, embed_children=False, incl_srv_mgd=True):
+    def extract_imr(self, uri, strict=False, minimal=False, incl_inbound=False,
+                embed_children=False, incl_srv_mgd=True):
         '''
         Extract an in-memory resource based on the copy of a graph on a subject.
 
         @param uri (URIRef) Resource URI.
-        @param graph (rdflib.term.URIRef | set(rdflib.graphURIRef)) The graph
-        to extract from. This can be an URI for a single graph, or a list of
-        graph URIs in which case an aggregate graph will be used.
+        @param strict (boolean) If set to True, an empty result graph will
+        raise a `ResourceNotExistsError`.
         @param inbound (boolean) Whether to pull triples that have the resource
         URI as their object.
         '''

+ 18 - 6
lakesuperior/store_layouts/rdf/simple_layout.py

@@ -12,7 +12,8 @@ from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
         srv_mgd_predicates, srv_mgd_types
-from lakesuperior.exceptions import InvalidResourceError
+from lakesuperior.exceptions import InvalidResourceError, \
+        ResourceNotExistsError
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
 from lakesuperior.util.translator import Translator
 
@@ -29,8 +30,8 @@ class SimpleLayout(BaseRdfLayout):
     for (possible) improved speed and reduced storage.
     '''
 
-    def extract_imr(self, uri, graph=None, minimal=False,
-            incl_inbound=False, embed_children=False, incl_srv_mgd=True):
+    def extract_imr(self, uri, strict=False, minimal=False, incl_inbound=False,
+                embed_children=False, incl_srv_mgd=True):
         '''
         See base_rdf_layout.extract_imr.
         '''
@@ -71,6 +72,11 @@ class SimpleLayout(BaseRdfLayout):
                     self._logger.debug('Removing type: {}'.format(t))
                     rsrc.remove(RDF.type, t)
 
+        #self._logger.debug('Found resource: {}'.format(
+        #        g.serialize(format='turtle').decode('utf-8')))
+        if strict and not len(g):
+            raise ResourceNotExistsError(uri)
+
         return Resource(g, uri)
 
 
@@ -86,7 +92,11 @@ class SimpleLayout(BaseRdfLayout):
         '''
         See base_rdf_layout.create_rsrc.
         '''
-        self.ds |= imr.graph
+        self._logger.debug('Creating resource:\n{}'.format(
+            imr.graph.serialize(format='turtle').decode('utf8')))
+        #self.ds |= imr.graph # This does not seem to work with datasets.
+        for t in imr.graph:
+            self.ds.add(t)
 
         return self.RES_CREATED
 
@@ -108,9 +118,11 @@ class SimpleLayout(BaseRdfLayout):
         imr.set(nsc['fcrepo'].createdBy, created_by)
 
         # Delete the stored triples.
-        self.delete_rsrc()
+        self.delete_rsrc(imr.identifier)
 
-        self.ds |= imr.graph
+        #self.ds |= imr.graph # This does not seem to work with datasets.
+        for t in imr.graph:
+            self.ds.add(t)
 
         return self.RES_UPDATED