Browse Source

Support Prefer headers for GET and PUT.

Stefano Cossu 7 năm trước cách đây
mục cha
commit
025bc95528

+ 40 - 10
lakesuperior/endpoints/ldp.py

@@ -1,12 +1,16 @@
 import logging
 
+from collections import defaultdict
+
 from flask import Blueprint, request
 
 from lakesuperior.exceptions import InvalidResourceError, \
-        ResourceNotExistsError, ServerManagedTermError
-
+        ResourceExistsError, ResourceNotExistsError, \
+        InvalidResourceError, ServerManagedTermError
 from lakesuperior.model.ldp_rs import Ldpc, LdpRs
 from lakesuperior.model.ldp_nr import LdpNr
+from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
+from lakesuperior.util.translator import Translator
 
 
 logger = logging.getLogger(__name__)
@@ -52,7 +56,6 @@ std_headers = {
     #'Allow' : ','.join(allow),
 }
 
-
 ## REST SERVICES ##
 
 @ldp.route('/<path:uuid>', methods=['GET'])
@@ -62,16 +65,24 @@ def get_resource(uuid):
     '''
     Retrieve RDF or binary content.
     '''
-    headers = std_headers
+    out_headers = std_headers
+
+    pref_return = defaultdict(dict)
+    if 'prefer' in request.headers:
+        prefer = Translator.parse_rfc7240(request.headers['prefer'])
+        logger.debug('Parsed Prefer header: {}'.format(prefer))
+        if 'return' in prefer:
+            pref_return = prefer['return']
+
     # @TODO Add conditions for LDP-NR
     rsrc = Ldpc(uuid)
     try:
-        out = rsrc.get()
+        out = rsrc.get(pref_return=pref_return)
     except ResourceNotExistsError:
         return 'Resource #{} not found.'.format(rsrc.uuid), 404
     else:
-        headers = rsrc.head()
-        return (out.graph.serialize(format='turtle'), headers)
+        out_headers = rsrc.head()
+        return (out.graph.serialize(format='turtle'), out_headers)
 
 
 @ldp.route('/<path:parent>', methods=['POST'])
@@ -111,14 +122,32 @@ def put_resource(uuid):
     '''
     Add a new resource at a specified URI.
     '''
-    headers = std_headers
+    logger.info('Request headers: {}'.format(request.headers))
+    rsp_headers = std_headers
     rsrc = Ldpc(uuid)
 
+    # Parse headers.
+    pref_handling = None
+    if 'prefer' in request.headers:
+        prefer = Translator.parse_rfc7240(request.headers['prefer'])
+        logger.debug('Parsed Prefer header: {}'.format(prefer))
+        if 'handling' in prefer:
+            pref_handling = prefer['handling']['value']
+
     try:
-        rsrc.put(request.get_data().decode('utf-8'))
+        ret = rsrc.put(
+            request.get_data().decode('utf-8'),
+            handling=pref_handling
+        )
+    except InvalidResourceError as e:
+        return str(e), 409
+    except ResourceExistsError as e:
+        return str(e), 409
     except ServerManagedTermError as e:
         return str(e), 412
-    return '', 204, headers
+    else:
+        res_code = 201 if ret == BaseRdfLayout.RES_CREATED else 204
+        return '', res_code, rsp_headers
 
 
 @ldp.route('/<path:uuid>', methods=['PATCH'])
@@ -154,3 +183,4 @@ def delete_resource(uuid):
 
     return '', 204, headers
 
+

+ 59 - 13
lakesuperior/model/ldp_rs.py

@@ -18,6 +18,11 @@ class LdpRs(Ldpr):
 
     Definition: https://www.w3.org/TR/ldp/#ldprs
     '''
+
+    RETURN_CHILD_RES_URI = nsc['fcrepo'].EmbedResources
+    RETURN_INBOUND_REF_URI = nsc['fcrepo'].InboundReferences
+    RETURN_SRV_MGD_RES_URI = nsc['fcrepo'].ServerManaged
+
     base_types = {
         nsc['ldp'].RDFSource
     }
@@ -51,11 +56,35 @@ class LdpRs(Ldpr):
         return headers
 
 
-    def get(self, inbound=False, children=True, srv_mgd=True):
+    def get(self, pref_return):
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
         '''
-        im_rsrc = self.rdfly.out_rsrc(inbound)
+        kwargs = {}
+
+        minimal = embed_children = incl_inbound = False
+        kwargs['incl_srv_mgd'] = True
+
+        if 'value' in pref_return and pref_return['value'] == 'minimal':
+            kwargs['minimal'] = True
+        else:
+            include = pref_return['parameters']['include'].split(' ') \
+                    if 'include' in pref_return['parameters'] else []
+            omit = pref_return['parameters']['omit'].split(' ') \
+                    if 'omit' in pref_return['parameters'] else []
+
+            self._logger.debug('Include: {}'.format(include))
+            self._logger.debug('Omit: {}'.format(omit))
+
+            if str(self.RETURN_INBOUND_REF_URI) in include:
+                    kwargs['incl_inbound'] = True
+            if str(self.RETURN_CHILD_RES_URI) in omit:
+                    kwargs['embed_chldren'] = False
+            if str(self.RETURN_SRV_MGD_RES_URI) in omit:
+                    kwargs['incl_srv_mgd'] = False
+
+        im_rsrc = self.rdfly.out_rsrc(**kwargs)
+
         if not len(im_rsrc.graph):
             raise ResourceNotExistsError(im_rsrc.uuid)
 
@@ -63,7 +92,7 @@ class LdpRs(Ldpr):
 
 
     @transactional
-    def post(self, data, format='text/turtle'):
+    def post(self, data, format='text/turtle', handling=None):
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
 
@@ -71,7 +100,7 @@ class LdpRs(Ldpr):
         '''
         g = Graph().parse(data=data, format=format, publicID=self.urn)
 
-        self._check_mgd_terms(g)
+        g = self._check_mgd_terms(g, handling)
         self._ensure_single_subject_rdf(g)
 
         for t in self.base_types:
@@ -83,22 +112,24 @@ class LdpRs(Ldpr):
 
 
     @transactional
-    def put(self, data, format='text/turtle'):
+    def put(self, data, format='text/turtle', handling=None):
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
+
+        Parse the payload into a graph, check it for server-managed terms,
+        add the base RDF types and hand it to the store layout to create or
+        replace the resource.
+
+        @param data (string) Serialized RDF payload.
+        @param format (string) RDF serialization format passed to the parser.
+        @param handling (string) Value of the `handling` preference, forwarded
+        unchanged to `_check_mgd_terms`.
+
+        @return The value returned by the layout's `create_or_replace_rsrc`.
+        NOTE(review): the LDP endpoint compares it against
+        `BaseRdfLayout.RES_CREATED` to pick the HTTP status; confirm that
+        every layout returns one of the `RES_*` constants.
         '''
         g = Graph().parse(data=data, format=format, publicID=self.urn)
 
-        self._check_mgd_terms(g)
+        # The checker returns the graph to use from here on.
+        g = self._check_mgd_terms(g, handling)
         self._ensure_single_subject_rdf(g)
 
         for t in self.base_types:
             g.add((self.urn, RDF.type, t))
 
-        self.rdfly.create_or_replace_rsrc(g)
+        res = self.rdfly.create_or_replace_rsrc(g)
 
         self._set_containment_rel()
 
+        return res
+
 
     @transactional
     @must_exist
@@ -113,24 +144,38 @@ class LdpRs(Ldpr):
 
     ## PROTECTED METHODS ##
 
-    def _check_mgd_terms(self, g):
+    def _check_mgd_terms(self, g, handling='strict'):
         '''
         Check whether server-managed terms are in a RDF payload.
+
+        @param g The payload graph. NOTE(review): assumed to be an
+        `rdflib.Graph`, as built by the callers visible in this change.
+        @param handling (string) `strict` (or None, i.e. no preference
+        expressed) raises on the first kind of offending term found; any other
+        value (e.g. `lenient`) strips the offending triples instead.
+
+        @return The graph `g`; in non-strict mode it is modified in place.
+
+        @raise ServerManagedTermError In strict mode, if the payload contains
+        server-managed subjects, predicates or types.
         '''
+        # Callers forward `handling=None` when no Prefer header was sent; that
+        # must not silently downgrade the check to lenient.
+        strict = handling is None or handling == 'strict'
+
         offending_subjects = set(g.subjects()) & srv_mgd_subjects
         if offending_subjects:
-            raise ServerManagedTermError(offending_subjects, 's')
+            if strict:
+                raise ServerManagedTermError(offending_subjects, 's')
+            else:
+                for s in offending_subjects:
+                    # None is the triple-pattern wildcard; a Variable term
+                    # would only match a literal Variable and remove nothing.
+                    g.remove((s, None, None))
 
         offending_predicates = set(g.predicates()) & srv_mgd_predicates
         if offending_predicates:
-            raise ServerManagedTermError(offending_predicates, 'p')
+            if strict:
+                raise ServerManagedTermError(offending_predicates, 'p')
+            else:
+                for p in offending_predicates:
+                    g.remove((None, p, None))
 
         offending_types = set(g.objects(predicate=RDF.type)) & srv_mgd_types
         if offending_types:
-            raise ServerManagedTermError(offending_types, 't')
+            if strict:
+                raise ServerManagedTermError(offending_types, 't')
+            else:
+                for t in offending_types:
+                    g.remove((None, RDF.type, t))
+
+        return g
 
 
-    def _sparql_delta(self, q):
+    def _sparql_delta(self, q, handling=None):
         '''
         Calculate the delta obtained by a SPARQL Update operation.
 
@@ -165,7 +210,8 @@ class LdpRs(Ldpr):
         self._logger.info('Adding: {}'.format(
             add.serialize(format='turtle').decode('utf8')))
 
-        self._check_mgd_terms(remove + add)
+        remove = self._check_mgd_terms(remove, handling)
+        add = self._check_mgd_terms(add, handling)
 
         return remove, add
 

+ 2 - 2
lakesuperior/model/ldpr.py

@@ -275,8 +275,8 @@ class Ldpr(metaclass=ABCMeta):
                 return LdpNr(uuid)
             if t == cls.LDP_RS_TYPE:
                 return LdpRs(uuid)
-
-        raise ResourceNotExistsError(uuid)
+            else:
+                raise ResourceNotExistsError(uuid)
 
 
     @classmethod

+ 3 - 0
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -63,6 +63,9 @@ class BaseRdfLayout(metaclass=ABCMeta):
     # N.B. This is Fuseki-specific.
     UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
 
+    RES_CREATED = '_created_'
+    RES_UPDATED = '_updated_'
+
     _conf = config['application']['store']['ldp_rs']
     _logger = logging.getLogger(__name__)
 

+ 39 - 14
lakesuperior/store_layouts/rdf/simple_layout.py

@@ -3,13 +3,15 @@ from copy import deepcopy
 import arrow
 
 from rdflib import Graph
-from rdflib.namespace import XSD
+from rdflib.namespace import RDF, XSD
 from rdflib.query import ResultException
 from rdflib.resource import Resource
 from rdflib.term import Literal, URIRef, Variable
 
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
+from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
+        srv_mgd_predicates, srv_mgd_types
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout, \
         needs_rsrc
 from lakesuperior.util.digest import Digest
@@ -44,7 +46,7 @@ class SimpleLayout(BaseRdfLayout):
             etag = digest.identifier.split(':')[-1]
             headers['ETag'] = 'W/"{}"'.format(etag),
 
-        last_updated_term = self.rsrc.value(nsc['fcrepo'].lastUpdated)
+        last_updated_term = self.rsrc.value(nsc['fcrepo'].lastModified)
         if last_updated_term:
             headers['Last-Modified'] = arrow.get(last_updated_term)\
                 .format('ddd, D MMM YYYY HH:mm:ss Z')
@@ -52,23 +54,31 @@ class SimpleLayout(BaseRdfLayout):
         return headers
 
 
-    def extract_imr(self, uri=None, graph=None, inbound=False):
+    def extract_imr(self, uri=None, graph=None, minimal=False,
+            incl_inbound=False, incl_children=True, incl_srv_mgd=True):
         '''
         See base_rdf_layout.extract_imr.
         '''
         uri = uri or self.base_urn
 
         inbound_qry = '\n?s1 ?p1 {}'.format(self.base_urn.n3()) \
-                if inbound else ''
+                if incl_inbound else ''
+        embed_children_qry = '''
+        OPTIONAL {{
+          {0} ldp:contains ?c .
+          ?c ?cp ?co .
+        }}
+        '''.format(uri.n3()) if incl_children else ''
 
         q = '''
         CONSTRUCT {{
             {0} ?p ?o .{1}
+            ?c ?cp ?co .
         }} WHERE {{
-            {0} ?p ?o .{1}
-            #FILTER (?p != premis:hasMessageDigest) .
+            {0} ?p ?o .{1}{2}
+            FILTER (?p != premis:hasMessageDigest) .
         }}
-        '''.format(uri.n3(), inbound_qry)
+        '''.format(uri.n3(), inbound_qry, embed_children_qry)
 
         try:
             qres = self.query(q)
@@ -77,16 +87,25 @@ class SimpleLayout(BaseRdfLayout):
             g = Graph()
         else:
             g = qres.graph
+            rsrc = Resource(g, uri)
+            if not incl_srv_mgd:
+                self._logger.info('Removing server managed triples.')
+                for p in srv_mgd_predicates:
+                    self._logger.debug('Removing predicate: {}'.format(p))
+                    rsrc.remove(p)
+                for t in srv_mgd_types:
+                    self._logger.debug('Removing type: {}'.format(t))
+                    rsrc.remove(RDF.type, t)
 
-        return Resource(g, uri)
+            return rsrc
 
 
     @needs_rsrc
-    def out_rsrc(self, srv_mgd=True, inbound=False, embed_children=False):
+    def out_rsrc(self, **kwargs):
         '''
         See base_rdf_layout.out_rsrc.
         '''
-        im_rsrc = self.extract_imr(inbound=inbound)
+        im_rsrc = self.extract_imr(**kwargs)
 
         im_rsrc.remove(nsc['premis'].hasMessageDigest)
 
@@ -125,19 +144,25 @@ class SimpleLayout(BaseRdfLayout):
             created_by = self.rsrc.value(nsc['fcrepo'].createdBy)
 
             self.delete_rsrc()
+            res = self.RES_UPDATED
         else:
             created = ts
             created_by = Literal('BypassAdmin')
+            res = self.RES_CREATED
+
+        self._logger.info('Created timestamp: {}'.format(ts))
 
         self.rsrc.set(nsc['fcrepo'].created, created)
         self.rsrc.set(nsc['fcrepo'].createdBy, created_by)
 
-        self.rsrc.set(nsc['fcrepo'].lastUpdated, ts)
-        self.rsrc.set(nsc['fcrepo'].lastUpdatedBy, Literal('BypassAdmin'))
+        self.rsrc.set(nsc['fcrepo'].lastModified, ts)
+        self.rsrc.set(nsc['fcrepo'].lastModifiedBy, Literal('BypassAdmin'))
 
         for s, p, o in g:
             self.ds.add((s, p, o))
 
+        return res
+
 
     @needs_rsrc
     def create_rsrc(self, g):
@@ -171,8 +196,8 @@ class SimpleLayout(BaseRdfLayout):
         q = Translator.localize_string(data).replace(
                 '<>', self.rsrc.identifier.n3())
 
-        self.rsrc.set(nsc['fcrepo'].lastUpdated, ts)
-        self.rsrc.set(nsc['fcrepo'].lastUpdatedBy, Literal('BypassAdmin'))
+        self.rsrc.set(nsc['fcrepo'].lastModified, ts)
+        self.rsrc.set(nsc['fcrepo'].lastModifiedBy, Literal('BypassAdmin'))
 
         self.ds.update(q)
 

+ 42 - 0
lakesuperior/util/translator.py

@@ -1,3 +1,5 @@
+from collections import defaultdict
+
 from flask import request
 from rdflib.term import URIRef
 
@@ -118,3 +120,43 @@ class Translator:
         global_uri = Translator.globalize_term(urn)
 
         return global_g.resource(global_uri)
+
+
+    @staticmethod
+    def parse_rfc7240(h_str):
+        '''
+        Parse `Prefer` header as per https://tools.ietf.org/html/rfc7240
+
+        The `cgi.parse_header` standard method does not work with all possible
+        use cases for this header.
+
+        Preference names are lower-cased because RFC 7240 compares them
+        case-insensitively; values and parameters keep their case. If a
+        preference is repeated, only its first occurrence is kept, as the RFC
+        requires. Commas, semicolons and equal signs inside double-quoted
+        strings are not treated as separators (backslash-escaped quotes are
+        not supported).
+
+        @param h_str (string) The header(s) as a comma-separated list of Prefer
+        statements, excluding the `Prefer: ` token.
+
+        @return defaultdict Preference names mapped to a dict with an optional
+        `value` key (string) and an optional `parameters` key (dict of
+        parameter name to string value, or None for valueless parameters).
+        '''
+        def split_unquoted(text, sep):
+            # Split on `sep`, ignoring separators inside double quotes.
+            parts, buf, in_quotes = [], [], False
+            for char in text:
+                if char == '"':
+                    in_quotes = not in_quotes
+                if char == sep and not in_quotes:
+                    parts.append(''.join(buf))
+                    buf = []
+                else:
+                    buf.append(char)
+            parts.append(''.join(buf))
+            return parts
+
+        def split_pair(token):
+            # Split `name=value` on the first '=' only, so that values such
+            # as URIs with query strings are not truncated.
+            name, has_value, value = token.partition('=')
+            value = value.strip().strip('"') if has_value else None
+            return name.strip(), value
+
+        parsed_hdr = defaultdict(dict)
+
+        # Split up headers by comma
+        for hdr in split_unquoted(h_str, ','):
+            # Split up tokens by semicolon
+            token_list = [token.strip() for token in split_unquoted(hdr, ';')]
+            prefer_name, prefer_value = split_pair(token_list.pop(0))
+            prefer_name = prefer_name.lower()
+            # Skip empty segments (empty header, stray comma) and repeated
+            # preferences: only the first instance counts.
+            if not prefer_name or prefer_name in parsed_hdr:
+                continue
+
+            parsed_pref = defaultdict(dict)
+            # If preference has a '=', it has a value, else none.
+            if prefer_value is not None:
+                parsed_pref['value'] = prefer_value
+
+            for param_token in token_list:
+                # If the token list had a ';' the preference has a parameter.
+                if not param_token:
+                    continue
+                param_name, param_value = split_pair(param_token)
+                parsed_pref['parameters'][param_name.strip('"')] = param_value
+
+            parsed_hdr[prefer_name] = parsed_pref
+
+        return parsed_hdr
+
+