浏览代码

Support Prefer headers for GET and PUT.

Stefano Cossu 7 年之前
父节点
当前提交
025bc95528

+ 40 - 10
lakesuperior/endpoints/ldp.py

@@ -1,12 +1,16 @@
 import logging
 import logging
 
 
+from collections import defaultdict
+
 from flask import Blueprint, request
 from flask import Blueprint, request
 
 
 from lakesuperior.exceptions import InvalidResourceError, \
 from lakesuperior.exceptions import InvalidResourceError, \
-        ResourceNotExistsError, ServerManagedTermError
-
+        ResourceExistsError, ResourceNotExistsError, \
+        InvalidResourceError, ServerManagedTermError
 from lakesuperior.model.ldp_rs import Ldpc, LdpRs
 from lakesuperior.model.ldp_rs import Ldpc, LdpRs
 from lakesuperior.model.ldp_nr import LdpNr
 from lakesuperior.model.ldp_nr import LdpNr
+from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
+from lakesuperior.util.translator import Translator
 
 
 
 
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
@@ -52,7 +56,6 @@ std_headers = {
     #'Allow' : ','.join(allow),
     #'Allow' : ','.join(allow),
 }
 }
 
 
-
 ## REST SERVICES ##
 ## REST SERVICES ##
 
 
 @ldp.route('/<path:uuid>', methods=['GET'])
 @ldp.route('/<path:uuid>', methods=['GET'])
@@ -62,16 +65,24 @@ def get_resource(uuid):
     '''
     '''
     Retrieve RDF or binary content.
     Retrieve RDF or binary content.
     '''
     '''
-    headers = std_headers
+    out_headers = std_headers
+
+    pref_return = defaultdict(dict)
+    if 'prefer' in request.headers:
+        prefer = Translator.parse_rfc7240(request.headers['prefer'])
+        logger.debug('Parsed Prefer header: {}'.format(prefer))
+        if 'return' in prefer:
+            pref_return = prefer['return']
+
     # @TODO Add conditions for LDP-NR
     # @TODO Add conditions for LDP-NR
     rsrc = Ldpc(uuid)
     rsrc = Ldpc(uuid)
     try:
     try:
-        out = rsrc.get()
+        out = rsrc.get(pref_return=pref_return)
     except ResourceNotExistsError:
     except ResourceNotExistsError:
         return 'Resource #{} not found.'.format(rsrc.uuid), 404
         return 'Resource #{} not found.'.format(rsrc.uuid), 404
     else:
     else:
-        headers = rsrc.head()
-        return (out.graph.serialize(format='turtle'), headers)
+        out_headers = rsrc.head()
+        return (out.graph.serialize(format='turtle'), out_headers)
 
 
 
 
 @ldp.route('/<path:parent>', methods=['POST'])
 @ldp.route('/<path:parent>', methods=['POST'])
@@ -111,14 +122,32 @@ def put_resource(uuid):
     '''
     '''
     Add a new resource at a specified URI.
     Add a new resource at a specified URI.
     '''
     '''
-    headers = std_headers
+    logger.info('Request headers: {}'.format(request.headers))
+    rsp_headers = std_headers
     rsrc = Ldpc(uuid)
     rsrc = Ldpc(uuid)
 
 
+    # Parse headers.
+    pref_handling = None
+    if 'prefer' in request.headers:
+        prefer = Translator.parse_rfc7240(request.headers['prefer'])
+        logger.debug('Parsed Prefer header: {}'.format(prefer))
+        if 'handling' in prefer:
+            pref_handling = prefer['handling']['value']
+
     try:
     try:
-        rsrc.put(request.get_data().decode('utf-8'))
+        ret = rsrc.put(
+            request.get_data().decode('utf-8'),
+            handling=pref_handling
+        )
+    except InvalidResourceError as e:
+        return str(e), 409
+    except ResourceExistsError as e:
+        return str(e), 409
     except ServerManagedTermError as e:
     except ServerManagedTermError as e:
         return str(e), 412
         return str(e), 412
-    return '', 204, headers
+    else:
+        res_code = 201 if ret == BaseRdfLayout.RES_CREATED else 204
+        return '', res_code, rsp_headers
 
 
 
 
 @ldp.route('/<path:uuid>', methods=['PATCH'])
 @ldp.route('/<path:uuid>', methods=['PATCH'])
@@ -154,3 +183,4 @@ def delete_resource(uuid):
 
 
     return '', 204, headers
     return '', 204, headers
 
 
+

+ 59 - 13
lakesuperior/model/ldp_rs.py

@@ -18,6 +18,11 @@ class LdpRs(Ldpr):
 
 
     Definition: https://www.w3.org/TR/ldp/#ldprs
     Definition: https://www.w3.org/TR/ldp/#ldprs
     '''
     '''
+
+    RETURN_CHILD_RES_URI = nsc['fcrepo'].EmbedResources
+    RETURN_INBOUND_REF_URI = nsc['fcrepo'].InboundReferences
+    RETURN_SRV_MGD_RES_URI = nsc['fcrepo'].ServerManaged
+
     base_types = {
     base_types = {
         nsc['ldp'].RDFSource
         nsc['ldp'].RDFSource
     }
     }
@@ -51,11 +56,35 @@ class LdpRs(Ldpr):
         return headers
         return headers
 
 
 
 
-    def get(self, inbound=False, children=True, srv_mgd=True):
+    def get(self, pref_return=None):
         '''
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
         https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
+
+        @param pref_return (dict) Parsed `return` preference of a `Prefer`
+        header: an optional `value` string and an optional `parameters`
+        dict whose `include` and `omit` entries are space-separated URIs.
+        None or an empty dict requests the default representation.
+
+        @raise ResourceNotExistsError if the retrieved graph is empty.
         '''
         '''
-        im_rsrc = self.rdfly.out_rsrc(inbound)
+        pref_return = pref_return or {}
+        # Server-managed triples are returned unless explicitly omitted.
+        kwargs = {'incl_srv_mgd': True}
+
+        if pref_return.get('value') == 'minimal':
+            kwargs['minimal'] = True
+        else:
+            params = pref_return.get('parameters') or {}
+            # A parameter sent without a value is parsed as None; treat it
+            # like an empty URI list instead of failing on `.split`.
+            include = (params.get('include') or '').split()
+            omit = (params.get('omit') or '').split()
+
+            self._logger.debug('Include: {}'.format(include))
+            self._logger.debug('Omit: {}'.format(omit))
+
+            if str(self.RETURN_INBOUND_REF_URI) in include:
+                kwargs['incl_inbound'] = True
+            if str(self.RETURN_CHILD_RES_URI) in omit:
+                # The key must match the `incl_children` keyword that the
+                # layout's `extract_imr` accepts.
+                kwargs['incl_children'] = False
+            if str(self.RETURN_SRV_MGD_RES_URI) in omit:
+                kwargs['incl_srv_mgd'] = False
+
+        im_rsrc = self.rdfly.out_rsrc(**kwargs)
+
         if not len(im_rsrc.graph):
         if not len(im_rsrc.graph):
             raise ResourceNotExistsError(im_rsrc.uuid)
             raise ResourceNotExistsError(im_rsrc.uuid)
 
 
@@ -63,7 +92,7 @@ class LdpRs(Ldpr):
 
 
 
 
     @transactional
     @transactional
-    def post(self, data, format='text/turtle'):
+    def post(self, data, format='text/turtle', handling=None):
         '''
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
         https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
 
 
@@ -71,7 +100,7 @@ class LdpRs(Ldpr):
         '''
         '''
         g = Graph().parse(data=data, format=format, publicID=self.urn)
         g = Graph().parse(data=data, format=format, publicID=self.urn)
 
 
-        self._check_mgd_terms(g)
+        g = self._check_mgd_terms(g, handling)
         self._ensure_single_subject_rdf(g)
         self._ensure_single_subject_rdf(g)
 
 
         for t in self.base_types:
         for t in self.base_types:
@@ -83,22 +112,24 @@ class LdpRs(Ldpr):
 
 
 
 
     @transactional
     @transactional
-    def put(self, data, format='text/turtle'):
+    def put(self, data, format='text/turtle', handling=None):
         '''
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
         https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
+
+        Create or replace this resource from a serialized RDF payload.
+
+        @param data (string) Serialized RDF, parsed with `self.urn` as the
+        publicID.
+        @param format (string) Format identifier handed to the rdflib parser.
+        @param handling (string) Forwarded to `_check_mgd_terms`, which
+        decides what happens to server-managed terms found in the payload.
+
+        @return Whatever `self.rdfly.create_or_replace_rsrc` returns.
         '''
         '''
         g = Graph().parse(data=data, format=format, publicID=self.urn)
         g = Graph().parse(data=data, format=format, publicID=self.urn)
 
 
-        self._check_mgd_terms(g)
+        g = self._check_mgd_terms(g, handling)
         self._ensure_single_subject_rdf(g)
         self._ensure_single_subject_rdf(g)
 
 
+        # Assert every base type of this class on the resource itself.
         for t in self.base_types:
         for t in self.base_types:
             g.add((self.urn, RDF.type, t))
             g.add((self.urn, RDF.type, t))
 
 
-        self.rdfly.create_or_replace_rsrc(g)
+        res = self.rdfly.create_or_replace_rsrc(g)
 
 
         self._set_containment_rel()
         self._set_containment_rel()
 
 
+        return res
+
 
 
     @transactional
     @transactional
     @must_exist
     @must_exist
@@ -113,24 +144,38 @@ class LdpRs(Ldpr):
 
 
     ## PROTECTED METHODS ##
     ## PROTECTED METHODS ##
 
 
-    def _check_mgd_terms(self, g):
+    def _check_mgd_terms(self, g, handling='strict'):
         '''
         '''
         Check whether server-managed terms are in a RDF payload.
         Check whether server-managed terms are in a RDF payload.
+
+        @param g (rdflib.Graph) Payload graph. It is modified in place when
+        offending triples are stripped.
+        @param handling (string) `strict`, or None when the client expressed
+        no preference, raises on the first category of offending terms found.
+        Any other value strips the offending triples instead.
+
+        @return The same graph `g`, possibly with triples removed.
+        @raise ServerManagedTermError in strict mode if server-managed
+        subjects, predicates or types are present.
         '''
         '''
         offending_subjects = set(g.subjects()) & srv_mgd_subjects
         offending_subjects = set(g.subjects()) & srv_mgd_subjects
         if offending_subjects:
         if offending_subjects:
-            raise ServerManagedTermError(offending_subjects, 's')
+            # Callers pass None when no Prefer header was sent; that must
+            # not silently downgrade the declared `strict` default.
+            if handling in (None, 'strict'):
+                raise ServerManagedTermError(offending_subjects, 's')
+            else:
+                for s in offending_subjects:
+                    # None is rdflib's wildcard. A Variable is an ordinary
+                    # term and would match no triple in the graph.
+                    g.remove((s, None, None))
 
 
         offending_predicates = set(g.predicates()) & srv_mgd_predicates
         offending_predicates = set(g.predicates()) & srv_mgd_predicates
         if offending_predicates:
         if offending_predicates:
-            raise ServerManagedTermError(offending_predicates, 'p')
+            if handling in (None, 'strict'):
+                raise ServerManagedTermError(offending_predicates, 'p')
+            else:
+                for p in offending_predicates:
+                    g.remove((None, p, None))
 
 
         offending_types = set(g.objects(predicate=RDF.type)) & srv_mgd_types
         offending_types = set(g.objects(predicate=RDF.type)) & srv_mgd_types
         if offending_types:
         if offending_types:
-            raise ServerManagedTermError(offending_types, 't')
+            if handling in (None, 'strict'):
+                raise ServerManagedTermError(offending_types, 't')
+            else:
+                for t in offending_types:
+                    g.remove((None, RDF.type, t))
+
+        return g
 
 
 
 
-    def _sparql_delta(self, q):
+    def _sparql_delta(self, q, handling=None):
         '''
         '''
         Calculate the delta obtained by a SPARQL Update operation.
         Calculate the delta obtained by a SPARQL Update operation.
 
 
@@ -165,7 +210,8 @@ class LdpRs(Ldpr):
         self._logger.info('Adding: {}'.format(
         self._logger.info('Adding: {}'.format(
             add.serialize(format='turtle').decode('utf8')))
             add.serialize(format='turtle').decode('utf8')))
 
 
-        self._check_mgd_terms(remove + add)
+        remove = self._check_mgd_terms(remove, handling)
+        add = self._check_mgd_terms(add, handling)
 
 
         return remove, add
         return remove, add
 
 

+ 2 - 2
lakesuperior/model/ldpr.py

@@ -275,8 +275,8 @@ class Ldpr(metaclass=ABCMeta):
                 return LdpNr(uuid)
                 return LdpNr(uuid)
             if t == cls.LDP_RS_TYPE:
             if t == cls.LDP_RS_TYPE:
                 return LdpRs(uuid)
                 return LdpRs(uuid)
-
-        raise ResourceNotExistsError(uuid)
+            else:
+                raise ResourceNotExistsError(uuid)
 
 
 
 
     @classmethod
     @classmethod

+ 3 - 0
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -63,6 +63,9 @@ class BaseRdfLayout(metaclass=ABCMeta):
     # N.B. This is Fuseki-specific.
     # N.B. This is Fuseki-specific.
     UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
     UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
 
 
+    RES_CREATED = '_created_'
+    RES_UPDATED = '_updated_'
+
     _conf = config['application']['store']['ldp_rs']
     _conf = config['application']['store']['ldp_rs']
     _logger = logging.getLogger(__name__)
     _logger = logging.getLogger(__name__)
 
 

+ 39 - 14
lakesuperior/store_layouts/rdf/simple_layout.py

@@ -3,13 +3,15 @@ from copy import deepcopy
 import arrow
 import arrow
 
 
 from rdflib import Graph
 from rdflib import Graph
-from rdflib.namespace import XSD
+from rdflib.namespace import RDF, XSD
 from rdflib.query import ResultException
 from rdflib.query import ResultException
 from rdflib.resource import Resource
 from rdflib.resource import Resource
 from rdflib.term import Literal, URIRef, Variable
 from rdflib.term import Literal, URIRef, Variable
 
 
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
+from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
+        srv_mgd_predicates, srv_mgd_types
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout, \
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout, \
         needs_rsrc
         needs_rsrc
 from lakesuperior.util.digest import Digest
 from lakesuperior.util.digest import Digest
@@ -44,7 +46,7 @@ class SimpleLayout(BaseRdfLayout):
             etag = digest.identifier.split(':')[-1]
             etag = digest.identifier.split(':')[-1]
             headers['ETag'] = 'W/"{}"'.format(etag),
             headers['ETag'] = 'W/"{}"'.format(etag),
 
 
-        last_updated_term = self.rsrc.value(nsc['fcrepo'].lastUpdated)
+        last_updated_term = self.rsrc.value(nsc['fcrepo'].lastModified)
         if last_updated_term:
         if last_updated_term:
             headers['Last-Modified'] = arrow.get(last_updated_term)\
             headers['Last-Modified'] = arrow.get(last_updated_term)\
                 .format('ddd, D MMM YYYY HH:mm:ss Z')
                 .format('ddd, D MMM YYYY HH:mm:ss Z')
@@ -52,23 +54,31 @@ class SimpleLayout(BaseRdfLayout):
         return headers
         return headers
 
 
 
 
-    def extract_imr(self, uri=None, graph=None, inbound=False):
+    def extract_imr(self, uri=None, graph=None, minimal=False,
+            incl_inbound=False, incl_children=True, incl_srv_mgd=True):
         '''
         '''
         See base_rdf_layout.extract_imr.
         See base_rdf_layout.extract_imr.
         '''
         '''
         uri = uri or self.base_urn
         uri = uri or self.base_urn
 
 
         inbound_qry = '\n?s1 ?p1 {}'.format(self.base_urn.n3()) \
         inbound_qry = '\n?s1 ?p1 {}'.format(self.base_urn.n3()) \
-                if inbound else ''
+                if incl_inbound else ''
+        embed_children_qry = '''
+        OPTIONAL {{
+          {0} ldp:contains ?c .
+          ?c ?cp ?co .
+        }}
+        '''.format(uri.n3()) if incl_children else ''
 
 
         q = '''
         q = '''
         CONSTRUCT {{
         CONSTRUCT {{
             {0} ?p ?o .{1}
             {0} ?p ?o .{1}
+            ?c ?cp ?co .
         }} WHERE {{
         }} WHERE {{
-            {0} ?p ?o .{1}
-            #FILTER (?p != premis:hasMessageDigest) .
+            {0} ?p ?o .{1}{2}
+            FILTER (?p != premis:hasMessageDigest) .
         }}
         }}
-        '''.format(uri.n3(), inbound_qry)
+        '''.format(uri.n3(), inbound_qry, embed_children_qry)
 
 
         try:
         try:
             qres = self.query(q)
             qres = self.query(q)
@@ -77,16 +87,25 @@ class SimpleLayout(BaseRdfLayout):
             g = Graph()
             g = Graph()
         else:
         else:
             g = qres.graph
             g = qres.graph
+            rsrc = Resource(g, uri)
+            if not incl_srv_mgd:
+                self._logger.info('Removing server managed triples.')
+                for p in srv_mgd_predicates:
+                    self._logger.debug('Removing predicate: {}'.format(p))
+                    rsrc.remove(p)
+                for t in srv_mgd_types:
+                    self._logger.debug('Removing type: {}'.format(t))
+                    rsrc.remove(RDF.type, t)
 
 
-        return Resource(g, uri)
+            return rsrc
 
 
 
 
     @needs_rsrc
     @needs_rsrc
-    def out_rsrc(self, srv_mgd=True, inbound=False, embed_children=False):
+    def out_rsrc(self, **kwargs):
         '''
         '''
         See base_rdf_layout.out_rsrc.
         See base_rdf_layout.out_rsrc.
         '''
         '''
-        im_rsrc = self.extract_imr(inbound=inbound)
+        im_rsrc = self.extract_imr(**kwargs)
 
 
         im_rsrc.remove(nsc['premis'].hasMessageDigest)
         im_rsrc.remove(nsc['premis'].hasMessageDigest)
 
 
@@ -125,19 +144,25 @@ class SimpleLayout(BaseRdfLayout):
             created_by = self.rsrc.value(nsc['fcrepo'].createdBy)
             created_by = self.rsrc.value(nsc['fcrepo'].createdBy)
 
 
             self.delete_rsrc()
             self.delete_rsrc()
+            res = self.RES_UPDATED
         else:
         else:
             created = ts
             created = ts
             created_by = Literal('BypassAdmin')
             created_by = Literal('BypassAdmin')
+            res = self.RES_CREATED
+
+        self._logger.info('Created timestamp: {}'.format(ts))
 
 
         self.rsrc.set(nsc['fcrepo'].created, created)
         self.rsrc.set(nsc['fcrepo'].created, created)
         self.rsrc.set(nsc['fcrepo'].createdBy, created_by)
         self.rsrc.set(nsc['fcrepo'].createdBy, created_by)
 
 
-        self.rsrc.set(nsc['fcrepo'].lastUpdated, ts)
-        self.rsrc.set(nsc['fcrepo'].lastUpdatedBy, Literal('BypassAdmin'))
+        self.rsrc.set(nsc['fcrepo'].lastModified, ts)
+        self.rsrc.set(nsc['fcrepo'].lastModifiedBy, Literal('BypassAdmin'))
 
 
         for s, p, o in g:
         for s, p, o in g:
             self.ds.add((s, p, o))
             self.ds.add((s, p, o))
 
 
+        return res
+
 
 
     @needs_rsrc
     @needs_rsrc
     def create_rsrc(self, g):
     def create_rsrc(self, g):
@@ -171,8 +196,8 @@ class SimpleLayout(BaseRdfLayout):
         q = Translator.localize_string(data).replace(
         q = Translator.localize_string(data).replace(
                 '<>', self.rsrc.identifier.n3())
                 '<>', self.rsrc.identifier.n3())
 
 
-        self.rsrc.set(nsc['fcrepo'].lastUpdated, ts)
-        self.rsrc.set(nsc['fcrepo'].lastUpdatedBy, Literal('BypassAdmin'))
+        self.rsrc.set(nsc['fcrepo'].lastModified, ts)
+        self.rsrc.set(nsc['fcrepo'].lastModifiedBy, Literal('BypassAdmin'))
 
 
         self.ds.update(q)
         self.ds.update(q)
 
 

+ 42 - 0
lakesuperior/util/translator.py

@@ -1,3 +1,5 @@
+from collections import defaultdict
+
 from flask import request
 from flask import request
 from rdflib.term import URIRef
 from rdflib.term import URIRef
 
 
@@ -118,3 +120,43 @@ class Translator:
         global_uri = Translator.globalize_term(urn)
         global_uri = Translator.globalize_term(urn)
 
 
         return global_g.resource(global_uri)
         return global_g.resource(global_uri)
+
+
+    @staticmethod
+    def parse_rfc7240(h_str):
+        '''
+        Parse `Prefer` header as per https://tools.ietf.org/html/rfc7240
+
+        The `cgi.parse_header` standard method does not work with all possible
+        use cases for this header.
+
+        Known limitation: the header is split on every ',' and ';', so a
+        quoted value containing either character is not supported.
+
+        @param h_str (string) The header(s) as a comma-separated list of Prefer
+        statements, excluding the `Prefer: ` token.
+
+        @return (defaultdict) Preference name => dict holding an optional
+        `value` string and an optional `parameters` dict (parameter name =>
+        value, or None for a parameter without a value).
+        '''
+        parsed_hdr = defaultdict(dict)
+
+        # Split up headers by comma
+        hdr_list = [ x.strip() for x in h_str.split(',') ]
+        for hdr in hdr_list:
+            parsed_pref = defaultdict(dict)
+            # Split up tokens by semicolon
+            token_list = [ token.strip() for token in hdr.split(';') ]
+            # Split on the first '=' only: a value (e.g. a URI with a query
+            # string) may itself contain '='.
+            prefer_token = token_list.pop(0).split('=', 1)
+            prefer_name = prefer_token[0].strip()
+            # An empty header or a trailing comma yields an empty name.
+            if not prefer_name:
+                continue
+            # If preference has a '=', it has a value, else none.
+            if len(prefer_token) > 1:
+                parsed_pref['value'] = prefer_token[1].strip().strip('"')
+
+            for param_token in token_list:
+                # If the token list had a ';' the preference has a parameter.
+                param_parts = [ prm.strip().strip('"') \
+                        for prm in param_token.split('=', 1) ]
+                param_value = param_parts[1] if len(param_parts) > 1 else None
+                parsed_pref['parameters'][param_parts[0]] = param_value
+
+            parsed_hdr[prefer_name] = parsed_pref
+
+        return parsed_hdr
+
+