Sfoglia il codice sorgente

Simplify stack and move methods to less specific classes.

Stefano Cossu 7 anni fa
parent
commit
2a375344eb

+ 23 - 11
lakesuperior/endpoints/ldp.py

@@ -25,16 +25,16 @@ ldp = Blueprint('ldp', __name__)
 accept_patch = (
     'application/sparql-update',
 )
-accept_post = (
+accept_post_rdf = (
     'application/ld+json',
     'application/n-triples',
     'application/rdf+xml',
-    'application/x-turtle',
-    'application/xhtml+xml',
-    'application/xml',
-    'text/html',
+    #'application/x-turtle',
+    #'application/xhtml+xml',
+    #'application/xml',
+    #'text/html',
     'text/n3',
-    'text/plain',
+    #'text/plain',
     'text/rdf+n3',
     'text/turtle',
 )
@@ -52,7 +52,7 @@ accept_post = (
 
 std_headers = {
     'Accept-Patch' : ','.join(accept_patch),
-    'Accept-Post' : ','.join(accept_post),
+    'Accept-Post' : ','.join(accept_post_rdf),
     #'Allow' : ','.join(allow),
 }
 
@@ -92,14 +92,26 @@ def post_resource(parent):
     '''
     Add a new resource in a new URI.
     '''
-    headers = std_headers
+    out_headers = std_headers
     try:
         slug = request.headers['Slug']
     except KeyError:
         slug = None
 
+    if 'Content-Type' in request.headers:
+        logger.debug('Content type: {}'.format(request.headers['Content-Type']))
+        if request.headers['Content-Type'] in accept_post_rdf:
+            cls = Ldpc
+        else:
+            cls = LdpNr
+    else:
+        # @TODO guess content type from magic number
+        cls = Ldpc
+
+    logger.info('POSTing resource of type: {}'.format(cls.__name__))
+
     try:
-       rsrc = Ldpc.inst_for_post(parent, slug)
+       rsrc = cls.inst_for_post(parent, slug)
     except ResourceNotExistsError as e:
         return str(e), 404
     except InvalidResourceError as e:
@@ -110,11 +122,11 @@ def post_resource(parent):
     except ServerManagedTermError as e:
         return str(e), 412
 
-    headers.update({
+    out_headers.update({
         'Location' : rsrc.uri,
     })
 
-    return rsrc.uri, headers, 201
+    return rsrc.uri, out_headers, 201
 
 
 @ldp.route('/<path:uuid>', methods=['PUT'])

+ 48 - 31
lakesuperior/model/ldp_rs.py

@@ -1,6 +1,9 @@
 from copy import deepcopy
 
+import arrow
+
 from rdflib import Graph
+from rdflib.resource import Resource
 from rdflib.namespace import RDF, XSD
 from rdflib.plugins.sparql.parser import parseUpdate
 from rdflib.term import URIRef, Literal, Variable
@@ -11,6 +14,7 @@ from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
 from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
 from lakesuperior.exceptions import ResourceNotExistsError, \
         ServerManagedTermError, SingleSubjectError
+from lakesuperior.util.digest import Digest
 from lakesuperior.util.translator import Translator
 
 class LdpRs(Ldpr):
@@ -19,6 +23,7 @@ class LdpRs(Ldpr):
     Definition: https://www.w3.org/TR/ldp/#ldprs
     '''
 
+    DEFAULT_USER = Literal('BypassAdmin')
     RETURN_CHILD_RES_URI = nsc['fcrepo'].EmbedResources
     RETURN_INBOUND_REF_URI = nsc['fcrepo'].InboundReferences
     RETURN_SRV_MGD_RES_URI = nsc['fcrepo'].ServerManaged
@@ -44,18 +49,6 @@ class LdpRs(Ldpr):
     }
 
 
-    def head(self):
-        '''
-        Return values for the headers.
-        '''
-        headers = self.rdfly.headers
-
-        for t in self.ldp_types:
-            headers['Link'].append('{};rel="type"'.format(t.identifier.n3()))
-
-        return headers
-
-
     def get(self, pref_return):
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
@@ -83,12 +76,12 @@ class LdpRs(Ldpr):
             if str(self.RETURN_SRV_MGD_RES_URI) in omit:
                     kwargs['incl_srv_mgd'] = False
 
-        im_rsrc = self.rdfly.out_rsrc(**kwargs)
+        imr = self.rdfly.out_rsrc
 
-        if not len(im_rsrc.graph):
-            raise ResourceNotExistsError(im_rsrc.uuid)
+        if not imr or not len(imr.graph):
+            raise ResourceNotExistsError(self.uri)
 
-        return Translator.globalize_rsrc(im_rsrc)
+        return Translator.globalize_rsrc(imr)
 
 
     @transactional
@@ -100,13 +93,11 @@ class LdpRs(Ldpr):
         '''
         g = Graph().parse(data=data, format=format, publicID=self.urn)
 
-        g = self._check_mgd_terms(g, handling)
-        self._ensure_single_subject_rdf(g)
-
-        for t in self.base_types:
-            g.add((self.urn, RDF.type, t))
+        imr = Resource(self._check_mgd_terms(g, handling), self.urn)
+        imr = self._add_srv_mgd_triples(imr, create=True)
+        self._ensure_single_subject_rdf(imr.graph)
 
-        self.rdfly.create_rsrc(g)
+        self.rdfly.create_rsrc(imr)
 
         self._set_containment_rel()
 
@@ -118,13 +109,11 @@ class LdpRs(Ldpr):
         '''
         g = Graph().parse(data=data, format=format, publicID=self.urn)
 
-        g = self._check_mgd_terms(g, handling)
-        self._ensure_single_subject_rdf(g)
+        imr = Resource(self._check_mgd_terms(g, handling), self.urn)
+        imr = self._add_srv_mgd_triples(imr, create=True)
+        self._ensure_single_subject_rdf(imr.graph)
 
-        for t in self.base_types:
-            g.add((self.urn, RDF.type, t))
-
-        res = self.rdfly.create_or_replace_rsrc(g)
+        res = self.rdfly.create_or_replace_rsrc(imr)
 
         self._set_containment_rel()
 
@@ -137,9 +126,9 @@ class LdpRs(Ldpr):
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
         '''
-        remove, add = self._sparql_delta(data)
+        trp_remove, trp_add = self._sparql_delta(data)
 
-        self.rdfly.modify_rsrc(remove, add)
+        return self.rdfly.modify_rsrc(trp_remove, trp_add)
 
 
     ## PROTECTED METHODS ##
@@ -175,6 +164,34 @@ class LdpRs(Ldpr):
         return g
 
 
+    def _add_srv_mgd_triples(self, imr, create=False):
+        '''
+        Add server-managed triples to a graph.
+
+        @param imr (rdflib.resource.Resource) Resource to add the triples to.
+        It is modified in place.
+        @param create (boolean) Whether the resource is being created.
+
+        @return rdflib.resource.Resource The same `imr` that was passed in.
+        '''
+        # Message digest. It is computed here, before the timestamp and type
+        # triples below are added to the graph.
+        cksum = Digest.rdf_cksum(imr.graph)
+        imr.set(nsc['premis'].hasMessageDigest,
+                URIRef('urn:sha1:{}'.format(cksum)))
+
+        # Create and modify timestamp. The same value is used for both.
+        # @TODO Use gunicorn to get request timestamp.
+        ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
+        if create:
+            imr.set(nsc['fcrepo'].created, ts)
+            imr.set(nsc['fcrepo'].createdBy, self.DEFAULT_USER)
+
+        imr.set(nsc['fcrepo'].lastModified, ts)
+        imr.set(nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER)
+
+        # Base LDP types.
+        for t in self.base_types:
+            imr.add(RDF.type, t)
+
+        return imr
+
+
     def _sparql_delta(self, q, handling=None):
         '''
         Calculate the delta obtained by a SPARQL Update operation.
@@ -183,7 +200,7 @@ class LdpRs(Ldpr):
 
         1. It ensures that no resources outside of the subject of the request
         are modified (e.g. by variable subjects)
-        2. It verifies that none of the terms being modified is server-managed.
+        2. It verifies that none of the terms being modified is server managed.
 
         This method extracts an in-memory copy of the resource and performs the
         query on that once it has checked if any of the server managed terms is

+ 44 - 0
lakesuperior/model/ldpr.py

@@ -1,10 +1,13 @@
 import logging
 
 from abc import ABCMeta
+from collections import defaultdict
 from importlib import import_module
 from itertools import accumulate
 from uuid import uuid4
 
+import arrow
+
 from rdflib import Graph
 from rdflib.resource import Resource
 from rdflib.namespace import RDF, XSD
@@ -327,6 +330,47 @@ class Ldpr(metaclass=ABCMeta):
 
     ## LDP METHODS ##
 
+    def head(self):
+        '''
+        Build the response header values for this resource.
+
+        Reads the out-facing resource from the layout (`self.rdfly.out_rsrc`)
+        and derives the `ETag`, `Last-Modified` and `Link` values from it.
+
+        @return collections.defaultdict(list) Header names mapped to values.
+        `ETag` and `Last-Modified` are strings and are only present if the
+        corresponding triple is found; `Link` is a list of strings.
+        '''
+        out_rsrc = self.rdfly.out_rsrc
+
+        out_headers = defaultdict(list)
+
+        digest = out_rsrc.value(nsc['premis'].hasMessageDigest)
+        if digest:
+            # The digest is stored as `urn:sha1:<checksum>`; keep the checksum.
+            etag = digest.identifier.split(':')[-1]
+            # No trailing comma here: it would make the value a 1-tuple
+            # instead of a string.
+            out_headers['ETag'] = 'W/"{}"'.format(etag)
+
+        last_updated_term = out_rsrc.value(nsc['fcrepo'].lastModified)
+        if last_updated_term:
+            # NOTE(review): HTTP-date normally uses a two-digit day and the
+            # literal `GMT` rather than a numeric offset — confirm the format.
+            out_headers['Last-Modified'] = arrow.get(last_updated_term)\
+                .format('ddd, D MMM YYYY HH:mm:ss Z')
+
+        for t in self.ldp_types:
+            out_headers['Link'].append(
+                    '{};rel="type"'.format(t.identifier.n3()))
+
+        return out_headers
+
+
+    def get(self, *args, **kwargs):
+        '''
+        Placeholder for the LDP GET method. This implementation always raises
+        `NotImplementedError`.
+        '''
+        raise NotImplementedError()
+
+
+    def post(self, *args, **kwargs):
+        '''
+        Placeholder for the LDP POST method. This implementation always raises
+        `NotImplementedError`.
+        '''
+        raise NotImplementedError()
+
+
+    def put(self, *args, **kwargs):
+        '''
+        Placeholder for the LDP PUT method. This implementation always raises
+        `NotImplementedError`.
+        '''
+        raise NotImplementedError()
+
+
+    def patch(self, *args, **kwargs):
+        '''
+        Placeholder for the LDP PATCH method. This implementation always
+        raises `NotImplementedError`.
+        '''
+        raise NotImplementedError()
+
+
     @transactional
     @must_exist
     def delete(self):

+ 43 - 34
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -136,15 +136,18 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
 
     @property
-    @abstractmethod
     @needs_rsrc
-    def headers(self):
+    def out_rsrc(self):
         '''
-        Return a dict with information for generating HTTP headers.
+        Graph obtained by querying the triplestore and adding any abstraction
+        and filtering to make up a graph that can be used for read-only,
+        API-facing results. This base implementation returns the result of
+        `extract_imr()` called with its default arguments.
 
-        @retun dict
+        @return rdflib.resource.Resource
         '''
-        pass
+        return self.extract_imr()
+
 
 
     ## PUBLIC METHODS ##
@@ -168,7 +171,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
         '''
         Perform a SPARQL update on the triplestore.
 
-        This should provide non-abstract access, independent from the layout,
+        This should provide low-level access, independent from the layout,
         therefore it should not be overridden by individual layouts.
 
         @param q (string) SPARQL-UPDATE query.
@@ -179,6 +182,21 @@ class BaseRdfLayout(metaclass=ABCMeta):
         return self.ds.query(q, initBindings=initBindings, initNs=nsc)
 
 
+    @needs_rsrc
+    def create_or_replace_rsrc(self, g):
+        '''Create a resource graph in the main graph if it does not exist.
+
+        If it exists, replace the existing one retaining the creation date.
+
+        @param g Data for the resource. It is passed through unchanged to
+        `replace_rsrc` or `create_rsrc`.
+
+        @return Whatever `replace_rsrc` or `create_rsrc` returns.
+        '''
+        # Existence is checked on the current resource (no URI is passed).
+        if self.ask_rsrc_exists():
+            self._logger.info(
+                    'Resource {} exists. Removing all outbound triples.'
+                    .format(self.rsrc.identifier))
+            return self.replace_rsrc(g)
+        else:
+            return self.create_rsrc(g)
+
+
     ## INTERFACE METHODS ##
 
     # Implementers of custom layouts should look into these methods to
@@ -199,21 +217,6 @@ class BaseRdfLayout(metaclass=ABCMeta):
         pass
 
 
-    @abstractmethod
-    @needs_rsrc
-    def out_rsrc(self, srv_mgd=True, inbound=False, embed_children=False):
-        '''
-        Graph obtained by querying the triplestore and adding any abstraction
-        and filtering to make up a graph that can be used for read-only,
-        API-facing results. Different layouts can implement this in very
-        different ways, so it is an abstract method.
-
-        @return rdflib.resource.Resource
-        '''
-        pass
-
-
-
     @abstractmethod
     def ask_rsrc_exists(self, uri=None):
         '''
@@ -228,16 +231,6 @@ class BaseRdfLayout(metaclass=ABCMeta):
         pass
 
 
-    @abstractmethod
-    @needs_rsrc
-    def create_or_replace_rsrc(self, urn, data, commit=True):
-        '''Create a resource graph in the main graph if it does not exist.
-
-        If it exists, replace the existing one retaining the creation date.
-        '''
-        pass
-
-
     @abstractmethod
     @needs_rsrc
     def create_rsrc(self, urn, data, commit=True):
@@ -250,9 +243,12 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
     @abstractmethod
     @needs_rsrc
-    def patch_rsrc(self, urn, data, commit=False):
-        '''
-        Perform a SPARQL UPDATE on a resource.
+    def replace_rsrc(self, g):
+        '''Replace a resource, i.e. delete all the triples and re-add the
+        ones provided.
+
+        @param g (rdflib.Graph) Graph to load. It must not contain
+        `fcrepo:created` and `fcrepo:createdBy`.
         '''
         pass
 
@@ -273,3 +269,16 @@ class BaseRdfLayout(metaclass=ABCMeta):
     @needs_rsrc
     def delete_rsrc(self, urn, commit=True):
         pass
+
+
+
+    ## PROTECTED METHODS  ##
+
+    def _set_msg_digest(self):
+        '''
+        Add a message digest to the current resource.
+
+        Computes a checksum of `self.rsrc.graph` with `Digest.rdf_cksum` and
+        sets it on `self.rsrc` as `premis:hasMessageDigest`, in the form
+        `urn:sha1:<checksum>`.
+        '''
+        # NOTE(review): `Digest` and `URIRef` must be in scope in this module;
+        # no import for them is visible in this diff — confirm.
+        cksum = Digest.rdf_cksum(self.rsrc.graph)
+        self.rsrc.set(nsc['premis'].hasMessageDigest,
+                URIRef('urn:sha1:{}'.format(cksum)))
+

+ 21 - 101
lakesuperior/store_layouts/rdf/simple_layout.py

@@ -14,7 +14,6 @@ from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
         srv_mgd_predicates, srv_mgd_types
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout, \
         needs_rsrc
-from lakesuperior.util.digest import Digest
 from lakesuperior.util.translator import Translator
 
 
@@ -30,32 +29,8 @@ class SimpleLayout(BaseRdfLayout):
     for (possible) improved speed and reduced storage.
     '''
 
-    @property
-    def headers(self):
-        '''
-        See base_rdf_layout.headers.
-        '''
-        headers = {
-            'Link' : [],
-        }
-
-        # @NOTE: Easy with these one-by-one picks. Each one of them is a call
-        # to the triplestore.
-        digest = self.rsrc.value(nsc['premis'].hasMessageDigest)
-        if digest:
-            etag = digest.identifier.split(':')[-1]
-            headers['ETag'] = 'W/"{}"'.format(etag),
-
-        last_updated_term = self.rsrc.value(nsc['fcrepo'].lastModified)
-        if last_updated_term:
-            headers['Last-Modified'] = arrow.get(last_updated_term)\
-                .format('ddd, D MMM YYYY HH:mm:ss Z')
-
-        return headers
-
-
     def extract_imr(self, uri=None, graph=None, minimal=False,
-            incl_inbound=False, incl_children=True, incl_srv_mgd=True):
+            incl_inbound=False, embed_children=False, incl_srv_mgd=True):
         '''
         See base_rdf_layout.extract_imr.
         '''
@@ -68,7 +43,7 @@ class SimpleLayout(BaseRdfLayout):
           {0} ldp:contains ?c .
           ?c ?cp ?co .
         }}
-        '''.format(uri.n3()) if incl_children else ''
+        '''.format(uri.n3()) if embed_children else ''
 
         q = '''
         CONSTRUCT {{
@@ -76,7 +51,7 @@ class SimpleLayout(BaseRdfLayout):
             ?c ?cp ?co .
         }} WHERE {{
             {0} ?p ?o .{1}{2}
-            FILTER (?p != premis:hasMessageDigest) .
+            #FILTER (?p != premis:hasMessageDigest) .
         }}
         '''.format(uri.n3(), inbound_qry, embed_children_qry)
 
@@ -100,18 +75,6 @@ class SimpleLayout(BaseRdfLayout):
             return rsrc
 
 
-    @needs_rsrc
-    def out_rsrc(self, **kwargs):
-        '''
-        See base_rdf_layout.out_rsrc.
-        '''
-        im_rsrc = self.extract_imr(**kwargs)
-
-        im_rsrc.remove(nsc['premis'].hasMessageDigest)
-
-        return im_rsrc
-
-
     def ask_rsrc_exists(self, uri=None):
         '''
         See base_rdf_layout.ask_rsrc_exists.
@@ -127,79 +90,35 @@ class SimpleLayout(BaseRdfLayout):
 
 
     @needs_rsrc
-    def create_or_replace_rsrc(self, g):
+    def create_rsrc(self, imr):
         '''
-        See base_rdf_layout.create_or_replace_rsrc.
+        See base_rdf_layout.create_rsrc.
         '''
-        # @TODO Use gunicorn to get request timestamp.
-        ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
-
-        if self.ask_rsrc_exists():
-            self._logger.info(
-                    'Resource {} exists. Removing all outbound triples.'
-                    .format(self.rsrc.identifier))
-
-            # Delete all triples but keep creation date and creator.
-            created = self.rsrc.value(nsc['fcrepo'].created)
-            created_by = self.rsrc.value(nsc['fcrepo'].createdBy)
-
-            self.delete_rsrc()
-            res = self.RES_UPDATED
-        else:
-            created = ts
-            created_by = Literal('BypassAdmin')
-            res = self.RES_CREATED
-
-        self._logger.info('Created timestamp: {}'.format(ts))
-
-        self.rsrc.set(nsc['fcrepo'].created, created)
-        self.rsrc.set(nsc['fcrepo'].createdBy, created_by)
-
-        self.rsrc.set(nsc['fcrepo'].lastModified, ts)
-        self.rsrc.set(nsc['fcrepo'].lastModifiedBy, Literal('BypassAdmin'))
-
-        for s, p, o in g:
+        for s, p, o in imr.graph:
             self.ds.add((s, p, o))
 
-        return res
+        return self.RES_CREATED
 
 
     @needs_rsrc
-    def create_rsrc(self, g):
+    def replace_rsrc(self, imr):
         '''
-        See base_rdf_layout.create_rsrc.
+        See base_rdf_layout.replace_rsrc.
         '''
-        # @TODO Use gunicorn to get request timestamp.
-        ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
+        # Delete all triples but keep creation date and creator.
+        created = self.rsrc.value(nsc['fcrepo'].created)
+        created_by = self.rsrc.value(nsc['fcrepo'].createdBy)
 
-        self.rsrc.set(nsc['fcrepo'].created, ts)
-        self.rsrc.set(nsc['fcrepo'].createdBy, Literal('BypassAdmin'))
+        imr.set(nsc['fcrepo'].created, created)
+        imr.set(nsc['fcrepo'].createdBy, created_by)
 
-        cksum = Digest.rdf_cksum(self.rsrc.graph)
-        self.rsrc.set(nsc['premis'].hasMessageDigest,
-                URIRef('urn:sha1:{}'.format(cksum)))
+        # Delete the stored triples.
+        self.delete_rsrc()
 
-        for s, p, o in g:
+        for s, p, o in imr.graph:
             self.ds.add((s, p, o))
 
-
-    @needs_rsrc
-    def patch_rsrc(self, data):
-        '''
-        Perform a SPARQL UPDATE on a resource.
-
-        @TODO deprecate.
-        '''
-        # @TODO Use gunicorn to get request timestamp.
-        ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
-
-        q = Translator.localize_string(data).replace(
-                '<>', self.rsrc.identifier.n3())
-
-        self.rsrc.set(nsc['fcrepo'].lastModified, ts)
-        self.rsrc.set(nsc['fcrepo'].lastModifiedBy, Literal('BypassAdmin'))
-
-        self.ds.update(q)
+        return self.RES_UPDATED
 
 
     @needs_rsrc
@@ -214,7 +133,7 @@ class SimpleLayout(BaseRdfLayout):
             self.rsrc.add(t[0], t[1])
 
 
-    def delete_rsrc(self, inbound=False):
+    def delete_rsrc(self, inbound=True):
         '''
         Delete a resource. If `inbound` is specified, delete all inbound
         relationships as well.
@@ -223,7 +142,8 @@ class SimpleLayout(BaseRdfLayout):
 
         self.rsrc.remove(Variable('p'))
         if inbound:
-            self.ds.remove((Variable('s'), Variable('p'), self.rsrc.identifier))
+            self.ds.remove(
+                    (Variable('s'), Variable('p'), self.rsrc.identifier))
 
 
     ## PROTECTED METHODS ##