Przeglądaj źródła

Improve IMR extraction and out_graph method.

* Rework logic to strip server-managed triples only at the output stage.
* Fix 404 returned if the resource exists but filters output an empty
graph.
Stefano Cossu 7 lat temu
rodzic
commit
08777d3880

+ 11 - 1
doc/notes/fcrepo4_deltas.md

@@ -55,6 +55,16 @@ the default.
 If `Prefer` is set to `handling=lenient`, all server-managed triples sent with
 the payload are ignored.
 
+## "Include" and "Omit" options for children
+
+LAKEsuperior offers an additional `Prefer` header option that, when retrieving
+a resource, excludes all references to child resources (i.e. removes all the
+`ldp:contains` triples) while leaving the other server-managed triples in place:
+
+    Prefer: return=representation; [include | omit]="http://fedora.info/definitions/v4/repository#Children"
+
+The default is `include`.
+
 ## Automatic LDP class assignment
 
 Since LAKEsuperior rejects client-provided server-managed triples, and since
@@ -64,7 +74,7 @@ provided properties: if the `ldp:hasMemberRelation` and
 Container. If in addition to these the `ldp:insertedContentRelation` property
 is present, the resource is an Indirect Container. If any of the first two are
 missing, the resource is a Container (@TODO discuss: shall it be a Basic
-Contaner?)
+Container?)
 
 ## LDP-NR metadata by content negotiation
 

+ 50 - 24
lakesuperior/model/ldpr.py

@@ -14,6 +14,8 @@ from rdflib.resource import Resource
 from rdflib.namespace import RDF, XSD
 
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
+from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
+        srv_mgd_predicates, srv_mgd_types
 from lakesuperior.exceptions import InvalidResourceError, \
         ResourceNotExistsError, ServerManagedTermError
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
@@ -129,7 +131,7 @@ class Ldpr(metaclass=ABCMeta):
         self._urn = nsc['fcres'][uuid] if self.uuid is not None \
                 else self.ROOT_NODE_URN
 
-        self._imr_options = __class__.imr_options(repr_opts)
+        self._imr_options = __class__.set_imr_options(repr_opts)
 
 
     @property
@@ -204,7 +206,23 @@ class Ldpr(metaclass=ABCMeta):
 
         Internal URNs are replaced by global URIs using the endpoint webroot.
         '''
-        return Toolbox().globalize_graph(self.imr.graph)
+        # Remove digest hash.
+        self.imr.remove(nsc['premis'].hasMessageDigest)
+
+        if not self._imr_options.setdefault('incl_srv_mgd', False):
+            for p in srv_mgd_predicates:
+                self._logger.debug('Removing predicate: {}'.format(p))
+                self.imr.remove(p)
+            for t in srv_mgd_types:
+                self._logger.debug('Removing type: {}'.format(t))
+                self.imr.remove(RDF.type, t)
+
+        out_g = Toolbox().globalize_graph(self.imr.graph)
+        # Clear IMR because it's been pruned. In the rare case it is needed
+        # after this method, it will be retrieved again.
+        delattr(self, 'imr')
+
+        return out_g
 
 
     @property
@@ -346,7 +364,7 @@ class Ldpr(metaclass=ABCMeta):
         rdfly = cls.load_layout('rdf')
         imr_urn = nsc['fcres'][uuid] if uuid else cls.ROOT_NODE_URN
         cls._logger.debug('Representation options: {}'.format(repr_opts))
-        imr_opts = cls.imr_options(repr_opts)
+        imr_opts = cls.set_imr_options(repr_opts)
         imr = rdfly.extract_imr(imr_urn, **imr_opts)
         rdf_types = imr.objects(RDF.type)
 
@@ -410,7 +428,7 @@ class Ldpr(metaclass=ABCMeta):
 
 
     @classmethod
-    def imr_options(cls, repr_opts):
+    def set_imr_options(cls, repr_opts):
         '''
         Set options to retrieve IMR.
 
@@ -422,9 +440,6 @@ class Ldpr(metaclass=ABCMeta):
         cls._logger.debug('Setting retrieval options from: {}'.format(repr_opts))
         imr_options = {}
 
-        #imr_options['embed_children'] = imr_options['incl_inbound'] = False
-        #imr_options['incl_srv_mgd'] = True
-
         if 'value' in repr_opts and repr_opts['value'] == 'minimal':
             imr_options = {
                 'embed_children' : False,
@@ -432,23 +447,34 @@ class Ldpr(metaclass=ABCMeta):
                 'incl_inbound' : False,
                 'incl_srv_mgd' : False,
             }
-        elif 'parameters' in repr_opts:
-            include = repr_opts['parameters']['include'].split(' ') \
-                    if 'include' in repr_opts['parameters'] else []
-            omit = repr_opts['parameters']['omit'].split(' ') \
-                    if 'omit' in repr_opts['parameters'] else []
-
-            cls._logger.debug('Include: {}'.format(include))
-            cls._logger.debug('Omit: {}'.format(omit))
-
-            if str(cls.EMBED_CHILD_RES_URI) in include:
-                    imr_options['embed_children'] = True
-            if str(cls.RETURN_CHILD_RES_URI) in omit:
-                    imr_options['incl_children'] = False
-            if str(cls.RETURN_INBOUND_REF_URI) in include:
-                    imr_options['incl_inbound'] = True
-            if str(cls.RETURN_SRV_MGD_RES_URI) in omit:
-                    imr_options['incl_srv_mgd'] = False
+        else:
+            # Default.
+            imr_options = {
+                'embed_children' : False,
+                'incl_children' : True,
+                'incl_inbound' : False,
+                'incl_srv_mgd' : True,
+            }
+
+            # Override defaults.
+            if 'parameters' in repr_opts:
+                include = repr_opts['parameters']['include'].split(' ') \
+                        if 'include' in repr_opts['parameters'] else []
+                omit = repr_opts['parameters']['omit'].split(' ') \
+                        if 'omit' in repr_opts['parameters'] else []
+
+                cls._logger.debug('Include: {}'.format(include))
+                cls._logger.debug('Omit: {}'.format(omit))
+
+                if str(cls.EMBED_CHILD_RES_URI) in include:
+                        imr_options['embed_children'] = True
+                if str(cls.RETURN_CHILD_RES_URI) in omit:
+                        imr_options['incl_children'] = False
+                if str(cls.RETURN_INBOUND_REF_URI) in include:
+                        imr_options['incl_inbound'] = True
+                if str(cls.RETURN_SRV_MGD_RES_URI) in omit:
+                        imr_options['incl_srv_mgd'] = False
+
         cls._logger.debug('Retrieval options: {}'.format(imr_options))
 
         return imr_options

+ 18 - 19
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -114,20 +114,6 @@ class BaseRdfLayout(metaclass=ABCMeta):
         return self.ds.resource(urn)
 
 
-    def out_rsrc(self, urn):
-        '''
-        Graph obtained by querying the triplestore and adding any abstraction
-        and filtering to make up a graph that can be used for read-only,
-        API-facing results. Different layouts can implement this in very
-        different ways, so it is an abstract method.
-
-        @return rdflib.resource.Resource
-        '''
-        imr = self.extract_imr(urn)
-        if not len(imr.graph):
-            raise ResourceNotExistsError
-
-
     def create_or_replace_rsrc(self, imr):
         '''Create a resource graph in the main graph if it does not exist.
 
@@ -172,16 +158,29 @@ class BaseRdfLayout(metaclass=ABCMeta):
     # implement.
 
     @abstractmethod
-    def extract_imr(self, uri, strict=False, minimal=False, incl_inbound=False,
-                embed_children=False, incl_srv_mgd=True):
+    def extract_imr(self, uri, strict=False, incl_inbound=False,
+                incl_children=True, embed_children=False, incl_srv_mgd=True):
         '''
-        Extract an in-memory resource based on the copy of a graph on a subject.
+        Extract an in-memory resource from the dataset restricted to a subject.
+
+        Some filtering operations are carried out in this method for
+        performance purposes (e.g. `incl_children` and `embed_children`, i.e.
+        the IMR will never have those properties). Others, such as
+        server-managed triples, are kept in the IMR until they are filtered out
+        when the graph is output with `Ldpr.out_graph`.
 
         @param uri (URIRef) Resource URI.
         @param strict (boolean) If set to True, an empty result graph will
         raise a `ResourceNotExistsError`.
-        @param inbound (boolean) Whether to pull triples that have the resource
-        URI as their object.
+        @param incl_inbound (boolean) Whether to pull triples that have the
+        resource URI as their object.
+        @param incl_children (boolean) Whether to include all children
+        indicated by `ldp:contains`. This is only effective if `incl_srv_mgd`
+        is True.
+        @param embed_children (boolean) If this and `incl_children` are True,
+        the full graph is retrieved for each of the children.
+        @param incl_srv_mgd (boolean) Whether to include server-managed
+        triples.
         '''
         pass
 

+ 23 - 25
lakesuperior/store_layouts/rdf/simple_layout.py

@@ -39,36 +39,34 @@ class SimpleLayout(BaseRdfLayout):
                 if incl_inbound else ''
         inbound_qry = '\nOPTIONAL {{ ?s1 ?p1 {} . }} .'.format(uri.n3()) \
                 if incl_inbound else ''
-        embed_children_qry = '''
-        OPTIONAL {{
-          {0} ldp:contains ?c .
-          ?c ?cp ?co .
-        }}
-        '''.format(uri.n3()) if incl_children and embed_children else ''
-
-        incl_children_qry = '\nFILTER ( ?p != ldp:contains )' \
-                if not incl_children else ''
 
-        srv_mgd_qry = ''
-        if not incl_srv_mgd:
-            for p in srv_mgd_predicates:
-                self._logger.debug('Removing predicate: {}'.format(p))
-                srv_mgd_qry += '\nFILTER ( ?p != {} ) .'.format(p.n3())
-            for t in srv_mgd_types:
-                self._logger.debug('Removing type: {}'.format(t))
-                srv_mgd_qry += '\nMINUS {{ ?s a {} .}} .'.format(t.n3())
+        # Include and/or embed children.
+        embed_children_trp = embed_children_qry = ''
+        if incl_srv_mgd and incl_children:
+            incl_children_qry = ''
+
+            # Embed children.
+            if embed_children:
+                embed_children_trp = '?c ?cp ?co .'
+                embed_children_qry = '''
+                OPTIONAL {{
+                  {0} ldp:contains ?c .
+                  {1}
+                }}
+                '''.format(uri.n3(), embed_children_trp)
+        else:
+            incl_children_qry = '\nFILTER ( ?p != ldp:contains )' \
 
         q = '''
         CONSTRUCT {{
             {uri} ?p ?o .{inb_cnst}
-            ?c ?cp ?co .
+            {embed_chld_t}
         }} WHERE {{
-            {uri} ?p ?o .{inb_qry}{incl_chld}{embed_chld}{omit_srv_mgd}
-            #FILTER (?p != premis:hasMessageDigest) .
+            {uri} ?p ?o .{inb_qry}{incl_chld}{embed_chld}
         }}
         '''.format(uri=uri.n3(), inb_cnst=inbound_construct,
                 inb_qry=inbound_qry, incl_chld=incl_children_qry,
-                embed_chld=embed_children_qry, omit_srv_mgd=srv_mgd_qry)
+                embed_chld_t=embed_children_trp, embed_chld=embed_children_qry)
 
         try:
             qres = self._conn.query(q)
@@ -91,9 +89,9 @@ class SimpleLayout(BaseRdfLayout):
                     Toolbox().uri_to_uuid(rsrc.identifier),
                     rsrc.value(nsc['fcrepo'].created))
         elif rsrc.value(nsc['fcsystem'].tombstone):
-            tombstone_rsrc = rsrc.value(nsc['fcsystem'].tombstone)
             raise TombstoneError(
-                    Toolbox().uri_to_uuid(rsrc.identifier),
+                    Toolbox().uri_to_uuid(
+                            rsrc.value(nsc['fcsystem'].tombstone).identifier),
                     tombstone_rsrc.value(nsc['fcrepo'].created))
 
         return rsrc
@@ -104,8 +102,8 @@ class SimpleLayout(BaseRdfLayout):
         See base_rdf_layout.ask_rsrc_exists.
         '''
         self._logger.info('Checking if resource exists: {}'.format(urn))
-        imr = self.extract_imr(urn, incl_children=False)
-        return len(imr.graph) > 0
+
+        return self._conn.query('ASK {{ {} ?p ?o . }}'.format(urn.n3()))
 
 
     def create_rsrc(self, imr):