Browse Source

Improve IMR extraction and out_graph method.

* Rework logic to strip server-managed triples only at the output stage.
* Fix 404 returned if the resource exists but filters output an empty
graph.
Stefano Cossu 7 years ago
parent
commit
08777d3880

+ 11 - 1
doc/notes/fcrepo4_deltas.md

@@ -55,6 +55,16 @@ the default.
 If `Prefer` is set to `handling=lenient`, all server-managed triples sent with
 If `Prefer` is set to `handling=lenient`, all server-managed triples sent with
 the payload are ignored.
 the payload are ignored.
 
 
+## "Include" and "Omit" options for children
+
+LAKEsuperior offers an additional `Prefer` header option to exclude all
+references to child resources (i.e. by removing all the `ldp:contains` triples)
+while leaving the other server-managed triples when retrieving a resource:
+
+    Prefer: return=representation; [include | omit]="http://fedora.info/definitions/v4/repository#Children"
+
+The default is `include`.
+
 ## Automatic LDP class assignment
 ## Automatic LDP class assignment
 
 
 Since LAKEsuperior rejects client-provided server-managed triples, and since
 Since LAKEsuperior rejects client-provided server-managed triples, and since
@@ -64,7 +74,7 @@ provided properties: if the `ldp:hasMemberRelation` and
 Container. If in addition to these the `ldp:insertedContentRelation` property
 Container. If in addition to these the `ldp:insertedContentRelation` property
 is present, the resource is an Indirect Container. If any of the first two are
 is present, the resource is an Indirect Container. If any of the first two are
 missing, the resource is a Container (@TODO discuss: shall it be a Basic
 missing, the resource is a Container (@TODO discuss: shall it be a Basic
-Contaner?)
+Container?)
 
 
 ## LDP-NR metadata by content negotiation
 ## LDP-NR metadata by content negotiation
 
 

+ 50 - 24
lakesuperior/model/ldpr.py

@@ -14,6 +14,8 @@ from rdflib.resource import Resource
 from rdflib.namespace import RDF, XSD
 from rdflib.namespace import RDF, XSD
 
 
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
+from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
+        srv_mgd_predicates, srv_mgd_types
 from lakesuperior.exceptions import InvalidResourceError, \
 from lakesuperior.exceptions import InvalidResourceError, \
         ResourceNotExistsError, ServerManagedTermError
         ResourceNotExistsError, ServerManagedTermError
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
 from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
@@ -129,7 +131,7 @@ class Ldpr(metaclass=ABCMeta):
         self._urn = nsc['fcres'][uuid] if self.uuid is not None \
         self._urn = nsc['fcres'][uuid] if self.uuid is not None \
                 else self.ROOT_NODE_URN
                 else self.ROOT_NODE_URN
 
 
-        self._imr_options = __class__.imr_options(repr_opts)
+        self._imr_options = __class__.set_imr_options(repr_opts)
 
 
 
 
     @property
     @property
@@ -204,7 +206,23 @@ class Ldpr(metaclass=ABCMeta):
 
 
         Internal URNs are replaced by global URIs using the endpoint webroot.
         Internal URNs are replaced by global URIs using the endpoint webroot.
         '''
         '''
-        return Toolbox().globalize_graph(self.imr.graph)
+        # Remove digest hash.
+        self.imr.remove(nsc['premis'].hasMessageDigest)
+
+        if not self._imr_options.setdefault('incl_srv_mgd', False):
+            for p in srv_mgd_predicates:
+                self._logger.debug('Removing predicate: {}'.format(p))
+                self.imr.remove(p)
+            for t in srv_mgd_types:
+                self._logger.debug('Removing type: {}'.format(t))
+                self.imr.remove(RDF.type, t)
+
+        out_g = Toolbox().globalize_graph(self.imr.graph)
+        # Clear IMR because it's been pruned. In the rare case it is needed
+        # after this method, it will be retrieved again.
+        delattr(self, 'imr')
+
+        return out_g
 
 
 
 
     @property
     @property
@@ -346,7 +364,7 @@ class Ldpr(metaclass=ABCMeta):
         rdfly = cls.load_layout('rdf')
         rdfly = cls.load_layout('rdf')
         imr_urn = nsc['fcres'][uuid] if uuid else cls.ROOT_NODE_URN
         imr_urn = nsc['fcres'][uuid] if uuid else cls.ROOT_NODE_URN
         cls._logger.debug('Representation options: {}'.format(repr_opts))
         cls._logger.debug('Representation options: {}'.format(repr_opts))
-        imr_opts = cls.imr_options(repr_opts)
+        imr_opts = cls.set_imr_options(repr_opts)
         imr = rdfly.extract_imr(imr_urn, **imr_opts)
         imr = rdfly.extract_imr(imr_urn, **imr_opts)
         rdf_types = imr.objects(RDF.type)
         rdf_types = imr.objects(RDF.type)
 
 
@@ -410,7 +428,7 @@ class Ldpr(metaclass=ABCMeta):
 
 
 
 
     @classmethod
     @classmethod
-    def imr_options(cls, repr_opts):
+    def set_imr_options(cls, repr_opts):
         '''
         '''
         Set options to retrieve IMR.
         Set options to retrieve IMR.
 
 
@@ -422,9 +440,6 @@ class Ldpr(metaclass=ABCMeta):
         cls._logger.debug('Setting retrieval options from: {}'.format(repr_opts))
         cls._logger.debug('Setting retrieval options from: {}'.format(repr_opts))
         imr_options = {}
         imr_options = {}
 
 
-        #imr_options['embed_children'] = imr_options['incl_inbound'] = False
-        #imr_options['incl_srv_mgd'] = True
-
         if 'value' in repr_opts and repr_opts['value'] == 'minimal':
         if 'value' in repr_opts and repr_opts['value'] == 'minimal':
             imr_options = {
             imr_options = {
                 'embed_children' : False,
                 'embed_children' : False,
@@ -432,23 +447,34 @@ class Ldpr(metaclass=ABCMeta):
                 'incl_inbound' : False,
                 'incl_inbound' : False,
                 'incl_srv_mgd' : False,
                 'incl_srv_mgd' : False,
             }
             }
-        elif 'parameters' in repr_opts:
-            include = repr_opts['parameters']['include'].split(' ') \
-                    if 'include' in repr_opts['parameters'] else []
-            omit = repr_opts['parameters']['omit'].split(' ') \
-                    if 'omit' in repr_opts['parameters'] else []
-
-            cls._logger.debug('Include: {}'.format(include))
-            cls._logger.debug('Omit: {}'.format(omit))
-
-            if str(cls.EMBED_CHILD_RES_URI) in include:
-                    imr_options['embed_children'] = True
-            if str(cls.RETURN_CHILD_RES_URI) in omit:
-                    imr_options['incl_children'] = False
-            if str(cls.RETURN_INBOUND_REF_URI) in include:
-                    imr_options['incl_inbound'] = True
-            if str(cls.RETURN_SRV_MGD_RES_URI) in omit:
-                    imr_options['incl_srv_mgd'] = False
+        else:
+            # Default.
+            imr_options = {
+                'embed_children' : False,
+                'incl_children' : True,
+                'incl_inbound' : False,
+                'incl_srv_mgd' : True,
+            }
+
+            # Override defaults.
+            if 'parameters' in repr_opts:
+                include = repr_opts['parameters']['include'].split(' ') \
+                        if 'include' in repr_opts['parameters'] else []
+                omit = repr_opts['parameters']['omit'].split(' ') \
+                        if 'omit' in repr_opts['parameters'] else []
+
+                cls._logger.debug('Include: {}'.format(include))
+                cls._logger.debug('Omit: {}'.format(omit))
+
+                if str(cls.EMBED_CHILD_RES_URI) in include:
+                        imr_options['embed_children'] = True
+                if str(cls.RETURN_CHILD_RES_URI) in omit:
+                        imr_options['incl_children'] = False
+                if str(cls.RETURN_INBOUND_REF_URI) in include:
+                        imr_options['incl_inbound'] = True
+                if str(cls.RETURN_SRV_MGD_RES_URI) in omit:
+                        imr_options['incl_srv_mgd'] = False
+
         cls._logger.debug('Retrieval options: {}'.format(imr_options))
         cls._logger.debug('Retrieval options: {}'.format(imr_options))
 
 
         return imr_options
         return imr_options

+ 18 - 19
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -114,20 +114,6 @@ class BaseRdfLayout(metaclass=ABCMeta):
         return self.ds.resource(urn)
         return self.ds.resource(urn)
 
 
 
 
-    def out_rsrc(self, urn):
-        '''
-        Graph obtained by querying the triplestore and adding any abstraction
-        and filtering to make up a graph that can be used for read-only,
-        API-facing results. Different layouts can implement this in very
-        different ways, so it is an abstract method.
-
-        @return rdflib.resource.Resource
-        '''
-        imr = self.extract_imr(urn)
-        if not len(imr.graph):
-            raise ResourceNotExistsError
-
-
     def create_or_replace_rsrc(self, imr):
     def create_or_replace_rsrc(self, imr):
         '''Create a resource graph in the main graph if it does not exist.
         '''Create a resource graph in the main graph if it does not exist.
 
 
@@ -172,16 +158,29 @@ class BaseRdfLayout(metaclass=ABCMeta):
     # implement.
     # implement.
 
 
     @abstractmethod
     @abstractmethod
-    def extract_imr(self, uri, strict=False, minimal=False, incl_inbound=False,
-                embed_children=False, incl_srv_mgd=True):
+    def extract_imr(self, uri, strict=False, incl_inbound=False,
+                incl_children=True, embed_children=False, incl_srv_mgd=True):
         '''
         '''
-        Extract an in-memory resource based on the copy of a graph on a subject.
+        Extract an in-memory resource from the dataset restricted to a subject.
+
+        Some filtering operations are carried out in this method for
+        performance purposes (e.g. `incl_children` and `embed_children`, i.e.
+        the IMR will never have those properties). Others, such as
+        server-managed triples, are kept in the IMR until they are filtered out
+        when the graph is output with `Ldpr.out_graph`.
 
 
         @param uri (URIRef) Resource URI.
         @param uri (URIRef) Resource URI.
         @param strict (boolean) If set to True, an empty result graph will
         @param strict (boolean) If set to True, an empty result graph will
         raise a `ResourceNotExistsError`.
         raise a `ResourceNotExistsError`.
-        @param inbound (boolean) Whether to pull triples that have the resource
-        URI as their object.
+        @param incl_inbound (boolean) Whether to pull triples that have the
+        resource URI as their object.
+        @param incl_children (boolean) Whether to include all children
+        indicated by `ldp:contains`. This is only effective if `incl_srv_mgd`
+        is True.
+        @param embed_children (boolean) If this and `incl_children` are True,
+        the full graph is retrieved for each of the children.
+        @param incl_srv_mgd (boolean) Whether to include server-managed
+        triples.
         '''
         '''
         pass
         pass
 
 

+ 23 - 25
lakesuperior/store_layouts/rdf/simple_layout.py

@@ -39,36 +39,34 @@ class SimpleLayout(BaseRdfLayout):
                 if incl_inbound else ''
                 if incl_inbound else ''
         inbound_qry = '\nOPTIONAL {{ ?s1 ?p1 {} . }} .'.format(uri.n3()) \
         inbound_qry = '\nOPTIONAL {{ ?s1 ?p1 {} . }} .'.format(uri.n3()) \
                 if incl_inbound else ''
                 if incl_inbound else ''
-        embed_children_qry = '''
-        OPTIONAL {{
-          {0} ldp:contains ?c .
-          ?c ?cp ?co .
-        }}
-        '''.format(uri.n3()) if incl_children and embed_children else ''
-
-        incl_children_qry = '\nFILTER ( ?p != ldp:contains )' \
-                if not incl_children else ''
 
 
-        srv_mgd_qry = ''
-        if not incl_srv_mgd:
-            for p in srv_mgd_predicates:
-                self._logger.debug('Removing predicate: {}'.format(p))
-                srv_mgd_qry += '\nFILTER ( ?p != {} ) .'.format(p.n3())
-            for t in srv_mgd_types:
-                self._logger.debug('Removing type: {}'.format(t))
-                srv_mgd_qry += '\nMINUS {{ ?s a {} .}} .'.format(t.n3())
+        # Include and/or embed children.
+        embed_children_trp = embed_children_qry = ''
+        if incl_srv_mgd and incl_children:
+            incl_children_qry = ''
+
+            # Embed children.
+            if embed_children:
+                embed_children_trp = '?c ?cp ?co .'
+                embed_children_qry = '''
+                OPTIONAL {{
+                  {0} ldp:contains ?c .
+                  {1}
+                }}
+                '''.format(uri.n3(), embed_children_trp)
+        else:
+            incl_children_qry = '\nFILTER ( ?p != ldp:contains )' \
 
 
         q = '''
         q = '''
         CONSTRUCT {{
         CONSTRUCT {{
             {uri} ?p ?o .{inb_cnst}
             {uri} ?p ?o .{inb_cnst}
-            ?c ?cp ?co .
+            {embed_chld_t}
         }} WHERE {{
         }} WHERE {{
-            {uri} ?p ?o .{inb_qry}{incl_chld}{embed_chld}{omit_srv_mgd}
-            #FILTER (?p != premis:hasMessageDigest) .
+            {uri} ?p ?o .{inb_qry}{incl_chld}{embed_chld}
         }}
         }}
         '''.format(uri=uri.n3(), inb_cnst=inbound_construct,
         '''.format(uri=uri.n3(), inb_cnst=inbound_construct,
                 inb_qry=inbound_qry, incl_chld=incl_children_qry,
                 inb_qry=inbound_qry, incl_chld=incl_children_qry,
-                embed_chld=embed_children_qry, omit_srv_mgd=srv_mgd_qry)
+                embed_chld_t=embed_children_trp, embed_chld=embed_children_qry)
 
 
         try:
         try:
             qres = self._conn.query(q)
             qres = self._conn.query(q)
@@ -91,9 +89,9 @@ class SimpleLayout(BaseRdfLayout):
                     Toolbox().uri_to_uuid(rsrc.identifier),
                     Toolbox().uri_to_uuid(rsrc.identifier),
                     rsrc.value(nsc['fcrepo'].created))
                     rsrc.value(nsc['fcrepo'].created))
         elif rsrc.value(nsc['fcsystem'].tombstone):
         elif rsrc.value(nsc['fcsystem'].tombstone):
-            tombstone_rsrc = rsrc.value(nsc['fcsystem'].tombstone)
             raise TombstoneError(
             raise TombstoneError(
-                    Toolbox().uri_to_uuid(rsrc.identifier),
+                    Toolbox().uri_to_uuid(
+                            rsrc.value(nsc['fcsystem'].tombstone).identifier),
                     tombstone_rsrc.value(nsc['fcrepo'].created))
                     tombstone_rsrc.value(nsc['fcrepo'].created))
 
 
         return rsrc
         return rsrc
@@ -104,8 +102,8 @@ class SimpleLayout(BaseRdfLayout):
         See base_rdf_layout.ask_rsrc_exists.
         See base_rdf_layout.ask_rsrc_exists.
         '''
         '''
         self._logger.info('Checking if resource exists: {}'.format(urn))
         self._logger.info('Checking if resource exists: {}'.format(urn))
-        imr = self.extract_imr(urn, incl_children=False)
-        return len(imr.graph) > 0
+
+        return self._conn.query('ASK {{ {} ?p ?o . }}'.format(urn.n3()))
 
 
 
 
     def create_rsrc(self, imr):
     def create_rsrc(self, imr):