فهرست منبع

Sparse path segments (formerly Pairtrees)

* Rename Pairtrees to Path Segments
* Place PS under one dedicated named graph
* Reduce number of triples created with PS
* Change policy: allow PUT and POST in existing path segments
* Update documentation
Stefano Cossu 6 سال پیش
والد
کامیت
6be0879865

+ 11 - 9
lakesuperior/endpoints/ldp.py

@@ -17,6 +17,7 @@ from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.exceptions import (ResourceNotExistsError, TombstoneError,
         ServerManagedTermError, InvalidResourceError, SingleSubjectError,
         ResourceExistsError, IncompatibleLdpTypeError)
+from lakesuperior.model.generic_resource import PathSegment
 from lakesuperior.model.ldp_factory import LdpFactory
 from lakesuperior.model.ldp_nr import LdpNr
 from lakesuperior.model.ldp_rs import LdpRs
@@ -130,9 +131,11 @@ def get_resource(uid, force_rdf=False):
         return _tombstone_response(e, uid)
     else:
         out_headers.update(rsrc.head())
-        if isinstance(rsrc, LdpRs) \
-                or is_accept_hdr_rdf_parsable() \
-                or force_rdf:
+        if (
+                isinstance(rsrc, LdpRs)
+                or isinstance(rsrc, PathSegment)
+                or is_accept_hdr_rdf_parsable()
+                or force_rdf):
             rsp = rsrc.get()
             return negotiate_content(rsp, out_headers)
         else:
@@ -459,17 +462,16 @@ def uuid_for_post(parent_uid, slug=None):
     parent = LdpFactory.from_stored(parent_uid,
             repr_opts={'incl_children' : False})
 
-    if nsc['fcrepo'].Pairtree in parent.types:
-        raise InvalidResourceError(parent.uid,
-                'Resource {} cannot be created under a pairtree.')
+    #if isinstance(parent, PathSegment):
+    #    raise InvalidResourceError(parent.uid,
+    #            'Resource {} cannot be created under a pairtree.')
 
     # Set prefix.
     if parent_uid:
-        logger.debug('Parent types: {}'.format(pformat(parent.types)))
-        if nsc['ldp'].Container not in parent.types:
+        if (not isinstance(parent, PathSegment)
+                and nsc['ldp'].Container not in parent.types):
             raise InvalidResourceError(parent_uid,
                     'Parent {} is not a container.')
-
         pfx = parent_uid + '/'
     else:
         pfx = ''

+ 12 - 0
lakesuperior/exceptions.py

@@ -64,6 +64,18 @@ class IncompatibleLdpTypeError(ResourceError):
 
 
 
+class PathSegmentError(ResourceError):
+    '''
+    Raised when a LDP-NR resource is a path segment.
+
+    This may be an expected result and may be handled to return a 200.
+    '''
+    def __str__(self):
+        return self.msg or 'Resource \'{}\' is a path segment.'.format(
+                self.uid)
+
+
+
 class ServerManagedTermError(RuntimeError):
     '''
     Raised in an attempt to change a triple containing a server-managed term.

+ 0 - 0
lakesuperior/model/__init__.py


+ 70 - 5
lakesuperior/model/generic_resource.py

@@ -1,13 +1,49 @@
-from rdflib import Resource
+from flask import current_app, g
+from rdflib.resource import Resource
 
-class GenericResource(Resource):
+from lakesuperior.dictionaries.namespaces import ns_collection as nsc
+from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
+from lakesuperior.store_layouts.ldp_rs.rsrc_centric_layout import PTREE_GR_URI
+
+
+class GenericResource:
     '''
-    Generic RDF resource that extends from rdflib.Resource.
+    Generic RDF resource.
 
-    This should also serve as the base class for LDP resource classes. Some
-    convenience methods missing in that class can also be added here.
+    This may not have a dedicated named graph.
     '''
 
+    def __init__(self, uid):
+        '''
+        Initialize a generic resource.
+        '''
+        self.uid = uid
+        self.urn = nsc['fcres'][uid]
+        self.rdfly = current_app.rdfly
+
+
+    @property
+    def metadata(self):
+        if not hasattr(self, '_metadata'):
+            gr = self.rdfly.get_raw(self.urn)
+            self._metadata = Resource(gr, self.urn)
+
+        return self._metadata
+
+
+    @property
+    def out_graph(self):
+        return self.metadata.graph
+
+
+    def head(self):
+        '''
+        No-op to keep consistency with methods that may request this
+        without knowing if it is a LDP resource or what else.
+        '''
+        return {}
+
+
     def extract(self, p=None, o=None):
         '''
         Extract an in-memory copy of the resource containing either a
@@ -16,3 +52,32 @@ class GenericResource(Resource):
         '''
         # @TODO
         pass
+
+
+class PathSegment(GenericResource):
+    '''
+    Represent a path segment in a URI.
+
+    A path segment is not an LDP resource, and its metadata should be confined
+    to a separate, generic named graph.
+    '''
+    @property
+    def metadata(self):
+        if not hasattr(self, '_metadata'):
+            gr = self.rdfly.get_raw(self.urn, PTREE_GR_URI)
+            self._metadata = Resource(gr, self.urn)
+
+        return self._metadata
+
+
+    def get(self):
+        '''
+        Get an RDF representation of the resource.
+
+        Internal URNs are replaced by global URIs using the endpoint webroot.
+        The resource has very few triples so no namespace manager is used to
+        reduce output size.
+        '''
+        return g.tbox.globalize_graph(self.out_graph)
+
+

+ 6 - 3
lakesuperior/model/ldp_factory.py

@@ -1,6 +1,6 @@
 import logging
 
-#from pprint import pformat
+from pprint import pformat
 
 import rdflib
 
@@ -10,6 +10,7 @@ from rdflib.resource import Resource
 from rdflib.namespace import RDF
 
 from lakesuperior import model
+from lakesuperior.model.generic_resource import PathSegment
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.exceptions import (IncompatibleLdpTypeError,
         InvalidResourceError, ResourceNotExistsError)
@@ -42,8 +43,8 @@ class LdpFactory:
         imr_urn = nsc['fcres'][uid]
 
         rsrc_meta = current_app.rdfly.get_metadata(uid)
-        #__class__._logger.debug('Extracted metadata: {}'.format(
-        #        pformat(set(rsrc_meta.graph))))
+        __class__._logger.debug('Extracted metadata: {}'.format(
+                pformat(set(rsrc_meta.graph))))
         rdf_types = set(rsrc_meta.graph[imr_urn : RDF.type])
 
         if __class__.LDP_NR_TYPE in rdf_types:
@@ -52,6 +53,8 @@ class LdpFactory:
         elif __class__.LDP_RS_TYPE in rdf_types:
             __class__._logger.info('Resource is a LDP-RS.')
             rsrc = model.ldp_rs.LdpRs(uid, repr_opts, **kwargs)
+        elif nsc['fcsystem']['PathSegment'] in rdf_types:
+            return PathSegment(uid)
         else:
             raise ResourceNotExistsError(uid)
 

+ 41 - 36
lakesuperior/model/ldpr.py

@@ -262,9 +262,7 @@ class Ldpr(metaclass=ABCMeta):
     @property
     def out_graph(self):
         '''
-        Retun a globalized graph of the resource's IMR.
-
-        Internal URNs are replaced by global URIs using the endpoint webroot.
+        Return a graph of the resource's IMR formatted for output.
         '''
         out_gr = Graph()
 
@@ -388,8 +386,11 @@ class Ldpr(metaclass=ABCMeta):
 
     def get(self):
         '''
-        This gets the RDF metadata. The binary retrieval is handled directly
-        by the route.
+        Get an RDF representation of the resource.
+
+        The binary retrieval is handled directly by the router.
+
+        Internal URNs are replaced by global URIs using the endpoint webroot.
         '''
         gr = g.tbox.globalize_graph(self.out_graph)
         gr.namespace_manager = nsm
@@ -897,17 +898,18 @@ class Ldpr(metaclass=ABCMeta):
         parent_uid = ROOT_UID # Defaults to root
         segments = []
         for cparent_uid in rev_search_order:
-            cparent_uid = cparent_uid
-
             if self.rdfly.ask_rsrc_exists(cparent_uid):
+                # If a real parent is found, set that and break the loop.
                 parent_uid = cparent_uid
                 break
             else:
+                # Otherwise, add to the list of segments to be built.
                 segments.append((cparent_uid, cur_child_uid))
                 cur_child_uid = cparent_uid
 
-        for uid, child_uid in segments:
-            self._create_path_segment(uid, child_uid, parent_uid)
+        for segm_uid, next_uid in segments:
+            self.rdfly.add_path_segment(uid=segm_uid, next_uid=next_uid,
+                    child_uid=self.uid, parent_uid=parent_uid)
 
         return parent_uid
 
@@ -923,35 +925,38 @@ class Ldpr(metaclass=ABCMeta):
         )
 
 
-    def _create_path_segment(self, uid, child_uid, real_parent_uid):
-        '''
-        Create a path segment with a non-LDP containment statement.
-
-        If a resource such as `fcres:a/b/c` is created, and neither fcres:a or
-        fcres:a/b exists, we have to create two "hidden" containment statements
-        between a and a/b and between a/b and a/b/c in order to maintain the
-        `containment chain.
-        '''
-        rsrc_uri = nsc['fcres'][uid]
-
-        add_trp = {
-            (rsrc_uri, nsc['fcsystem'].contains, nsc['fcres'][child_uid]),
-            (rsrc_uri, nsc['ldp'].contains, self.urn),
-            (rsrc_uri, RDF.type, nsc['ldp'].Container),
-            (rsrc_uri, RDF.type, nsc['ldp'].BasicContainer),
-            (rsrc_uri, RDF.type, nsc['ldp'].RDFSource),
-            (rsrc_uri, RDF.type, nsc['fcrepo'].Pairtree),
-            (rsrc_uri, nsc['fcrepo'].hasParent, nsc['fcres'][real_parent_uid]),
-        }
+    #def _create_path_segment(self, uid, child_uid, parent_uid):
+    #    '''
+    #    Create a path segment with a non-LDP containment statement.
 
-        self.rdfly.modify_rsrc(
-                uid, add_trp=add_trp)
+    #    If a resource such as `fcres:a/b/c` is created, and neither fcres:a or
+    #    fcres:a/b exists, we have to create two "hidden" containment statements
+    #    between a and a/b and between a/b and a/b/c in order to maintain the
+    #    containment chain.
 
-        # If the path segment is just below root
-        if '/' not in uid:
-            self.rdfly.modify_rsrc(ROOT_UID, add_trp={
-                (ROOT_RSRC_URI, nsc['fcsystem'].contains, nsc['fcres'][uid])
-            })
+    #    These triples are stored separately and are not versioned.
+    #    '''
+    #    rsrc_uri = nsc['fcres'][uid]
+
+    #    add_trp = {
+    #        (rsrc_uri, nsc['fcsystem'].contains, nsc['fcres'][child_uid]),
+    #        (rsrc_uri, nsc['ldp'].contains, self.urn),
+    #        (rsrc_uri, RDF.type, nsc['ldp'].Container),
+    #        (rsrc_uri, RDF.type, nsc['ldp'].BasicContainer),
+    #        (rsrc_uri, RDF.type, nsc['ldp'].RDFSource),
+    #        (rsrc_uri, RDF.type, nsc['fcrepo'].Pairtree),
+    #        (rsrc_uri, nsc['fcrepo'].hasParent, nsc['fcres'][real_parent_uid]),
+    #    }
+
+    #    self.rdfly.add_segment(nsc['fcres'][uid], next=self.urn,
+    #            child=nsc['fcres'][child_uid],
+    #            parent=nsc['fcres'][parent_uid])
+
+    #    # If the path segment is just below root
+    #    if '/' not in uid:
+    #        self.rdfly.modify_rsrc(ROOT_UID, add_trp={
+    #            (ROOT_RSRC_URI, nsc['fcsystem'].contains, nsc['fcres'][uid])
+    #        })
 
 
     def _add_ldp_dc_ic_rel(self, cont_rsrc):

+ 72 - 3
lakesuperior/store_layouts/ldp_rs/rsrc_centric_layout.py

@@ -12,11 +12,12 @@ from rdflib.term import Literal
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.exceptions import (InvalidResourceError,
-        ResourceNotExistsError, TombstoneError)
+        ResourceNotExistsError, TombstoneError, PathSegmentError)
 
 
 META_GR_URI = nsc['fcsystem']['meta']
 HIST_GR_URI = nsc['fcsystem']['histmeta']
+PTREE_GR_URI = nsc['fcsystem']['pairtree']
 VERS_CONT_LABEL = 'fcr:versions'
 
 
@@ -161,6 +162,32 @@ class RsrcCentricLayout:
         self.ds.store.close()
 
 
+    def get_raw(self, uri, ctx=None):
+        '''
+        Get a raw graph of a non-LDP resource.
+
+        The graph is queried across all contexts or within a specific one.
+
+        @param uri (rdflib.term.URIRef) URI of the subject.
+        @param ctx (rdflib.term.URIRef) URI of the optional context. If None,
+        all named graphs are queried.
+
+        @return rdflib.Graph
+        '''
+        bindings = {'s': uri}
+        if ctx:
+            bindings['g'] = ctx
+
+        qry = '''
+        CONSTRUCT { ?s ?p ?o . } {
+          GRAPH ?g {
+            ?s ?p ?o .
+          }
+        }'''
+
+        return self._parse_construct(qry, init_bindings=bindings)
+
+
     def extract_imr(
                 self, uid, ver_uid=None, strict=True, incl_inbound=False,
                 incl_children=True, embed_children=False, **kwargs):
@@ -192,8 +219,8 @@ class RsrcCentricLayout:
         if incl_inbound and len(gr):
             gr += self.get_inbound_rel(nsc['fcres'][uid])
 
-        #self._logger.debug('Found resource: {}'.format(
-        #        gr.serialize(format='turtle').decode('utf-8')))
+        self._logger.debug('Found resource: {}'.format(
+                gr.serialize(format='turtle').decode('utf-8')))
         rsrc = Resource(gr, nsc['fcres'][uid])
 
         if strict:
@@ -218,6 +245,15 @@ class RsrcCentricLayout:
         if ver_uid:
             uid = self.snapshot_uid(uid, ver_uid)
         gr = self.ds.graph(nsc['fcadmin'][uid]) | Graph()
+        if not len(gr):
+            # If no resource is found, search in pairtree graph.
+            try:
+                gr = self.ds.graph(PTREE_GR_URI).query(
+                        'CONSTRUCT WHERE {?s ?p ?o}',
+                        initBindings={'s': nsc['fcres'][uid]}).graph
+            except ResultException:
+                gr = Graph()
+
         rsrc = Resource(gr, nsc['fcres'][uid])
         if strict:
             self._check_rsrc_status(rsrc)
@@ -387,6 +423,39 @@ class RsrcCentricLayout:
         return '{}/{}/{}'.format(uid, VERS_CONT_LABEL, ver_uid)
 
 
+    def add_path_segment(self, uid, next_uid, parent_uid, child_uid):
+        '''
+        Add a path segment (formerly called a pairtree segment).
+
+        @param uid (string) The UID of the subject.
+        @param next_uid (string) UID of the next step down. This may be an LDP
+        resource or another segment.
+        @param parent_uid (string) UID of the actual resource(s) that contains
+        the segment.
+        @param child_uid (string) UID of the LDP resource contained by the
+        segment.
+        '''
+        props = (
+            (RDF.type, nsc['fcsystem'].PathSegment),
+            (nsc['fcsystem'].contains, nsc['fcres'][next_uid]),
+            (nsc['ldp'].contains, nsc['fcres'][child_uid]),
+            #(RDF.type, nsc['ldp'].Container),
+            #(RDF.type, nsc['ldp'].BasicContainer),
+            #(RDF.type, nsc['ldp'].RDFSource),
+            #(RDF.type, nsc['fcrepo'].Pairtree),
+            (nsc['fcrepo'].hasParent, nsc['fcres'][parent_uid]),
+        )
+        for p, o in props:
+            self.ds.graph(PTREE_GR_URI).add((nsc['fcres'][uid], p, o))
+
+
+    def delete_path_segment(self, uid):
+        '''
+        Delete a path segment (formerly called a pairtree segment).
+
+        @param uid (string) The UID of the segment. All triples having this
+        segment as their subject are removed from the path segment graph.
+        '''
+        # rdflib graphs expose `remove`, not `delete`; None is a wildcard.
+        self.ds.graph(PTREE_GR_URI).remove((nsc['fcres'][uid], None, None))
+
+
     ## PROTECTED MEMBERS ##
 
     def _check_rsrc_status(self, rsrc):

+ 3 - 2
tests/endpoints/test_ldp.py

@@ -80,8 +80,9 @@ class TestLdp:
 
         assert self.client.get(path).status_code == 200
 
-        assert self.client.put('/ldp/test_tree/a').status_code == 409
-        assert self.client.post('/ldp/test_tree/a').status_code == 409
+        assert self.client.put('/ldp/test_tree/a').status_code == 201
+        assert self.client.post('/ldp/test_tree/a').status_code == 201
+        # @TODO More thorough testing of contents
 
 
     def test_put_nested_tree(self, client):