Bladeren bron

Include leading slash in UIDs (à la filesystem path).

Stefano Cossu 7 jaren geleden
bovenliggende
commit
e9f2e4fd85

+ 4 - 4
lakesuperior/dictionaries/namespaces.py

@@ -14,10 +14,10 @@ core_namespaces = {
         'http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#'),
     #'fcrconfig' : Namespace('http://fedora.info/definitions/v4/config#'),
     'fcrepo' : Namespace('http://fedora.info/definitions/v4/repository#'),
-    'fcadmin' : Namespace('info:fcsystem/graph/admin/'),
-    'fcres' : Namespace('info:fcres/'),
-    'fcmain' : Namespace('info:fcsystem/graph/userdata/_main/'),
-    'fcstruct' : Namespace('info:fcsystem/graph/structure/'),
+    'fcadmin' : Namespace('info:fcsystem/graph/admin'),
+    'fcres' : Namespace('info:fcres'),
+    'fcmain' : Namespace('info:fcsystem/graph/userdata/_main'),
+    'fcstruct' : Namespace('info:fcsystem/graph/structure'),
     'fcsystem' : Namespace('info:fcsystem/'),
     'foaf': Namespace('http://xmlns.com/foaf/0.1/'),
     'iana' : Namespace('http://www.iana.org/assignments/relation/'),

+ 17 - 17
lakesuperior/endpoints/ldp.py

@@ -74,15 +74,21 @@ def bp_url_defaults(endpoint, values):
     if url_prefix is not None:
         values.setdefault('url_prefix', url_prefix)
 
+
 @ldp.url_value_preprocessor
 def bp_url_value_preprocessor(endpoint, values):
     g.url_prefix = values.pop('url_prefix')
     g.webroot = request.host_url + g.url_prefix
+    # Normalize leading slashes for UID.
+    if 'uid' in values:
+        values['uid'] = '/' + values['uid'].lstrip('/')
+    if 'parent_uid' in values:
+        values['parent_uid'] = '/' + values['parent_uid'].lstrip('/')
 
 
 @ldp.before_request
 def log_request_start():
-    logger.info('\n\n** Start {} {} **'.format(request.method, request.url))
+    logger.info('** Start {} {} **'.format(request.method, request.url))
 
 
 @ldp.before_request
@@ -92,7 +98,7 @@ def instantiate_req_vars():
 
 @ldp.after_request
 def log_request_end(rsp):
-    logger.info('** End {} {} **\n\n'.format(request.method, request.url))
+    logger.info('** End {} {} **'.format(request.method, request.url))
 
     return rsp
 
@@ -100,7 +106,7 @@ def log_request_end(rsp):
 ## REST SERVICES ##
 
 @ldp.route('/<path:uid>', methods=['GET'], strict_slashes=False)
-@ldp.route('/', defaults={'uid': ''}, methods=['GET'], strict_slashes=False)
+@ldp.route('/', defaults={'uid': '/'}, methods=['GET'], strict_slashes=False)
 @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
         methods=['GET'])
 def get_resource(uid, force_rdf=False):
@@ -115,6 +121,7 @@ def get_resource(uid, force_rdf=False):
     a LDP-NR. This is not available in the API but is used e.g. by the
     `*/fcr:metadata` endpoint. The default is False.
     '''
+    logger.info('UID: {}'.format(uid))
     out_headers = std_headers
     repr_options = defaultdict(dict)
     if 'prefer' in request.headers:
@@ -187,10 +194,10 @@ def get_version(uid, ver_uid):
         return _negotiate_content(g.tbox.globalize_graph(gr))
 
 
-@ldp.route('/<path:parent>', methods=['POST'], strict_slashes=False)
-@ldp.route('/', defaults={'parent': ''}, methods=['POST'],
+@ldp.route('/<path:parent_uid>', methods=['POST'], strict_slashes=False)
+@ldp.route('/', defaults={'parent_uid': '/'}, methods=['POST'],
         strict_slashes=False)
-def post_resource(parent):
+def post_resource(parent_uid):
     '''
     https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
 
@@ -209,8 +216,7 @@ def post_resource(parent):
     if LdpFactory.is_rdf_parsable(mimetype):
         # If the content is RDF, localize in-repo URIs.
         global_rdf = stream.read()
-        local_rdf = global_rdf.replace(
-                g.webroot.encode('utf-8'), nsc['fcres'].encode('utf-8'))
+        local_rdf = g.tbox.localize_payload(global_rdf)
         stream = BytesIO(local_rdf)
         is_rdf = True
     else:
@@ -218,7 +224,7 @@ def post_resource(parent):
 
     try:
         uid = rsrc_api.create(
-                parent, slug, stream=stream, mimetype=mimetype,
+                parent_uid, slug, stream=stream, mimetype=mimetype,
                 handling=handling, disposition=disposition)
     except ResourceNotExistsError as e:
         return str(e), 404
@@ -262,13 +268,7 @@ def put_resource(uid):
     if LdpFactory.is_rdf_parsable(mimetype):
         # If the content is RDF, localize in-repo URIs.
         global_rdf = stream.read()
-        local_rdf = global_rdf.replace(
-            (g.webroot + '/').encode('utf-8'),
-            nsc['fcres'].encode('utf-8')
-        ).replace(
-            g.webroot.encode('utf-8'),
-            nsc['fcres'].encode('utf-8')
-        )
+        local_rdf = g.tbox.localize_payload(global_rdf)
         stream = BytesIO(local_rdf)
         is_rdf = True
     else:
@@ -484,7 +484,7 @@ def _bistream_from_req():
         mimetype = request.mimetype
 
     if mimetype == '' or mimetype == 'application/x-www-form-urlencoded':
-        if stream.limit == 0:
+        if getattr(stream, 'limit', 0) == 0:
             stream = mimetype = None
         else:
             mimetype = 'application/octet-stream'

+ 5 - 5
lakesuperior/exceptions.py

@@ -20,7 +20,7 @@ class ResourceExistsError(ResourceError):
     This usually surfaces at the HTTP level as a 409.
     '''
     def __str__(self):
-        return self.msg or 'Resource /{} already exists.'.format(self.uid)
+        return self.msg or 'Resource {} already exists.'.format(self.uid)
 
 
 
@@ -32,7 +32,7 @@ class ResourceNotExistsError(ResourceError):
     This usually surfaces at the HTTP level as a 404.
     '''
     def __str__(self):
-        return self.msg or 'Resource /{} not found.'.format(self.uid)
+        return self.msg or 'Resource {} not found.'.format(self.uid)
 
 
 
@@ -43,7 +43,7 @@ class InvalidResourceError(ResourceError):
     This usually surfaces at the HTTP level as a 409 or other error.
     '''
     def __str__(self):
-        return self.msg or 'Resource /{} is invalid.'.format(self.uid)
+        return self.msg or 'Resource {} is invalid.'.format(self.uid)
 
 
 
@@ -59,7 +59,7 @@ class IncompatibleLdpTypeError(ResourceError):
 
 
     def __str__(self):
-        return self.msg or 'Invalid content type \'{}\' for resource /{}'.\
+        return self.msg or 'Invalid content type \'{}\' for resource {}'.\
                 format(self.mimetype, self.uid)
 
 
@@ -159,7 +159,7 @@ class TombstoneError(RuntimeError):
 
     def __str__(self):
         return (
-            'Discovered tombstone resource at /{}, departed: {}\n'
+            'Discovered tombstone resource at {}, departed: {}\n'
             'To resurrect this resource, send a POST request to its tombstone.'
             .format(self.uid, self.ts)
         )

+ 1 - 1
lakesuperior/globals.py

@@ -12,7 +12,7 @@ RES_CREATED = '_create_'
 RES_DELETED = '_delete_'
 RES_UPDATED = '_update_'
 
-ROOT_UID = ''
+ROOT_UID = '/'
 ROOT_RSRC_URI = nsc['fcres'][ROOT_UID]
 
 

+ 15 - 26
lakesuperior/model/ldp_factory.py

@@ -15,7 +15,7 @@ from lakesuperior.env import env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.exceptions import (
         IncompatibleLdpTypeError, InvalidResourceError, ResourceExistsError,
-        ResourceNotExistsError)
+        ResourceNotExistsError, TombstoneError)
 
 
 LDP_NR_TYPE = nsc['ldp'].NonRDFSource
@@ -32,7 +32,7 @@ class LdpFactory:
     '''
     @staticmethod
     def new_container(uid):
-        if not uid:
+        if not uid.startswith('/') or uid == '/':
             raise InvalidResourceError(uid)
         if rdfly.ask_rsrc_exists(uid):
             raise ResourceExistsError(uid)
@@ -105,7 +105,6 @@ class LdpFactory:
             provided_imr = Resource(gr, uri)
 
             # Determine whether it is a basic, direct or indirect container.
-            Ldpr = Ldpr
             if Ldpr.MBR_RSRC_URI in gr.predicates() and \
                     Ldpr.MBR_REL_URI in gr.predicates():
                 if Ldpr.INS_CNT_REL_URI in gr.predicates():
@@ -139,7 +138,7 @@ class LdpFactory:
 
         try:
             types = inst.types
-        except:
+        except (TombstoneError, ResourceNotExistsError):
             types = set()
 
         return inst
@@ -199,32 +198,22 @@ class LdpFactory:
             return uid
 
         # Shortcut!
-        if not path and parent_uid == '':
-            uid = split_if_legacy(str(uuid4()))
-            return uid
+        if not path and parent_uid == '/':
+            return '/' + split_if_legacy(str(uuid4()))
 
-        parent = LdpFactory.from_stored(parent_uid,
-                repr_opts={'incl_children' : False})
+        if not parent_uid.startswith('/'):
+            raise ValueError('Invalid parent UID: {}'.format(parent_uid))
 
-        # Set prefix.
-        if parent_uid:
-            if nsc['ldp'].Container not in parent.types:
-                raise InvalidResourceError(parent_uid,
-                        'Parent {} is not a container.')
-            pfx = parent_uid + '/'
-        else:
-            pfx = ''
+        parent = LdpFactory.from_stored(parent_uid)
+        if nsc['ldp'].Container not in parent.types:
+            raise InvalidResourceError(parent_uid,
+                    'Parent {} is not a container.')
 
-        # Create candidate UID and validate.
         if path:
-            cnd_uid = pfx + path
-            if rdfly.ask_rsrc_exists(cnd_uid):
-                uid = pfx + split_if_legacy(str(uuid4()))
-            else:
-                uid = cnd_uid
-        else:
-            uid = pfx + split_if_legacy(str(uuid4()))
+            cnd_uid = parent_uid + path
+            if not rdfly.ask_rsrc_exists(cnd_uid):
+                return cnd_uid
 
-        return uid
+        return parent_uid + split_if_legacy(str(uuid4()))
 
 

+ 9 - 9
lakesuperior/model/ldpr.py

@@ -734,19 +734,19 @@ class Ldpr(metaclass=ABCMeta):
         This function may recurse up the path tree until an existing container
         is found.
 
-        E.g. if only fcres:a exists:
-        - If fcres:a/b/c/d is being created, a becomes container of
-          fcres:a/b/c/d. Also, containers are created for fcres:a/b and
-          fcres:a/b/c.
-        - If fcres:e is being created, the root node becomes container of
-          fcres:e.
+        E.g. if only fcres:/a exists:
+        - If fcres:/a/b/c/d is being created, a becomes container of
+          fcres:/a/b/c/d. Also, containers are created for fcres:/a/b and
+          fcres:/a/b/c.
+        - If fcres:/e is being created, the root node becomes container of
+          fcres:/e.
         '''
         from lakesuperior.model.ldp_factory import LdpFactory
 
-        if '/' in self.uid:
+        if '/' in self.uid.lstrip('/'):
             # Traverse up the hierarchy to find the parent.
-            path_components = self.uid.split('/')
-            cnd_parent_uid = '/'.join(path_components[:-1])
+            path_components = self.uid.lstrip('/').split('/')
+            cnd_parent_uid = '/' + '/'.join(path_components[:-1])
             if rdfly.ask_rsrc_exists(cnd_parent_uid):
                 parent_rsrc = LdpFactory.from_stored(cnd_parent_uid)
                 if nsc['ldp'].Container not in parent_rsrc.types:

+ 2 - 0
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -259,6 +259,7 @@ class RsrcCentricLayout:
         '''
         See base_rdf_layout.ask_rsrc_exists.
         '''
+        logger.debug('Checking if resource exists: {}'.format(uid))
         meta_gr = self.ds.graph(nsc['fcadmin'][uid])
         return bool(
                 meta_gr[nsc['fcres'][uid] : RDF.type : nsc['fcrepo'].Resource])
@@ -268,6 +269,7 @@ class RsrcCentricLayout:
         '''
         This is an optimized query to get only the administrative metadata.
         '''
+        logger.debug('Getting metadata for: {}'.format(uid))
         if ver_uid:
             uid = self.snapshot_uid(uid, ver_uid)
         gr = self.ds.graph(nsc['fcadmin'][uid]) | Graph()

+ 28 - 15
lakesuperior/toolbox.py

@@ -42,9 +42,7 @@ class Toolbox:
 
         @return URIRef
         '''
-        uri = '{}/{}'.format(g.webroot, uid) if uid else g.webroot
-
-        return URIRef(uri)
+        return URIRef(g.webroot + uid)
 
 
     def uri_to_uid(self, uri):
@@ -55,10 +53,10 @@ class Toolbox:
         if uri.startswith(nsc['fcres']):
             return str(uri).replace(nsc['fcres'], '')
         else:
-            return str(uri).replace(g.webroot, '').strip('/')
+            return '/' + str(uri).replace(g.webroot, '').strip('/')
 
 
-    def localize_string(self, s):
+    def localize_uri_string(self, s):
         '''Convert URIs into URNs in a string using the application base URI.
 
         @param string s Input string.
@@ -68,7 +66,8 @@ class Toolbox:
         if s.strip('/') == g.webroot:
             return str(ROOT_RSRC_URI)
         else:
-            return s.strip('/').replace(g.webroot+'/', str(nsc['fcres']))
+            return s.rstrip('/').replace(
+                    g.webroot, str(nsc['fcres']))
 
 
     def localize_term(self, uri):
@@ -79,7 +78,7 @@ class Toolbox:
 
         @return rdflib.term.URIRef
         '''
-        return URIRef(self.localize_string(str(uri)))
+        return URIRef(self.localize_uri_string(str(uri)))
 
 
     def localize_triple(self, trp):
@@ -110,6 +109,23 @@ class Toolbox:
         return l_gr
 
 
+    def localize_payload(self, data):
+        '''
+        Localize an RDF stream with domain-specific URIs.
+
+        @param data (bytes) Binary RDF data.
+
+        @return bytes
+        '''
+        return data.replace(
+            (g.webroot + '/').encode('utf-8'),
+            (nsc['fcres'] + '/').encode('utf-8')
+        ).replace(
+            g.webroot.encode('utf-8'),
+            (nsc['fcres'] + '/').encode('utf-8')
+        )
+
+
     def localize_ext_str(self, s, urn):
         '''
         Convert global URIs to local in a SPARQL or RDF string.
@@ -118,14 +134,14 @@ class Toolbox:
         of fragments and relative URIs.
 
         This is a 3-pass replacement. First, global URIs whose webroot matches
-        the application ones are replaced with local URNs. Then, relative URIs
-        are converted to absolute using the URN as the base; finally, the
-        root node is appropriately addressed.
+        the application ones are replaced with internal URIs. Then, relative
+        URIs are converted to absolute using the internal URI as the base;
+        finally, the root node is appropriately addressed.
         '''
         esc_webroot = g.webroot.replace('/', '\\/')
         #loc_ptn = r'<({}\/?)?(.*?)?(\?.*?)?(#.*?)?>'.format(esc_webroot)
         loc_ptn1 = r'<{}\/?(.*?)>'.format(esc_webroot)
-        loc_sub1 = '<{}\\1>'.format(nsc['fcres'])
+        loc_sub1 = '<{}/\\1>'.format(nsc['fcres'])
         s1 = re.sub(loc_ptn1, loc_sub1, s)
 
         loc_ptn2 = r'<([#?].*?)?>'
@@ -146,7 +162,7 @@ class Toolbox:
 
         @return string
         '''
-        return s.replace(str(nsc['fcres']), g.webroot + '/')
+        return s.replace(str(nsc['fcres']), g.webroot)
 
 
     def globalize_term(self, urn):
@@ -157,9 +173,6 @@ class Toolbox:
 
         @return rdflib.term.URIRef
         '''
-        if urn == ROOT_RSRC_URI:
-            urn = nsc['fcres']
-
         return URIRef(self.globalize_string(str(urn)))
 
 

+ 8 - 6
tests/endpoints/test_ldp.py

@@ -232,7 +232,8 @@ class TestLdp:
         PREFIX res: <http://example-source.org/res/>
         <> ns:p1 res:bogus ;
           ns:p2 <{0}> ;
-          ns:p3 <{0}/nonexistent> .
+          ns:p3 <{0}/> ;
+          ns:p4 <{0}/nonexistent> .
         '''.format(g.webroot)
         put_rsp = self.client.put('/ldp/test_missing_ref', data=data, headers={
             'content-type': 'text/turtle'})
@@ -245,11 +246,12 @@ class TestLdp:
         gr = Graph().parse(data=resp.data, format='text/turtle')
         assert URIRef('http://example-source.org/res/bogus') in \
                 gr.objects(None, URIRef('http://example.org#p1'))
-        #pdb.set_trace()
-        assert URIRef(g.webroot + '/') in \
-                gr.objects(None, URIRef('http://example.org#p2'))
-        assert URIRef(g.webroot + '/nonexistent') not in \
-                gr.objects(None, URIRef('http://example.org#p3'))
+        assert URIRef(g.webroot + '/') in (
+                gr.objects(None, URIRef('http://example.org#p2')))
+        assert URIRef(g.webroot + '/') in (
+                gr.objects(None, URIRef('http://example.org#p3')))
+        assert URIRef(g.webroot + '/nonexistent') not in (
+                gr.objects(None, URIRef('http://example.org#p4')))
 
 
     def test_post_resource(self, client):

+ 16 - 15
tests/test_toolbox.py

@@ -27,29 +27,30 @@ class TestToolbox:
     #    assert g.tbox.camelcase('test__input__string') == 'Test_Input_String'
 
     def test_uid_to_uri(self):
-        assert g.tbox.uid_to_uri('1234') == URIRef(g.webroot + '/1234')
+        assert g.tbox.uid_to_uri('/1234') == URIRef(g.webroot + '/1234')
+        assert g.tbox.uid_to_uri('/1/2/34') == URIRef(g.webroot + '/1/2/34')
         assert g.tbox.uid_to_uri('') == URIRef(g.webroot)
 
 
     def test_uri_to_uid(self):
-        assert g.tbox.uri_to_uid(URIRef(g.webroot) + '/test01') == 'test01'
+        assert g.tbox.uri_to_uid(URIRef(g.webroot) + '/test01') == '/test01'
         assert g.tbox.uri_to_uid(URIRef(g.webroot) + '/test01/test02') == \
-                'test01/test02'
-        assert g.tbox.uri_to_uid(URIRef(g.webroot)) == ''
-        assert g.tbox.uri_to_uid(nsc['fcres']['']) == ''
-        assert g.tbox.uri_to_uid(nsc['fcres']['1234']) == '1234'
-        assert g.tbox.uri_to_uid(nsc['fcres']['1234/5678']) == '1234/5678'
+                '/test01/test02'
+        assert g.tbox.uri_to_uid(URIRef(g.webroot)) == '/'
+        assert g.tbox.uri_to_uid(nsc['fcres']['/']) == '/'
+        assert g.tbox.uri_to_uid(nsc['fcres']['/1234']) == '/1234'
+        assert g.tbox.uri_to_uid(nsc['fcres']['/1234/5678']) == '/1234/5678'
 
 
-    def test_localize_string(self):
+    def test_localize_uri_string(self):
         '''
         Test string localization.
         '''
-        assert g.tbox.localize_string(g.webroot + '/test/uid') == \
-                g.tbox.localize_string(g.webroot + '/test/uid/') == \
-                str(nsc['fcres']['test/uid'])
-        assert g.tbox.localize_string(g.webroot) == str(nsc['fcres'][''])
-        assert g.tbox.localize_string('http://bogus.org/test/uid') == \
+        assert g.tbox.localize_uri_string(g.webroot + '/test/uid') == \
+                g.tbox.localize_uri_string(g.webroot + '/test/uid/') == \
+                str(nsc['fcres']['/test/uid'])
+        assert g.tbox.localize_uri_string(g.webroot) == str(nsc['fcres']['/'])
+        assert g.tbox.localize_uri_string('http://bogus.org/test/uid') == \
                 'http://bogus.org/test/uid'
 
 
@@ -59,7 +60,7 @@ class TestToolbox:
         '''
         assert g.tbox.localize_term(g.webroot + '/test/uid') == \
                 g.tbox.localize_term(g.webroot + '/test/uid/') == \
-                nsc['fcres']['test/uid']
+                nsc['fcres']['/test/uid']
 
 
     def test_localize_ext_str(self):
@@ -100,4 +101,4 @@ class TestToolbox:
         '''
 
         assert g.tbox.localize_ext_str(
-                input, nsc['fcres']['123']) == exp_output
+                input, nsc['fcres']['/123']) == exp_output