Bläddra i källkod

Refactor store layout loading to be a generic class method; initial
(non-functional) LDP-NR implementation.

Stefano Cossu 7 år sedan
förälder
incheckning
a9e0cbf0f1

+ 5 - 5
etc.skeleton/application.yml

@@ -28,7 +28,6 @@ store:
         webroot: http://localhost:9999/namespace/fcrepo/
         query_ep: sparql
         update_ep: sparql
-        default_graph: http://www.w3.org/ns/sparql-service-description#
         # Optional
         username: <set me>
         password: <set me>
@@ -38,6 +37,8 @@ store:
     # This is for now a POSIX filesystem. Other solutions such as HDFS may be
     # possible in the future.
     ldp_nr:
+        # See store.ldp_rs.layout.
+        layout: default_layout
         # The filesystem path to the root of the binary store.
         path: /data/fcrepo/ldpnr_store
 
@@ -54,10 +55,9 @@ store:
 
         # Max. number of branches to generate. 0 will split the string until
         # it reaches the end.
-        # E.g. if the hash value is 01234567-89ab-cdef-0123-4565789abcdef
-        # (dashes added for readability), and the branch length value is 2, and
-        # the branch number is 4, the path will be 
-        # 01/23/45/67/89abcdef01234565789abcdef. For a value of 0 it will be
+        # E.g. if the hash value is 0123456789abcdef0123456789abcdef and the
+        # branch length value is 2, and the branch number is 4, the path will
+        # be 01/23/45/67/89abcdef0123456789abcdef. For a value of 0 it will be
         # 01/23/45/67/89/ab/cd/ef/01/23/45/67/89/ab/cd/ef. Check your system
         # capabilities for maximum nested directories before setting this to 0,
         # especially with longer hash algorithms.

+ 0 - 4
lakesuperior/connectors/filesystem_connector.py

@@ -1,4 +0,0 @@
-import logging
-
-class FilesystemConnector:
-    pass

+ 22 - 9
lakesuperior/endpoints/ldp.py

@@ -98,17 +98,29 @@ def post_resource(parent):
     except KeyError:
         slug = None
 
-    if 'Content-Type' in request.headers:
-        logger.debug('Content type: {}'.format(request.headers['Content-Type']))
-        if request.headers['Content-Type'] in accept_post_rdf:
-            cls = Ldpc
-        else:
-            cls = LdpNr
-    else:
-        # @TODO guess content type from magic number
+    logger.debug('Content type: {}'.format(request.mimetype))
+    logger.debug('files: {}'.format(request.files))
+    #logger.debug('stream: {}'.format(request.stream))
+    #logger.debug('form: {}'.format(request.form))
+    #logger.debug('data: {}'.format(request.data))
+    if request.mimetype in accept_post_rdf:
         cls = Ldpc
+        data = request.data.decode('utf-8')
+    else:
+        cls = LdpNr
+        if request.mimetype == 'multipart/form-data':
+            # This seems the "right" way to upload a binary file, with a multipart/
+            # form-data MIME type and the file in the `file` field. This however is
+            # not supported by FCREPO4.
+            data = request.files.get('file')
+        else:
+            # This is a less clean way, with the file in the form body and the
+            # request as application/x-www-form-urlencoded.
+            # This is how FCREPO4 accepts binary uploads.
+            data = request.data
 
     logger.info('POSTing resource of type: {}'.format(cls.__name__))
+    #logger.info('POST data: {}'.format(data))
 
     try:
        rsrc = cls.inst_for_post(parent, slug)
@@ -118,7 +130,7 @@ def post_resource(parent):
         return str(e), 409
 
     try:
-        rsrc.post(request.get_data().decode('utf-8'))
+        rsrc.post(data)
     except ServerManagedTermError as e:
         return str(e), 412
 
@@ -138,6 +150,7 @@ def put_resource(uuid):
     rsp_headers = std_headers
     rsrc = Ldpc(uuid)
 
+    logger.debug('form: {}'.format(request.form))
     # Parse headers.
     pref_handling = None
     if 'prefer' in request.headers:

+ 48 - 1
lakesuperior/model/ldp_nr.py

@@ -1,13 +1,60 @@
+from rdflib import Graph
+from rdflib.namespace import RDF, XSD
+from rdflib.resource import Resource
+from rdflib.term import URIRef, Literal, Variable
+
+from lakesuperior.config_parser import config
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
+from lakesuperior.util.digest import Digest
 
 class LdpNr(Ldpr):
     '''LDP-NR (Non-RDF Source).
 
     Definition: https://www.w3.org/TR/ldp/#ldpnr
     '''
-    pass
 
+    base_types = {
+        nsc['fcrepo'].Binary,
+        nsc['ldp'].NonRDFSource,
+    }
+
+
+    ## LDP METHODS ##
+
+    def get(self, *args, **kwargs):
+        raise NotImplementedError()
+
+
+    def post(self, data):
+        #self._logger.debug('Data: {}'.format(data[:256]))
+        metadata_rsrc = Resource(Graph(), self.urn)
+
+        for t in self.base_types:
+            metadata_rsrc.add(RDF.type, t)
+
+        cksum = Digest.non_rdf_cksum(data)
+        cksum_term = URIRef('urn:sha1:{}'.format(cksum))
+        metadata_rsrc.add(nsc['premis'].hasMessageDigest, cksum_term)
+
+        self._store_binary(data, cksum)
+
+
+
+    def put(self, data):
+        raise NotImplementedError()
+
+
+
+    ## PROTECTED METHODS ##
 
+    def _store_binary(self, data, cksum):
+        '''
+        Move a binary file to persistent storage.
 
+        @param data (bytes) Binary data to store.
+        @param cksum (string) Digest of the data. This is used to determine the
+        file location.
+        '''
+        pass
 

+ 16 - 21
lakesuperior/model/ldpr.py

@@ -13,10 +13,10 @@ from rdflib.resource import Resource
 from rdflib.namespace import RDF, XSD
 
 from lakesuperior.config_parser import config
-from lakesuperior.connectors.filesystem_connector import FilesystemConnector
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.exceptions import InvalidResourceError, \
         ResourceNotExistsError, ServerManagedTermError
+from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
 from lakesuperior.util.translator import Translator
 
 
@@ -100,6 +100,7 @@ class Ldpr(metaclass=ABCMeta):
     _logger = logging.getLogger(__name__)
 
     rdf_store_layout = config['application']['store']['ldp_rs']['layout']
+    non_rdf_store_layout = config['application']['store']['ldp_nr']['layout']
 
     ## MAGIC METHODS ##
 
@@ -115,21 +116,12 @@ class Ldpr(metaclass=ABCMeta):
         '''
         self.uuid = uuid
 
-        # Dynamically load the store layout indicated in the configuration.
-        store_mod = import_module(
-                'lakesuperior.store_layouts.rdf.{}'.format(
-                        self.rdf_store_layout))
-        rdf_store_cls = getattr(store_mod, Translator.camelcase(
-                self.rdf_store_layout))
-
         self._urn = nsc['fcres'][uuid] if self.uuid is not None \
-                else rdf_store_cls.ROOT_NODE_URN
+                else BaseRdfLayout.ROOT_NODE_URN
 
-        self.rdfly = rdf_store_cls(self._urn)
+        self.rdfly = __class__.load_layout('rdf', self._urn)
+        self.nonrdfly = __class__.load_layout('non_rdf')
 
-        # Same thing coud be done for the filesystem store layout, but we
-        # will keep it simple for now.
-        self.fs = FilesystemConnector()
 
 
     @property
@@ -246,17 +238,20 @@ class Ldpr(metaclass=ABCMeta):
     ## STATIC & CLASS METHODS ##
 
     @classmethod
-    def load_rdf_layout(cls, uuid=None):
+    def load_layout(cls, type, uuid=None):
         '''
         Dynamically load the store layout indicated in the configuration.
-        This essentially replicates the init() code in a static context.
+
+        @param type (string) One of `rdf` or `non_rdf`. Determines the type of
+        layout to be loaded.
+        @param uuid (string) UUID of the base resource. For RDF layouts only.
         '''
-        store_mod = import_module(
-                'lakesuperior.store_layouts.rdf.{}'.format(
-                        cls.rdf_store_layout))
-        rdf_layout_cls = getattr(store_mod, Translator.camelcase(
-                cls.rdf_store_layout))
-        return rdf_layout_cls(uuid)
+        layout_name = getattr(cls, '{}_store_layout'.format(type))
+        store_mod = import_module('lakesuperior.store_layouts.{0}.{1}'.format(
+                type, layout_name))
+        layout_cls = getattr(store_mod, Translator.camelcase(layout_name))
+
+        return layout_cls(uuid) if type=='rdf' else layout_cls()
 
 
     @classmethod

+ 43 - 0
lakesuperior/store_layouts/non_rdf/base_non_rdf_layout.py

@@ -0,0 +1,43 @@
+import logging
+
+from abc import ABCMeta, abstractmethod
+
+from lakesuperior.config_parser import config
+
+
class BaseNonRdfLayout(metaclass=ABCMeta):
    '''
    Abstract class for setting the non-RDF (bitstream) store layout.
    '''

    # Store configuration and module logger, shared by all layout subclasses.
    _conf = config['application']['store']['ldp_nr']
    _logger = logging.getLogger(__name__)


    def __init__(self):
        '''
        Initialize the base non-RDF store layout.
        '''
        # Filesystem root under which all binaries are stored.
        self.root = self._conf['path']


    ## PROTECTED METHODS ##

    def _path(self, hash):
        '''
        Generate the resource path splitting the resource checksum according to
        configuration parameters.

        E.g. with a branch length of 2 and a branch count of 4, hash
        `0123456789abcdef` maps to `<root>/01/23/45/67/89abcdef`. With a
        branch count of 0 the whole hash is split into branches.

        @param hash (string) The resource hash.

        @return (string) Slash-separated storage path for the resource.
        '''
        bl = self._conf['pairtree_branch_length']
        bc = self._conf['pairtree_branches']
        # Number of leading characters consumed by the branch segments.
        term = len(hash) if bc == 0 else min(bc * bl, len(hash))

        path = [hash[i:i + bl] for i in range(0, term, bl)]

        if bc > 0:
            # Bug fix: append the *remainder* of the hash as the leaf segment.
            # The original appended `hash[:term]`, duplicating the branch
            # prefix instead of the remainder documented in the configuration
            # example. Skip the leaf when the branches consume the whole hash,
            # to avoid a trailing slash.
            remainder = hash[term:]
            if remainder:
                path.append(remainder)
        path.insert(0, self.root)

        return '/'.join(path)

+ 9 - 0
lakesuperior/store_layouts/non_rdf/default_layout.py

@@ -0,0 +1,9 @@
+from lakesuperior.store_layouts.non_rdf.base_non_rdf_layout import \
+        BaseNonRdfLayout
+
class DefaultLayout(BaseNonRdfLayout):
    '''
    Default file layout for the non-RDF (binary) store.

    This is momentarily a stub until more non-RDF layouts use cases are
    gathered.
    '''
    pass