123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
import contextlib
import logging
import os
import shutil
from hashlib import sha1
from uuid import uuid4

from lakesuperior.store.ldp_nr.base_non_rdf_layout import BaseNonRdfLayout

- logger = logging.getLogger(__name__)
class DefaultLayout(BaseNonRdfLayout):
    """
    Default file layout.

    This is a simple filesystem layout that stores binaries in pairtree folders
    in a local filesystem. The shape of the pairtree is taken from the
    ``pairtree_branch_length`` and ``pairtree_branches`` configuration keys.
    """

    @staticmethod
    def local_path(root, uuid, bl=4, bc=4):
        """
        Generate the resource path splitting the resource checksum according to
        configuration parameters.

        The first ``bc`` segments of ``bl`` characters each become nested
        folder names; whatever is left of the checksum becomes the last path
        segment. With ``bc == 0`` the whole checksum is split into
        ``bl``-sized segments.

        :param str root: Path prefix prepended to the generated segments.
        :param str uuid: The resource UUID. This corresponds to the content
            checksum.
        :param int bl: Branch length, i.e. number of characters per segment.
        :param int bc: Number of branches (segments) to split off. 0 means
            split the whole checksum.

        :rtype: str
        :return: The segments joined by ``/``, starting with ``root``.
        """
        logger.debug('Generating path from uuid: {}'.format(uuid))
        term = len(uuid) if bc == 0 else min(bc * bl, len(uuid))

        path = [root]
        path.extend(uuid[i : i + bl] for i in range(0, term, bl))

        # Only add the remainder when there is one: an empty trailing segment
        # would yield a path ending in '/', which points to a folder rather
        # than to a file.
        remainder = uuid[term:]
        if bc > 0 and remainder:
            path.append(remainder)

        return '/'.join(path)

    def __init__(self, *args, **kwargs):
        """
        Set up path segmentation parameters.

        Reads ``pairtree_branch_length`` and ``pairtree_branches`` from
        ``self.config``, which is expected to be set by the parent
        constructor.
        """
        super().__init__(*args, **kwargs)

        self.bl = self.config['pairtree_branch_length']
        self.bc = self.config['pairtree_branches']

    ## INTERFACE METHODS ##

    def bootstrap(self):
        """
        Initialize binary file store.

        WARNING: this removes the whole tree under ``self.root``, if present,
        then recreates it with an empty ``tmp`` folder.
        """
        # A missing root just means there is nothing to wipe.
        with contextlib.suppress(FileNotFoundError):
            shutil.rmtree(self.root)
        os.makedirs(self.root + '/tmp')

    def persist(self, stream, bufsize=8192):
        """
        Store the stream in the file system.

        This method handles the file in chunks. For each chunk it writes to a
        temp file and adds to a checksum. Once the whole file is written out
        to disk and hashed, the temp file is moved to its final location which
        is determined by the hash value.

        :param IOstream stream: file-like object to persist.
        :param int bufsize: Chunk size. 2**12 to 2**15 is a good range.

        :rtype: tuple(str, int)
        :return: The SHA1 hex digest of the content and its size in bytes.
        """
        tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
        digest = sha1()
        size = 0

        try:
            with open(tmp_file, 'wb') as f:
                logger.debug('Writing temp file to {}.'.format(tmp_file))
                while True:
                    buf = stream.read(bufsize)
                    if not buf:
                        break
                    digest.update(buf)
                    f.write(buf)
                    size += len(buf)
        except BaseException:
            # Clean up on any failure, interrupts included, then re-raise.
            logger.exception('File write failed on {}.'.format(tmp_file))
            # The temp file may never have been created (e.g. open() failed);
            # do not let the cleanup mask the original error.
            with contextlib.suppress(FileNotFoundError):
                os.unlink(tmp_file)
            raise

        if size == 0:
            logger.warning('Zero-length file received.')

        # Move temp file to final destination.
        checksum = digest.hexdigest()
        dst = __class__.local_path(self.root, checksum, self.bl, self.bc)
        logger.debug('Saving file to disk: {}'.format(dst))

        # exist_ok avoids a check-then-create race between concurrent writers.
        os.makedirs(os.path.dirname(dst), exist_ok=True)

        # If the file exists already, don't bother rewriting it.
        if os.path.exists(dst):
            logger.info(
                    'File exists on {}. Not overwriting.'.format(dst))
            os.unlink(tmp_file)
        else:
            os.rename(tmp_file, dst)

        return checksum, size

    def delete(self, uuid):
        """
        See BaseNonRdfLayout.delete.

        Removes the file whose location is derived from ``uuid`` via
        :meth:`local_path`.

        :param str uuid: Checksum identifying the stored file.
        """
        os.unlink(__class__.local_path(self.root, uuid, self.bl, self.bc))
|