123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105 |
- import os
- import shutil
- from hashlib import sha1
- from uuid import uuid4
- from lakesuperior.store.ldp_nr.base_non_rdf_layout import BaseNonRdfLayout
- class DefaultLayout(BaseNonRdfLayout):
- '''
- This is momentarily a stub until more non-RDF layouts use cases are
- gathered.
- '''
- ## INTERFACE METHODS ##
- def bootstrap(self):
- '''
- Initialize binary file store.
- '''
- try:
- shutil.rmtree(self.root)
- except FileNotFoundError:
- pass
- os.makedirs(self.root + '/tmp')
- def persist(self, stream, bufsize=8192):
- '''
- Store the stream in the file system.
- This method handles the file in chunks. for each chunk it writes to a
- temp file and adds to a checksum. Once the whole file is written out
- to disk and hashed, the temp file is moved to its final location which
- is determined by the hash value.
- @param stream (IOstream): file-like object to persist.
- @param bufsize (int) Chunk size. 2**12 to 2**15 is a good range.
- '''
- tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
- try:
- with open(tmp_file, 'wb') as f:
- self._logger.debug('Writing temp file to {}.'.format(tmp_file))
- hash = sha1()
- size = 0
- while True:
- buf = stream.read(bufsize)
- if not buf:
- break
- hash.update(buf)
- f.write(buf)
- size += len(buf)
- except:
- self._logger.exception('File write failed on {}.'.format(tmp_file))
- os.unlink(tmp_file)
- raise
- # Move temp file to final destination.
- uuid = hash.hexdigest()
- dst = self.local_path(uuid)
- self._logger.debug('Saving file to disk: {}'.format(dst))
- if not os.access(os.path.dirname(dst), os.X_OK):
- os.makedirs(os.path.dirname(dst))
- # If the file exists already, don't bother rewriting it.
- if os.path.exists(dst):
- self._logger.info(
- 'File exists on {}. Not overwriting.'.format(dst))
- os.unlink(tmp_file)
- else:
- os.rename(tmp_file, dst)
- return uuid, size
- def delete(self, uuid):
- '''
- See BaseNonRdfLayout.delete.
- '''
- os.unlink(self.local_path(uuid))
- ## PROTECTED METHODS ##
- def local_path(self, uuid):
- '''
- Generate the resource path splitting the resource checksum according to
- configuration parameters.
- @param uuid (string) The resource UUID. This corresponds to the content
- checksum.
- '''
- self._logger.debug('Generating path from uuid: {}'.format(uuid))
- bl = self.config['pairtree_branch_length']
- bc = self.config['pairtree_branches']
- term = len(uuid) if bc==0 else min(bc*bl, len(uuid))
- path = [ uuid[i:i+bl] for i in range(0, term, bl) ]
- if bc > 0:
- path.append(uuid[term:])
- path.insert(0, self.root)
- return '/'.join(path)
|