default_layout.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. import os
  2. import shutil
  3. from hashlib import sha1
  4. from uuid import uuid4
  5. from lakesuperior.store.ldp_nr.base_non_rdf_layout import BaseNonRdfLayout
  class DefaultLayout(BaseNonRdfLayout):
      """
      Default file layout.

      Files are stored on the local file system under ``self.root``. Each file
      is named after the SHA1 checksum of its content, and its location is
      derived from that checksum (see `local_path`).
      """

      ## INTERFACE METHODS ##

      def bootstrap(self):
          """
          Initialize binary file store.

          WARNING: any existing tree under ``self.root`` is removed before the
          store is re-created with an empty ``tmp`` directory.
          """
          try:
              shutil.rmtree(self.root)
          except FileNotFoundError:
              # The root does not exist, so there is nothing to wipe.
              pass
          os.makedirs(self.root + '/tmp')

      def persist(self, stream, bufsize=8192):
          """
          Store the stream in the file system.

          This method handles the file in chunks. For each chunk it writes to a
          temp file and adds to a checksum. Once the whole file is written out
          to disk and hashed, the temp file is moved to its final location which
          is determined by the hash value.

          @param stream (IOstream): file-like object to persist.
          @param bufsize (int) Chunk size. 2**12 to 2**15 is a good range.

          @return tuple(string, int) The SHA1 hex digest of the content (used as
          the resource UUID) and the total length of the chunks read.
          """
          tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
          checksum = sha1()
          size = 0

          try:
              with open(tmp_file, 'wb') as f:
                  self._logger.debug('Writing temp file to %s.', tmp_file)
                  while True:
                      buf = stream.read(bufsize)
                      if not buf:
                          break
                      checksum.update(buf)
                      f.write(buf)
                      size += len(buf)
          except BaseException:
              # Catch everything (including interrupts) so that a partial temp
              # file is never left behind; the error is always re-raised.
              self._logger.exception('File write failed on %s.', tmp_file)
              try:
                  os.unlink(tmp_file)
              except FileNotFoundError:
                  # open() itself failed: there is no temp file to remove, and
                  # the original error must not be masked by this one.
                  pass
              raise

          if size == 0:
              self._logger.warning('Zero-file size received.')

          # Move temp file to final destination.
          uuid = checksum.hexdigest()
          dst = self.local_path(uuid)
          self._logger.debug('Saving file to disk: %s', dst)
          # exist_ok avoids a check-then-create race on the parent directory.
          os.makedirs(os.path.dirname(dst), exist_ok=True)

          # If the file exists already, don't bother rewriting it.
          if os.path.exists(dst):
              self._logger.info('File exists on %s. Not overwriting.', dst)
              os.unlink(tmp_file)
          else:
              os.rename(tmp_file, dst)

          return uuid, size

      def delete(self, uuid):
          """
          See BaseNonRdfLayout.delete.

          Removes the file that `local_path` resolves for the given UUID.

          @param uuid (string) The resource UUID, i.e. the content checksum.
          """
          os.unlink(self.local_path(uuid))

      ## PROTECTED METHODS ##

      def local_path(self, uuid):
          """
          Generate the resource path splitting the resource checksum according to
          configuration parameters.

          The first ``pairtree_branches`` segments of ``pairtree_branch_length``
          characters each become nested directory names, and whatever is left of
          the UUID becomes the last path element. If ``pairtree_branches`` is 0,
          the whole UUID is split into segments.

          @param uuid (string) The resource UUID. This corresponds to the content
          checksum.

          @return string The path, rooted at ``self.root``.

          @raise ValueError If ``pairtree_branch_length`` is lower than 1.
          """
          self._logger.debug('Generating path from uuid: %s', uuid)
          bl = self.config['pairtree_branch_length']
          bc = self.config['pairtree_branches']
          if bl < 1:
              raise ValueError(
                  'pairtree_branch_length must be at least 1, got {}.'.format(bl))

          term = len(uuid) if bc == 0 else min(bc * bl, len(uuid))

          path = [self.root]
          path.extend(uuid[i:i + bl] for i in range(0, term, bl))

          # Only add the remainder when there is one: an empty last element
          # would produce a path with a trailing slash, i.e. a directory.
          remainder = uuid[term:]
          if bc > 0 and remainder:
              path.append(remainder)

          return '/'.join(path)