default_layout.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. import logging
  2. import os
  3. import shutil
  4. from hashlib import sha1
  5. from uuid import uuid4
  6. from lakesuperior.store.ldp_nr.base_non_rdf_layout import BaseNonRdfLayout
  7. logger = logging.getLogger(__name__)
  class DefaultLayout(BaseNonRdfLayout):
      '''
      Default file layout.

      Files are written under `self.root`, at a path derived from the SHA1
      checksum of their content (see `local_path`).
      '''

      ## INTERFACE METHODS ##

      def bootstrap(self):
          '''
          Initialize binary file store.

          Any existing tree under `self.root` is removed, then the root and its
          `tmp` subfolder are created anew.
          '''
          try:
              shutil.rmtree(self.root)
          except FileNotFoundError:
              # Nothing to wipe: the store has not been created yet.
              pass
          os.makedirs(self.root + '/tmp')

      def persist(self, stream, bufsize=8192):
          '''
          Store the stream in the file system.

          This method handles the file in chunks. For each chunk it writes to a
          temp file and adds to a checksum. Once the whole file is written out
          to disk and hashed, the temp file is moved to its final location which
          is determined by the hash value.

          @param stream (IOstream): file-like object to persist.
          @param bufsize (int) Chunk size. 2**12 to 2**15 is a good range.

          @return tuple(string, int) The SHA1 hex digest of the content and
          the total length of the chunks read from the stream.
          '''
          tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
          checksum = sha1()
          size = 0
          try:
              with open(tmp_file, 'wb') as f:
                  logger.debug('Writing temp file to %s.', tmp_file)
                  while True:
                      buf = stream.read(bufsize)
                      if not buf:
                          break
                      checksum.update(buf)
                      f.write(buf)
                      size += len(buf)
          except BaseException:
              # Clean up on any failure (including interrupts), then re-raise.
              logger.exception('File write failed on %s.', tmp_file)
              try:
                  os.unlink(tmp_file)
              except FileNotFoundError:
                  # open() itself failed, so there is no temp file to remove;
                  # do not let this mask the original error.
                  pass
              raise

          if size == 0:
              logger.warning('Zero-file size received.')

          # Move temp file to final destination.
          uuid = checksum.hexdigest()
          dst = self.local_path(uuid)
          logger.debug('Saving file to disk: %s', dst)
          # exist_ok avoids the check-then-create race on the parent folder.
          os.makedirs(os.path.dirname(dst), exist_ok=True)

          # If the file exists already, don't bother rewriting it.
          if os.path.exists(dst):
              logger.info('File exists on %s. Not overwriting.', dst)
              os.unlink(tmp_file)
          else:
              os.rename(tmp_file, dst)

          return uuid, size

      def delete(self, uuid):
          '''
          See BaseNonRdfLayout.delete.

          Removes the file found at the local path generated for `uuid`.

          @param uuid (string) The resource UUID (content checksum).
          '''
          os.unlink(self.local_path(uuid))

      ## PROTECTED METHODS ##

      def local_path(self, uuid):
          '''
          Generate the resource path splitting the resource checksum according to
          configuration parameters.

          The `pairtree_branch_length` setting is the length of each path
          segment and `pairtree_branches` is the number of segments split off
          the start of the checksum. If `pairtree_branches` is 0 the whole
          checksum is split into segments. Whatever is left of the checksum
          after the split, if anything, becomes the last path element.

          @param uuid (string) The resource UUID. This corresponds to the content
          checksum.

          @return string The path, starting with `self.root` and joined by `/`.
          '''
          logger.debug('Generating path from uuid: %s', uuid)
          bl = self.config['pairtree_branch_length']
          bc = self.config['pairtree_branches']
          term = len(uuid) if bc == 0 else min(bc * bl, len(uuid))

          path = [self.root]
          path.extend(uuid[i:i + bl] for i in range(0, term, bl))

          # Only add the remainder when there is one: an empty trailing element
          # would yield a path ending in '/', i.e. a folder rather than a file.
          remainder = uuid[term:]
          if bc > 0 and remainder:
              path.append(remainder)

          return '/'.join(path)