default_layout.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. import logging
  2. import os
  3. import shutil
  4. from hashlib import sha1
  5. from uuid import uuid4
  6. from lakesuperior.store.ldp_nr.base_non_rdf_layout import BaseNonRdfLayout
  7. logger = logging.getLogger(__name__)
  8. class DefaultLayout(BaseNonRdfLayout):
  9. '''
  10. Default file layout.
  11. '''
  12. @staticmethod
  13. def local_path(root, uuid, bl=4, bc=4):
  14. '''
  15. Generate the resource path splitting the resource checksum according to
  16. configuration parameters.
  17. @param uuid (string) The resource UUID. This corresponds to the content
  18. checksum.
  19. '''
  20. logger.debug('Generating path from uuid: {}'.format(uuid))
  21. term = len(uuid) if bc == 0 else min(bc * bl, len(uuid))
  22. path = [uuid[i : i + bl] for i in range(0, term, bl)]
  23. if bc > 0:
  24. path.append(uuid[term :])
  25. path.insert(0, root)
  26. return '/'.join(path)
  27. def __init__(self, *args, **kwargs):
  28. '''
  29. Set up path segmentation parameters.
  30. '''
  31. super().__init__(*args, **kwargs)
  32. self.bl = self.config['pairtree_branch_length']
  33. self.bc = self.config['pairtree_branches']
  34. ## INTERFACE METHODS ##
  35. def bootstrap(self):
  36. '''
  37. Initialize binary file store.
  38. '''
  39. try:
  40. shutil.rmtree(self.root)
  41. except FileNotFoundError:
  42. pass
  43. os.makedirs(self.root + '/tmp')
  44. def persist(self, stream, bufsize=8192):
  45. '''
  46. Store the stream in the file system.
  47. This method handles the file in chunks. for each chunk it writes to a
  48. temp file and adds to a checksum. Once the whole file is written out
  49. to disk and hashed, the temp file is moved to its final location which
  50. is determined by the hash value.
  51. @param stream (IOstream): file-like object to persist.
  52. @param bufsize (int) Chunk size. 2**12 to 2**15 is a good range.
  53. '''
  54. tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
  55. try:
  56. with open(tmp_file, 'wb') as f:
  57. logger.debug('Writing temp file to {}.'.format(tmp_file))
  58. hash = sha1()
  59. size = 0
  60. while True:
  61. buf = stream.read(bufsize)
  62. if not buf:
  63. break
  64. hash.update(buf)
  65. f.write(buf)
  66. size += len(buf)
  67. except:
  68. logger.exception('File write failed on {}.'.format(tmp_file))
  69. os.unlink(tmp_file)
  70. raise
  71. if size == 0:
  72. logger.warn('Zero-length file received.')
  73. # Move temp file to final destination.
  74. uuid = hash.hexdigest()
  75. dst = __class__.local_path(self.root, uuid, self.bl, self.bc)
  76. logger.debug('Saving file to disk: {}'.format(dst))
  77. if not os.access(os.path.dirname(dst), os.X_OK):
  78. os.makedirs(os.path.dirname(dst))
  79. # If the file exists already, don't bother rewriting it.
  80. if os.path.exists(dst):
  81. logger.info(
  82. 'File exists on {}. Not overwriting.'.format(dst))
  83. os.unlink(tmp_file)
  84. else:
  85. os.rename(tmp_file, dst)
  86. return uuid, size
  87. def delete(self, uuid):
  88. '''
  89. See BaseNonRdfLayout.delete.
  90. '''
  91. os.unlink(__class__.local_path(self.root, uuid, self.bl, self.bc))