# default_layout.py (3.4 KB)

import contextlib
import logging
import os
import shutil
from hashlib import sha1
from uuid import uuid4

from lakesuperior.store.ldp_nr.base_non_rdf_layout import BaseNonRdfLayout

  7. logger = logging.getLogger(__name__)
  8. class DefaultLayout(BaseNonRdfLayout):
  9. """
  10. Default file layout.
  11. This is a simple filesystem layout that stores binaries in pairtree folders
  12. in a local filesystem. Parameters can be specified for the
  13. """
  14. @staticmethod
  15. def local_path(root, uuid, bl=4, bc=4):
  16. """
  17. Generate the resource path splitting the resource checksum according to
  18. configuration parameters.
  19. :param str uuid: The resource UUID. This corresponds to the content
  20. checksum.
  21. """
  22. logger.debug('Generating path from uuid: {}'.format(uuid))
  23. term = len(uuid) if bc == 0 else min(bc * bl, len(uuid))
  24. path = [uuid[i : i + bl] for i in range(0, term, bl)]
  25. if bc > 0:
  26. path.append(uuid[term :])
  27. path.insert(0, root)
  28. return '/'.join(path)
  29. def __init__(self, *args, **kwargs):
  30. """Set up path segmentation parameters."""
  31. super().__init__(*args, **kwargs)
  32. self.bl = self.config['pairtree_branch_length']
  33. self.bc = self.config['pairtree_branches']
  34. ## INTERFACE METHODS ##
  35. def bootstrap(self):
  36. """Initialize binary file store."""
  37. try:
  38. shutil.rmtree(self.root)
  39. except FileNotFoundError:
  40. pass
  41. os.makedirs(self.root + '/tmp')
  42. def persist(self, stream, bufsize=8192):
  43. r"""
  44. Store the stream in the file system.
  45. This method handles the file in chunks. for each chunk it writes to a
  46. temp file and adds to a checksum. Once the whole file is written out
  47. to disk and hashed, the temp file is moved to its final location which
  48. is determined by the hash value.
  49. :param IOstream stream: file-like object to persist.
  50. :param int bufsize: Chunk size. 2\*\*12 to 2\*\*15 is a good range.
  51. """
  52. tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
  53. try:
  54. with open(tmp_file, 'wb') as f:
  55. logger.debug('Writing temp file to {}.'.format(tmp_file))
  56. hash = sha1()
  57. size = 0
  58. while True:
  59. buf = stream.read(bufsize)
  60. if not buf:
  61. break
  62. hash.update(buf)
  63. f.write(buf)
  64. size += len(buf)
  65. except:
  66. logger.exception('File write failed on {}.'.format(tmp_file))
  67. os.unlink(tmp_file)
  68. raise
  69. if size == 0:
  70. logger.warn('Zero-length file received.')
  71. # Move temp file to final destination.
  72. uuid = hash.hexdigest()
  73. dst = __class__.local_path(self.root, uuid, self.bl, self.bc)
  74. logger.debug('Saving file to disk: {}'.format(dst))
  75. if not os.access(os.path.dirname(dst), os.X_OK):
  76. os.makedirs(os.path.dirname(dst))
  77. # If the file exists already, don't bother rewriting it.
  78. if os.path.exists(dst):
  79. logger.info(
  80. 'File exists on {}. Not overwriting.'.format(dst))
  81. os.unlink(tmp_file)
  82. else:
  83. os.rename(tmp_file, dst)
  84. return uuid, size
  85. def delete(self, uuid):
  86. """See BaseNonRdfLayout.delete."""
  87. os.unlink(__class__.local_path(self.root, uuid, self.bl, self.bc))