default_layout.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. import os
  2. import shutil
  3. from hashlib import sha1
  4. from uuid import uuid4
  5. from lakesuperior.store.ldp_nr.base_non_rdf_layout import BaseNonRdfLayout
  6. class DefaultLayout(BaseNonRdfLayout):
  7. '''
  8. This is momentarily a stub until more non-RDF layouts use cases are
  9. gathered.
  10. '''
  11. ## INTERFACE METHODS ##
  12. def bootstrap(self):
  13. '''
  14. Initialize binary file store.
  15. '''
  16. try:
  17. shutil.rmtree(self.root)
  18. except FileNotFoundError:
  19. pass
  20. os.makedirs(self.root + '/tmp')
  21. def persist(self, stream, bufsize=8192):
  22. '''
  23. Store the stream in the file system.
  24. This method handles the file in chunks. for each chunk it writes to a
  25. temp file and adds to a checksum. Once the whole file is written out
  26. to disk and hashed, the temp file is moved to its final location which
  27. is determined by the hash value.
  28. @param stream (IOstream): file-like object to persist.
  29. @param bufsize (int) Chunk size. 2**12 to 2**15 is a good range.
  30. '''
  31. tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
  32. try:
  33. with open(tmp_file, 'wb') as f:
  34. self._logger.debug('Writing temp file to {}.'.format(tmp_file))
  35. hash = sha1()
  36. size = 0
  37. while True:
  38. buf = stream.read(bufsize)
  39. if not buf:
  40. break
  41. hash.update(buf)
  42. f.write(buf)
  43. size += len(buf)
  44. except:
  45. self._logger.exception('File write failed on {}.'.format(tmp_file))
  46. os.unlink(tmp_file)
  47. raise
  48. # Move temp file to final destination.
  49. uuid = hash.hexdigest()
  50. dst = self.local_path(uuid)
  51. self._logger.debug('Saving file to disk: {}'.format(dst))
  52. if not os.access(os.path.dirname(dst), os.X_OK):
  53. os.makedirs(os.path.dirname(dst))
  54. # If the file exists already, don't bother rewriting it.
  55. if os.path.exists(dst):
  56. self._logger.info(
  57. 'File exists on {}. Not overwriting.'.format(dst))
  58. os.unlink(tmp_file)
  59. else:
  60. os.rename(tmp_file, dst)
  61. return uuid, size
  62. def delete(self, uuid):
  63. '''
  64. See BaseNonRdfLayout.delete.
  65. '''
  66. os.unlink(self.local_path(uuid))
  67. ## PROTECTED METHODS ##
  68. def local_path(self, uuid):
  69. '''
  70. Generate the resource path splitting the resource checksum according to
  71. configuration parameters.
  72. @param uuid (string) The resource UUID. This corresponds to the content
  73. checksum.
  74. '''
  75. self._logger.debug('Generating path from uuid: {}'.format(uuid))
  76. bl = self.config['pairtree_branch_length']
  77. bc = self.config['pairtree_branches']
  78. term = len(uuid) if bc==0 else min(bc*bl, len(uuid))
  79. path = [ uuid[i:i+bl] for i in range(0, term, bl) ]
  80. if bc > 0:
  81. path.append(uuid[term:])
  82. path.insert(0, self.root)
  83. return '/'.join(path)