default_layout.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. import os
  2. from hashlib import sha1
  3. from uuid import uuid4
  4. from lakesuperior.store_layouts.non_rdf.base_non_rdf_layout import \
  5. BaseNonRdfLayout
  6. class DefaultLayout(BaseNonRdfLayout):
  7. '''
  8. This is momentarily a stub until more non-RDF layouts use cases are
  9. gathered.
  10. '''
  11. ## INTERFACE METHODS ##
  12. def persist(self, stream, bufsize=8192):
  13. '''
  14. Store the stream in the file system.
  15. This method handles the file in chunks. for each chunk it writes to a
  16. temp file and adds to a checksum. Once the whole file is written out
  17. to disk and hashed, the temp file is moved to its final location which
  18. is determined by the hash value.
  19. @param stream (IOstream): file-like object to persist.
  20. @param bufsize (int) Chunk size. 2**12 to 2**15 is a good range.
  21. '''
  22. tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
  23. try:
  24. with open(tmp_file, 'wb') as f:
  25. self._logger.debug('Writing temp file to {}.'.format(tmp_file))
  26. hash = sha1()
  27. while True:
  28. buf = stream.read(bufsize)
  29. if not buf:
  30. break
  31. hash.update(buf)
  32. f.write(buf)
  33. except:
  34. self._logger.exception('File write failed on {}.'.format(tmp_file))
  35. os.unlink(tmp_file)
  36. raise
  37. # Move temp file to final destination.
  38. uuid = hash.hexdigest()
  39. dst = self.local_path(uuid)
  40. self._logger.debug('Saving file to disk: {}'.format(dst))
  41. if not os.access(os.path.dirname(dst), os.X_OK):
  42. os.makedirs(os.path.dirname(dst))
  43. # If the file exists already, don't bother rewriting it.
  44. if os.path.exists(dst):
  45. self._logger.info(
  46. 'File exists on {}. Not overwriting.'.format(dst))
  47. os.unlink(tmp_file)
  48. else:
  49. os.rename(tmp_file, dst)
  50. return uuid
  51. def delete(self, uuid):
  52. '''
  53. See BaseNonRdfLayout.delete.
  54. '''
  55. os.unlink(self.local_path(uuid))
  56. ## PROTECTED METHODS ##
  57. def local_path(self, uuid):
  58. '''
  59. Generate the resource path splitting the resource checksum according to
  60. configuration parameters.
  61. @param uuid (string) The resource UUID. This corresponds to the content
  62. checksum.
  63. '''
  64. self._logger.debug('Generating path from uuid: {}'.format(uuid))
  65. bl = self._conf['pairtree_branch_length']
  66. bc = self._conf['pairtree_branches']
  67. term = len(uuid) if bc==0 else min(bc*bl, len(uuid))
  68. path = [ uuid[i:i+bl] for i in range(0, term, bl) ]
  69. if bc > 0:
  70. path.append(uuid[term:])
  71. path.insert(0, self.root)
  72. return '/'.join(path)