# default_layout.py
import os
from contextlib import suppress
from hashlib import sha1
from uuid import uuid4

from lakesuperior.store.ldp_nr.base_non_rdf_layout import BaseNonRdfLayout


  5. class DefaultLayout(BaseNonRdfLayout):
  6. '''
  7. This is momentarily a stub until more non-RDF layouts use cases are
  8. gathered.
  9. '''
  10. ## INTERFACE METHODS ##
  11. def persist(self, stream, bufsize=8192):
  12. '''
  13. Store the stream in the file system.
  14. This method handles the file in chunks. for each chunk it writes to a
  15. temp file and adds to a checksum. Once the whole file is written out
  16. to disk and hashed, the temp file is moved to its final location which
  17. is determined by the hash value.
  18. @param stream (IOstream): file-like object to persist.
  19. @param bufsize (int) Chunk size. 2**12 to 2**15 is a good range.
  20. '''
  21. tmp_file = '{}/tmp/{}'.format(self.root, uuid4())
  22. try:
  23. with open(tmp_file, 'wb') as f:
  24. self._logger.debug('Writing temp file to {}.'.format(tmp_file))
  25. hash = sha1()
  26. while True:
  27. buf = stream.read(bufsize)
  28. if not buf:
  29. break
  30. hash.update(buf)
  31. f.write(buf)
  32. except:
  33. self._logger.exception('File write failed on {}.'.format(tmp_file))
  34. os.unlink(tmp_file)
  35. raise
  36. # Move temp file to final destination.
  37. uuid = hash.hexdigest()
  38. dst = self.local_path(uuid)
  39. self._logger.debug('Saving file to disk: {}'.format(dst))
  40. if not os.access(os.path.dirname(dst), os.X_OK):
  41. os.makedirs(os.path.dirname(dst))
  42. # If the file exists already, don't bother rewriting it.
  43. if os.path.exists(dst):
  44. self._logger.info(
  45. 'File exists on {}. Not overwriting.'.format(dst))
  46. os.unlink(tmp_file)
  47. else:
  48. os.rename(tmp_file, dst)
  49. return uuid
  50. def delete(self, uuid):
  51. '''
  52. See BaseNonRdfLayout.delete.
  53. '''
  54. os.unlink(self.local_path(uuid))
  55. ## PROTECTED METHODS ##
  56. def local_path(self, uuid):
  57. '''
  58. Generate the resource path splitting the resource checksum according to
  59. configuration parameters.
  60. @param uuid (string) The resource UUID. This corresponds to the content
  61. checksum.
  62. '''
  63. self._logger.debug('Generating path from uuid: {}'.format(uuid))
  64. bl = self.config['pairtree_branch_length']
  65. bc = self.config['pairtree_branches']
  66. term = len(uuid) if bc==0 else min(bc*bl, len(uuid))
  67. path = [ uuid[i:i+bl] for i in range(0, term, bl) ]
  68. if bc > 0:
  69. path.append(uuid[term:])
  70. path.insert(0, self.root)
  71. return '/'.join(path)