import pickle

from hashlib import sha1

from rdflib.term import Literal, URIRef, Variable

from lakesuperior.dictionaries.namespaces import ns_collection as nsc


class Digest:
    '''
    Various digest functions. May be merged into something more generic later.
    '''
    @staticmethod
    def rdf_cksum(g):
        '''
        Generate a checksum for a graph.

        This is not straightforward because a graph is derived from an
        unordered data structure (RDF). This method sorts the triples by
        subject, predicate and object, pickles the sorted list, and returns
        a checksum of the pickle.

        N.B. The context of the triples is ignored, so isomorphic graphs
        have the same checksum regardless of the context(s) they are found
        in.

        @TODO This can later be reworked to use a custom hashing algorithm.

        @param rdflib.Graph g The graph to be hashed.

        @return string SHA1 checksum.
        '''
        # Remove the messageDigest property, which very likely reflects the
        # previous state of the resource. Note that `None` is the rdflib
        # wildcard for removal; a `Variable` term would not match any
        # existing triple.
        g.remove((None, nsc['premis'].messageDigest, None))

        # Sort the triples by subject, predicate and object so that the
        # serialization is deterministic.
        ord_g = sorted(g, key=lambda trp: (trp[0], trp[1], trp[2]))

        return sha1(pickle.dumps(ord_g)).hexdigest()

    @staticmethod
    def non_rdf_checksum(data):
        '''
        Generate a checksum of non-RDF (binary) content.

        @TODO This can later be reworked to use a custom hashing algorithm.

        @param bytes data The content to be hashed.

        @return string SHA1 checksum.
        '''
        return sha1(data).hexdigest()
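

# A minimal usage sketch, assuming rdflib and the lakesuperior namespace
# dictionary are importable. It shows that rdf_cksum() is insensitive to
# triple insertion order, since triples are sorted before hashing, and that
# non_rdf_checksum() hashes raw bytes. The URIs and payload below are made up
# for illustration only.
if __name__ == '__main__':
    from rdflib import Graph

    g1 = Graph()
    g1.add((URIRef('urn:x-test:s1'), URIRef('urn:x-test:p1'), Literal('a')))
    g1.add((URIRef('urn:x-test:s2'), URIRef('urn:x-test:p2'), Literal('b')))

    # Same triples, inserted in reverse order.
    g2 = Graph()
    g2.add((URIRef('urn:x-test:s2'), URIRef('urn:x-test:p2'), Literal('b')))
    g2.add((URIRef('urn:x-test:s1'), URIRef('urn:x-test:p1'), Literal('a')))

    assert Digest.rdf_cksum(g1) == Digest.rdf_cksum(g2)

    # Non-RDF content is hashed directly from its byte representation.
    print(Digest.non_rdf_checksum(b'some binary payload'))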