import pickle

from hashlib import sha1

from lakesuperior.core.namespaces import ns_collection as nsc


class Digest:
    '''
    Various digest functions. May be merged into something more generic later.
    '''

    @staticmethod
    def rdf_cksum(g):
        '''
        Generate a checksum for a graph.

        This is not straightforward because a graph is derived from an
        unordered data structure (RDF). This method orders the triples by
        subject, predicate and object, pickles the ordered list and generates
        a SHA1 checksum of the pickle.

        N.B. The context of the triples is ignored, so isomorphic graphs have
        the same checksum regardless of the context(s) they are found in.

        @TODO This can be later reworked to use a custom hashing algorithm.

        @param rdflib.Graph g The graph to be hashed.

        @return string SHA1 checksum.
        '''
        # Remove the messageDigest property, which at this point is very
        # likely stale. `None` is the wildcard term in rdflib, so this matches
        # any subject and object.
        g.remove((None, nsc['premis'].messageDigest, None))

        ord_g = sorted(g, key=lambda trp: (trp[0], trp[1], trp[2]))

        return sha1(pickle.dumps(ord_g)).hexdigest()

    @staticmethod
    def non_rdf_checksum(data):
        '''
        Generate a checksum of non-RDF content.

        @TODO This can be later reworked to use a custom hashing algorithm.

        @param bytes data The content to be hashed.

        @return string SHA1 checksum.
        '''
        return sha1(data).hexdigest()
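

# Illustrative usage sketch (not part of the original module): builds a tiny
# in-memory graph and prints its checksum. It assumes rdflib is installed and
# that the `nsc` collection imported above resolves the `premis` prefix; the
# example URIs below are made up for demonstration only.
if __name__ == '__main__':
    from rdflib import Graph, Literal, URIRef

    gr = Graph()
    gr.add((
        URIRef('http://example.org/res/1'),
        URIRef('http://example.org/terms/title'),
        Literal('Example resource'),
    ))

    # Isomorphic graphs yield the same checksum because triples are sorted
    # before hashing.
    print(Digest.rdf_cksum(gr))
    print(Digest.non_rdf_checksum(b'Hello, world.'))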