See my gist, which compares two zip files by their contents and generates a patch file from one zip to the other. For example, if the two zip files share an entry whose content differs, the gist will detect it; if they contain different entries, the gist will detect that as well. The gist ignores differences in modification time. That said, if you only care about a shallow comparison, then hashlib
could be a better choice.
For your reference, code from the gist:
import os
import argparse
import collections
import tempfile
import zipfile
import filecmp
import shutil
import shlex
# Outcome of comparing two archives by entry name:
#   to_rm  - names present only in the old archive
#   to_cmp - names present in both archives (content still to be compared)
#   to_add - names that must be shipped in the patch
ZipCmpResult = collections.namedtuple('ZipCmpResult', 'to_rm to_cmp to_add')
def make_parser():
    """Build the command-line parser for the patch generator.

    The parser accepts ``--oldfile``, ``--newfile`` and ``--toname``; each
    defaults to a path under the ``share`` directory and shows that default
    in its help text.
    """
    cli = argparse.ArgumentParser(
        description='Make patch zip file from two similar zip files.')
    # (flag, default basename under 'share'), registered in this order.
    option_table = (
        ('--oldfile', 'old.zip'),
        ('--newfile', 'new.zip'),
        ('--toname', 'patch'),
    )
    for flag, basename in option_table:
        cli.add_argument(
            flag,
            default=os.path.join('share', basename),
            help='default: %(default)s')
    return cli
def zipcmp(old, new):
    """Classify the entry names of two zip archives.

    Only names are inspected here; entry contents are not read.

    Args:
        old: Path (or file object) of the original archive.
        new: Path (or file object) of the updated archive.

    Returns:
        A ``ZipCmpResult`` of three sets: names only in ``old`` (``to_rm``),
        names in both (``to_cmp``), and names only in ``new`` (``to_add``).
    """
    def entry_names(archive_path):
        with zipfile.ZipFile(archive_path) as archive:
            return set(archive.namelist())

    before = entry_names(old)
    after = entry_names(new)
    return ZipCmpResult(
        to_rm=before.difference(after),
        to_cmp=before.intersection(after),
        to_add=after.difference(before),
    )
def _members_equal(zold, znew, name, chunk_size=1 << 16):
    """Return True when entry ``name`` has identical bytes in both archives."""
    info_old = zold.getinfo(name)
    info_new = znew.getinfo(name)
    # Different uncompressed size or CRC-32 means the payloads cannot match,
    # so there is no need to decompress anything.
    if info_old.file_size != info_new.file_size or info_old.CRC != info_new.CRC:
        return False
    # Same size and CRC: confirm with a full byte-for-byte comparison.
    with zold.open(name) as stream_old, znew.open(name) as stream_new:
        while True:
            chunk_old = stream_old.read(chunk_size)
            chunk_new = stream_new.read(chunk_size)
            if chunk_old != chunk_new:
                return False
            if not chunk_old:
                return True


def compare_files(old, new, cmpresult):
    """Find shared entries whose content differs and mark them for the patch.

    Every name in ``cmpresult.to_cmp`` is read from both archives and
    compared by content; modification times are not considered. Entries are
    streamed directly from the archives rather than extracted to disk, so
    names that ``ZipFile.extract`` would rewrite (``..`` components,
    absolute paths) are compared correctly, and directory entries that
    exist in both archives are treated as unchanged.

    Args:
        old: Path (or file object) of the original archive.
        new: Path (or file object) of the updated archive.
        cmpresult: Object with a ``to_cmp`` iterable of shared entry names
            and a mutable ``to_add`` set. ``to_add`` is updated in place
            with the names whose content differs.

    Returns:
        None.
    """
    with zipfile.ZipFile(old) as zinfile_old, \
            zipfile.ZipFile(new) as zinfile_new:
        for filename in cmpresult.to_cmp:
            if not _members_equal(zinfile_old, zinfile_new, filename):
                cmpresult.to_add.add(filename)
def mkpatch(new, cmpresult, to_name):
    """Write the patch archive and the companion removal script.

    Two files are produced:

    * ``to_name + '.zip'`` holds every entry named in ``cmpresult.to_add``,
      copied from the ``new`` archive.
    * ``to_name + '.sh'`` is a ``/bin/sh`` script that deletes every entry
      named in ``cmpresult.to_rm``.

    Names are processed in sorted order so repeated runs give identical
    output.

    Args:
        new: Path (or file object) of the updated archive.
        cmpresult: Object with ``to_add`` and ``to_rm`` collections of entry
            names.
        to_name: Output path without extension.

    Returns:
        None.
    """
    with zipfile.ZipFile(new) as zinfile, \
            zipfile.ZipFile(to_name + '.zip', 'w') as zoutfile:
        for filename in sorted(cmpresult.to_add):
            with zinfile.open(filename) as infile, \
                    zoutfile.open(filename, 'w') as outfile:
                shutil.copyfileobj(infile, outfile)

    # newline='\n' keeps the script free of CRLF line endings even when it
    # is generated on Windows; /bin/sh would choke on the stray '\r'.
    with open(to_name + '.sh', 'w', encoding='utf-8', newline='\n') as outfile:
        outfile.write('#!/bin/sh\n')
        # Reverse-sorted order lists 'dir/file' before 'dir/', so a directory
        # is already emptied of its listed entries when rmdir reaches it.
        for filename in sorted(cmpresult.to_rm, reverse=True):
            # Directory entries end with '/'; plain rm refuses directories.
            command = 'rmdir' if filename.endswith('/') else 'rm'
            # '--' ends option parsing so names starting with '-' are safe.
            outfile.write(
                '{} -- {}\n'.format(command, shlex.quote(filename)))
def main():
    """Run the tool end to end from command-line arguments.

    Parses the options, classifies entry names of the two archives, refines
    the result by comparing shared entries' contents, and writes the patch
    outputs.
    """
    options = make_parser().parse_args()
    old_zip, new_zip = options.oldfile, options.newfile
    diff = zipcmp(old_zip, new_zip)
    # compare_files updates diff.to_add in place with changed shared entries.
    compare_files(old_zip, new_zip, diff)
    mkpatch(new_zip, diff, options.toname)


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Why not compute a hash (sha-256, for example) of both files and compare these? – Woden