This is fixed in newer versions of Python, which apply the fix that @josselin suggests. For older versions of Python, you can avoid installing a third-party package or modifying the standard library's source code by monkey-patching `zipfile._EndRecData64`. It is not pretty, but it will get the job done:
import zipfile
import struct
# Monkey-patch zipfile._EndRecData64
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    Args:
        fpin: Seekable binary file object positioned anywhere; it is
            repositioned with ``seek(..., 2)`` (relative to end of file).
        offset: Offset relative to the end of the file. The ZIP64 locator
            is looked for in the ``sizeEndCentDir64Locator`` bytes that end
            at this offset, and the ZIP64 end-of-archive record in the
            ``sizeEndCentDir64`` bytes immediately before the locator.
        endrec: Mutable sequence indexed by the ``zipfile._ECD_*``
            constants. It is updated in place when a ZIP64 record is found.

    Returns:
        The same ``endrec`` object: unchanged if no valid ZIP64 records were
        found, otherwise with signature, disk numbers, entry counts,
        central-directory size and offset replaced by the ZIP64 values.

    Raises:
        zipfile.BadZipFile: If the locator reports a non-zero disk number
            or more than one disk in total.
    """
    locator_size = zipfile.sizeEndCentDir64Locator
    record_size = zipfile.sizeEndCentDir64

    try:
        fpin.seek(offset - locator_size, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(locator_size)
    if len(data) != locator_size:
        return endrec
    sig, diskno, _reloff, disks = struct.unpack(
        zipfile.structEndArchive64Locator, data)
    if sig != zipfile.stringEndArchive64Locator:
        return endrec

    # Only reject archives that claim more than one disk; a total-disk
    # count of 0 is accepted here rather than treated as multi-disk.
    if diskno != 0 or disks > 1:
        raise zipfile.BadZipFile(
            "zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - locator_size - record_size, 2)
    data = fpin.read(record_size)
    if len(data) != record_size:
        return endrec
    (sig, _sz, _create_version, _read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = struct.unpack(
        zipfile.structEndArchive64, data)
    if sig != zipfile.stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[zipfile._ECD_SIGNATURE] = sig
    endrec[zipfile._ECD_DISK_NUMBER] = disk_num
    endrec[zipfile._ECD_DISK_START] = disk_dir
    endrec[zipfile._ECD_ENTRIES_THIS_DISK] = dircount
    endrec[zipfile._ECD_ENTRIES_TOTAL] = dircount2
    endrec[zipfile._ECD_SIZE] = dirsize
    endrec[zipfile._ECD_OFFSET] = diroffset
    return endrec
# Overwrite the zipfile module's private _EndRecData64 attribute with the
# fixed version defined above, so any later lookup of
# zipfile._EndRecData64 resolves to it. This must run before the zip file
# is opened for the patch to have any effect.
zipfile._EndRecData64 = _EndRecData64
`python3.7` can be run with a file, while `python3.6` can't, for me. – Pajamas