For what it's worth, I like to use try-with-resources everywhere. If you are into that design pattern, then you will like this. Also, this solves the problem of empty parts when an entry is larger than the desired part size: such an entry is written to a part of its own, so in the worst case you will have exactly as many parts as entries.
In:
my-archive.zip
Out:
my-archive.part1of3.zip
my-archive.part2of3.zip
my-archive.part3of3.zip
Note: I'm using logging and Apache Commons FilenameUtils, but feel free to use what you have in your toolkit.
/**
* Utility class to split a zip archive into parts (not volumes)
* by attempting to fit as many entries into a single part before
* creating a new part. If a part would otherwise be empty because
* the next entry won't fit, it will be added anyway to avoid empty parts.
*
* @author Eric Draken, 2019
*/
public class Zip
{
// Size, in bytes, of the buffer used when copying entry data between archives
private static final int DEFAULT_BUFFER_SIZE = 1024 * 4;
// Part file name pattern: base name, 1-based part number, total number of parts
private static final String ZIP_PART_FORMAT = "%s.part%dof%d.zip";
// Extension (without the dot) that the source archive's file name must have
private static final String EXT = "zip";
// Logger bound to the class returned by MethodHandles.lookup().lookupClass()
private static final Logger logger = LoggerFactory.getLogger( MethodHandles.lookup().lookupClass() );
/**
 * Split a large archive into smaller parts.
 * <p>
 * Entries are never split: each part receives whole entries, in archive order,
 * until the next entry would push the part over {@code approxPartSizeBytes}
 * (measured by compressed entry size). An entry that is larger than the limit
 * is written to a part of its own. Parts are named
 * {@code <basename>.part<N>of<total>.zip} next to {@code outZipFile}.
 * If a single part suffices, the source archive is copied as-is to
 * {@code <basename>.part1of1.zip}. An archive without entries produces no output.
 *
 * @param zipFile Source zip file to split (must end with .zip)
 * @param outZipFile Destination zip file base path. The "part" number will be added automatically
 * @param approxPartSizeBytes Approximate part size
 * @throws IOException Exceptions on file access, including a pre-existing
 *                     target file in the single-part (copy) case
 * @throws IllegalArgumentException If the source does not end with .zip, or if the
 *                     requested part size is below 1/100th of the source file size
 */
public static void splitZipArchive(
    @NotNull final File zipFile,
    @NotNull final File outZipFile,
    final long approxPartSizeBytes ) throws IOException
{
    String basename = FilenameUtils.getBaseName( outZipFile.getName() );
    Path basePath = outZipFile.getParentFile() != null ? // Check if this file has a parent folder
        outZipFile.getParentFile().toPath() :
        Paths.get( "" );

    String extension = FilenameUtils.getExtension( zipFile.getName() );
    if ( !extension.equals( EXT ) )
    {
        throw new IllegalArgumentException( "The archive to split must end with ." + EXT );
    }

    // Get a list of entries in the archive
    try ( ZipFile zf = new ZipFile( zipFile ) )
    {
        // Silliness check: refuse part sizes so small that the file size alone
        // already implies more than 100 parts
        long minRequiredSize = zipFile.length() / 100;
        if ( minRequiredSize > approxPartSizeBytes )
        {
            throw new IllegalArgumentException(
                "Please select a minimum part size over " + minRequiredSize + " bytes, " +
                    "otherwise there will be over 100 parts."
            );
        }

        // Dry run over all the entries to learn the total number of parts up front;
        // the total is baked into every part's file name, so it must match exactly
        // what the writer will produce. An archive without entries needs no parts.
        long totalParts = Math.min( countRequiredParts( zf.entries(), approxPartSizeBytes ), zf.size() );
        logger.debug( "Split requires {} parts", totalParts );

        if ( totalParts == 1 )
        {
            // No splitting required. Copy file
            Path outFile = basePath.resolve(
                String.format( ZIP_PART_FORMAT, basename, 1, 1 )
            );
            Files.copy( zipFile.toPath(), outFile );
            logger.debug( "Copied {} to {} (pass-though)", zipFile.toString(), outFile.toString() );
            return;
        }

        // Fresh enumeration for the actual write pass
        Enumeration<? extends ZipEntry> enumeration = zf.entries();

        // Split into parts. The writer hands back the entry that did not fit,
        // which then becomes the first entry of the next part.
        int currPart = 1;
        ZipEntry overflowZipEntry = null;
        while ( overflowZipEntry != null || enumeration.hasMoreElements() )
        {
            Path outFilePart = basePath.resolve(
                String.format( ZIP_PART_FORMAT, basename, currPart++, totalParts )
            );
            overflowZipEntry = writeEntriesToPart( overflowZipEntry, zf, outFilePart, enumeration, approxPartSizeBytes );
            logger.debug( "Wrote {}", outFilePart );
        }
    }
}

/**
 * Count how many parts the given entries will occupy, using the same packing
 * rule as the part writer: a new part is started only when the next entry does
 * not fit AND the current part already holds data. An oversized entry therefore
 * occupies the (empty) current part instead of opening an additional one.
 *
 * @param entries Entries of the archive, in archive order
 * @param approxPartSizeBytes Approximate part size
 * @return Number of parts required; 1 when there are no entries
 */
private static long countRequiredParts(
    @NotNull final Enumeration<? extends ZipEntry> entries,
    final long approxPartSizeBytes )
{
    long partSize = 0;
    long totalParts = 1;
    while ( entries.hasMoreElements() )
    {
        long nextSize = entries.nextElement().getCompressedSize();
        if ( partSize != 0 && partSize + nextSize > approxPartSizeBytes )
        {
            partSize = 0;
            totalParts++;
        }
        partSize += nextSize;
    }
    return totalParts;
}
/**
 * Write as many entries as will fit into the part archive {@code outFilePart}.
 * <p>
 * Entries are taken first from {@code overflowZipEntry} (if any) and then from
 * {@code enumeration}. Writing stops when the next entry would push the part
 * over {@code approxPartSizeBytes} (measured by compressed entry size) and the
 * part already holds data; that entry is returned so the caller can start the
 * next part with it. The output file is created, or truncated if it exists.
 *
 * @param overflowZipEntry ZipEntry that didn't fit in the last part, or null
 * @param inZipFile The large archive to split
 * @param outFilePart The part of the archive currently being worked on
 * @param enumeration Enumeration of ZipEntries
 * @param approxPartSizeBytes Approximate part size
 * @return Overflow ZipEntry, or null
 * @throws IOException File access exceptions
 */
private static ZipEntry writeEntriesToPart(
    @Nullable ZipEntry overflowZipEntry,
    @NotNull final ZipFile inZipFile,
    @NotNull final Path outFilePart,
    @NotNull final Enumeration<? extends ZipEntry> enumeration,
    final long approxPartSizeBytes
) throws IOException
{
    try (
        ZipOutputStream zos =
            new ZipOutputStream( new FileOutputStream( outFilePart.toFile(), false ) )
    )
    {
        long partSize = 0;
        byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
        while ( overflowZipEntry != null || enumeration.hasMoreElements() )
        {
            ZipEntry entry = overflowZipEntry != null ? overflowZipEntry : enumeration.nextElement();
            overflowZipEntry = null;

            long entrySize = entry.getCompressedSize();
            if ( partSize != 0 && partSize + entrySize > approxPartSizeBytes )
            {
                return entry; // Finished this part, but return the dangling ZipEntry
            }
            // Otherwise the entry fits, or the part is still empty and the entry
            // is added anyway so that no part ends up empty
            partSize += entrySize;

            // Write a copy of the entry rather than the source entry itself. The data
            // is re-compressed here and may come out a different size than recorded in
            // the source archive; leaving the old compressed size in place can make the
            // stream reject the entry with "invalid entry compressed size". STORED
            // entries keep their sizes because the stream requires them up front.
            ZipEntry outEntry = new ZipEntry( entry );
            if ( outEntry.getMethod() != ZipEntry.STORED )
            {
                outEntry.setCompressedSize( -1 );
            }
            zos.putNextEntry( outEntry );

            // Get the input stream for this entry and copy the entry
            try ( InputStream is = inZipFile.getInputStream( entry ) )
            {
                int bytesRead;
                while ( (bytesRead = is.read( buffer )) != -1 )
                {
                    zos.write( buffer, 0, bytesRead );
                }
            }
            zos.closeEntry();
        }
        return null; // Finished splitting
    }
}