What I need is to be able to extract the files in a .rar file into streams. I'm creating a test case to get a sense of how to use unrar source. I've been searching and tinkering for a while, but I can't figure out how to use the library. I'm surprised I can't even find documentation or a tutorial for it, considering how common .rar archives are.
I've made a bit of progress on my own, but it doesn't always work. Certain files are extracted properly. Other files are jumbled up for some reason (but not completely "garbage" binary data). All I know so far is, usually (but not always):
not working files have
fileInfo.Method = 48
. They appear to be files that have a compression ratio of 100% - i.e. no compressionworking files have
fileInfo.Method = 49
,50
,51
,52
, or53
, which correspond to the compression speeds, Fastest, Fast, Normal, Good, Best
But I have no idea why that is. Still can't find documentation or a working example.
Below is the test case source I have so far and an example rar archive that, when extracted with this program, has both working and not working files.
/* put in the same directory as the unrar source files
* compiling with:
* make clean
* make lib
* g++ rartest.cpp -o rartest libunrar.so -lboost_filesystem
*/
#include <cstring>
#include <iostream>
#include <fstream>
#include <boost/filesystem.hpp>
#define _UNIX
#define RARDLL
#include "dll.hpp"
using namespace std;
namespace fs = boost::filesystem;
//char fileName[100] = "testout0.jpg\0";
//
//// doens't work
//int PASCAL ProcessDataProc(unsigned char* buffer, int buffLen) {
// cout << "writing..." << endl;
// ofstream outFile(fileName);
// cout << buffLen << endl;
// cout << outFile.write((const char*)buffer, buffLen) << endl;
// cout << "done writing..." << endl;
// fileName[7]++;
//}
int CALLBACK CallbackProc(unsigned int msg, long myBuffer, long rarBuffer, long bufferLen) {
switch(msg) {
case UCM_CHANGEVOLUME:
break;
case UCM_PROCESSDATA:
memcpy((char*)myBuffer, (char*)rarBuffer, bufferLen);
break;
case UCM_NEEDPASSWORD:
break;
}
return 1;
}
int main(int argc, char* argv[]) {
if (argc != 2)
return 0;
ifstream archiveStream(argv[1]);
if (!archiveStream.is_open())
cout << "fstream couldn't open file\n";
// declare and set parameters
HANDLE rarFile;
RARHeaderDataEx fileInfo;
RAROpenArchiveDataEx archiveInfo;
memset(&archiveInfo, 0, sizeof(archiveInfo));
archiveInfo.CmtBuf = NULL;
//archiveInfo.OpenMode = RAR_OM_LIST;
archiveInfo.OpenMode = RAR_OM_EXTRACT;
archiveInfo.ArcName = argv[1];
// Open file
rarFile = RAROpenArchiveEx(&archiveInfo);
if (archiveInfo.OpenResult != 0) {
RARCloseArchive(rarFile);
cout << "unrar couldn't open" << endl;
exit(1);
}
fileInfo.CmtBuf = NULL;
cout << archiveInfo.Flags << endl;
// loop through archive
int numFiles = 0;
int fileSize;
int RHCode;
int PFCode;
while(true) {
RHCode = RARReadHeaderEx(rarFile, &fileInfo);
if (RHCode != 0) break;
numFiles++;
fs::path path(fileInfo.FileName);
fileSize = fileInfo.UnpSize;
cout << fileInfo.Method << " " << fileInfo.FileName << " (" << fileInfo.UnpSize << ")" << endl;
char fileBuffer[fileInfo.UnpSize];
// not sure what this does
//RARSetProcessDataProc(rarFile, ProcessDataProc);
// works for some files, but not for others
RARSetCallback(rarFile, CallbackProc, (long) &fileBuffer);
PFCode = RARProcessFile(rarFile, RAR_TEST, NULL, NULL);
// properly extracts to a directory... but I need a stream
// and I don't want to write to disk, read it, and delete from disk
//PFCode = RARProcessFile(rarFile, RAR_EXTRACT, ".", fileInfo.FileName);
// just skips
//PFCode = RARProcessFile(rarFile, RAR_SKIP, NULL, NULL);
if (PFCode != 0) {
RARCloseArchive(rarFile);
cout << "error processing this file\n" << endl;
exit(1);
}
ofstream outFile(path.filename().c_str());
outFile.write(fileBuffer, fileSize);
}
if (RHCode != ERAR_END_ARCHIVE)
cout << "error traversing through archive: " << RHCode << endl;
RARCloseArchive(rarFile);
cout << "num files: " << numFiles << endl;
}
update:
I've found a file that appears to be (claims to be?) the documentation, but according to the file, I'm not doing anything wrong. I think I might be forced to resort to CRC checking the buffers and implementing a workaround if it fails.
solution source (thanks, Denis Krjuchkov!):
/* put in the same directory as the unrar source files
* compiling with:
* make clean
* make lib
* g++ rartest.cpp -o rartest libunrar.so -lboost_filesystem
*/
#include <cstring>
#include <iostream>
#include <fstream>
#include <boost/filesystem.hpp>
#include <boost/crc.hpp>
#define _UNIX
#define RARDLL
#include "dll.hpp"
using namespace std;
namespace fs = boost::filesystem;
//char fileName[100] = "testout0.jpg\0";
//
//// doens't work
//int PASCAL ProcessDataProc(unsigned char* buffer, int buffLen) {
// cout << "writing..." << endl;
// ofstream outFile(fileName);
// cout << buffLen << endl;
// cout << outFile.write((const char*)buffer, buffLen) << endl;
// cout << "done writing..." << endl;
// fileName[7]++;
//}
int CALLBACK CallbackProc(unsigned int msg, long myBufferPtr, long rarBuffer, long bytesProcessed) {
switch(msg) {
case UCM_CHANGEVOLUME:
return -1;
break;
case UCM_PROCESSDATA:
memcpy(*(char**)myBufferPtr, (char*)rarBuffer, bytesProcessed);
*(char**)myBufferPtr += bytesProcessed;
return 1;
break;
case UCM_NEEDPASSWORD:
return -1;
break;
}
}
int main(int argc, char* argv[]) {
if (argc != 2)
return 0;
ifstream archiveStream(argv[1]);
if (!archiveStream.is_open())
cout << "fstream couldn't open file\n";
// declare and set parameters
RARHANDLE rarFile; // I renamed this macro in dll.hpp for my own purposes
RARHANDLE rarFile2;
RARHeaderDataEx fileInfo;
RAROpenArchiveDataEx archiveInfo;
memset(&archiveInfo, 0, sizeof(archiveInfo));
archiveInfo.CmtBuf = NULL;
//archiveInfo.OpenMode = RAR_OM_LIST;
archiveInfo.OpenMode = RAR_OM_EXTRACT;
archiveInfo.ArcName = argv[1];
// Open file
rarFile = RAROpenArchiveEx(&archiveInfo);
rarFile2 = RAROpenArchiveEx(&archiveInfo);
if (archiveInfo.OpenResult != 0) {
RARCloseArchive(rarFile);
cout << "unrar couldn't open" << endl;
exit(1);
}
fileInfo.CmtBuf = NULL;
// cout << archiveInfo.Flags << endl;
// loop through archive
int numFiles = 0;
int fileSize;
int RHCode;
int PFCode;
int crcVal;
bool workaroundUsed = false;
char currDir[2] = ".";
char tmpFile[11] = "buffer.tmp";
while(true) {
RHCode = RARReadHeaderEx(rarFile, &fileInfo);
if (RHCode != 0) break;
RARReadHeaderEx(rarFile2, &fileInfo);
numFiles++;
fs::path path(fileInfo.FileName);
fileSize = fileInfo.UnpSize;
crcVal = fileInfo.FileCRC;
cout << dec << fileInfo.Method << " " << fileInfo.FileName << " (" << fileInfo.UnpSize << ")" << endl;
cout << " " << hex << uppercase << crcVal << endl;
char fileBuffer[fileSize];
char* bufferPtr = fileBuffer;
// not sure what this does
//RARSetProcessDataProc(rarFile, ProcessDataProc);
// works for some files, but not for others
RARSetCallback(rarFile, CallbackProc, (long) &bufferPtr);
PFCode = RARProcessFile(rarFile, RAR_TEST, NULL, NULL);
// properly extracts to a directory... but I need a stream
// and I don't want to write to disk, read it, and delete from disk
// PFCode = RARProcessFile(rarFile, RAR_EXTRACT, currDir, fileInfo.FileName);
// just skips
//PFCode = RARProcessFile(rarFile, RAR_SKIP, NULL, NULL);
if (PFCode != 0) {
RARCloseArchive(rarFile);
cout << "error processing this file\n" << endl;
exit(1);
}
// crc check
boost::crc_32_type crc32result;
crc32result.process_bytes(&fileBuffer, fileSize);
cout << " " << hex << uppercase << crc32result.checksum() << endl;
// old workaround - crc check always succeeds now!
if (crcVal == crc32result.checksum()) {
RARProcessFile(rarFile2, RAR_SKIP, NULL, NULL);
}
else {
workaroundUsed = true;
RARProcessFile(rarFile2, RAR_EXTRACT, currDir, tmpFile);
ifstream inFile(tmpFile);
inFile.read(fileBuffer, fileSize);
}
ofstream outFile(path.filename().c_str());
outFile.write(fileBuffer, fileSize);
}
if (workaroundUsed) remove(tmpFile);
if (RHCode != ERAR_END_ARCHIVE)
cout << "error traversing through archive: " << RHCode << endl;
RARCloseArchive(rarFile);
cout << dec << "num files: " << numFiles << endl;
}
buffLen
orfileSize
when reading/writing to buffers though. At this point, I'm about ready to just put the blame on the unrar library. – Dropper