Merge Multiple PDFs into one PDF

7

7

I am having some issues with my code. I am trying to loop through a Drive folder that contains many PDFs and then merge these into one file. When I run my code it just creates a copy of the last PDF in the Drive folder instead of merging them all together as expected.

function MergeFiles(){
  var folder = DocsList.getFolderById('myFolderID'); 
  var files = folder.getFiles(); 
  var blobs = [];    
  for( var i in files )   
    blobs.push(files[i].getBlob().getBytes());
  Logger.log(blobs.push(files[i].getBlob().getBytes()));
  var myPDF = Utilities.newBlob(blobs.pop(), "application/pdf", "newPDF.pdf");
  folder.createFile(myPDF);
}
Affiant answered 14/3, 2013 at 15:55 Comment(3)
Issue 699, a request for a Google Apps Script API to "split and merge pdf pages", has been open since July 2011.Liaison
Interesting... maybe it currently can't be done?Affiant
Have you seen this app? It's not a script, but a nice Drive utility :-)Fates
13

So there is more to this than simply combining the data from each file. The actual usable data for each file is "packaged" with markups and other code (similar to HTML and other document formats). You actually have to decode each PDF file, combine the necessary parts, then re-encode with new "packaging." This requires a working knowledge of the PDF specifications and structure, available freely from Adobe here.

I used this information to write a script sufficient for my needs. It does not account for every possibility, however; in particular, merging documents that require PDF 1.4 or higher will need quite a bit of work.
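
For orientation, here is roughly what a minimal single-page PDF looks like on disk (a heavily simplified sketch: the byte offsets and content stream are made up, and newer files may use compressed cross-reference streams instead of the plain xref table shown):

%PDF-1.3
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
2 0 obj
<< /Type /Pages /Count 1 /Kids [3 0 R] >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
endobj
4 0 obj
<< /Length 37 >>
stream
BT /F1 12 Tf 72 720 Td (Hello) Tj ET
endstream
endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
...
trailer
<< /Size 5 /Root 1 0 R >>
startxref
...
%%EOF

Merging therefore means pulling the page-related objects out of each source file's body, renumbering them so they do not collide, and writing a fresh catalog, page tree, cross-reference table, and trailer around them, which is what the script below does.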

/**
 * Merges all given PDF files into one.
 *
 * @param {Folder} directory the folder to store the output file
 * @param {string} name the desired name of the output file
 * @param {File} pdf1 the first PDF file
 * @param {File} pdf2 the second PDF file
 * @param {File} opt_pdf3 [optional] the third PDF file; add as many more as you like
 *
 * @return {File} the merged file
 */
function mergePdfs(directory, name, pdf1, pdf2, opt_pdf3) {

  if (name.slice(-4) != '.pdf') {

    name = name + '.pdf';

  }
  var newObjects = ['1 0 obj\r\n<</Type/Catalog/Pages 2 0 R >>\r\nendobj'];
  var pageAddresses = [];
  for (var argumentIndex = 2; argumentIndex < arguments.length; argumentIndex++) {

    var bytes = arguments[argumentIndex].getBlob().getBytes();
    var xrefByteOffset = '';
    var byteIndex = bytes.length - 1;
    while (!/\sstartxref\s/.test(xrefByteOffset)) {

      xrefByteOffset = String.fromCharCode(bytes[byteIndex]) + xrefByteOffset;
      byteIndex--;

    }
    xrefByteOffset = +(/\s\d+\s/.exec(xrefByteOffset)[0]);
    var objectByteOffsets = [];
    var trailerDictionary = '';
    var rootAddress = '';
    do {

      var xrefTable = '';
      var trailerEndByteOffset = byteIndex;
      byteIndex = xrefByteOffset;
      for (byteIndex; byteIndex <= trailerEndByteOffset; byteIndex++) {

        xrefTable = xrefTable + String.fromCharCode(bytes[byteIndex]);

      }
      xrefTable = xrefTable.split(/\s*trailer\s*/);
      trailerDictionary = xrefTable[1];
      if (objectByteOffsets.length < 1) {

        rootAddress = /\d+\s+\d+\s+R/.exec(/\/Root\s*\d+\s+\d+\s+R/.exec(trailerDictionary)[0])[0].replace('R', 'obj');

      }
      xrefTable = xrefTable[0].split('\n');
      xrefTable.shift();
      while (xrefTable.length > 0) {

        var xrefSectionHeader = xrefTable.shift().split(/\s+/);
        var objectNumber = +xrefSectionHeader[0];
        var numberObjects = +xrefSectionHeader[1];
        for (var entryIndex = 0; entryIndex < numberObjects; entryIndex++) {

          var entry = xrefTable.shift().split(/\s+/);
          objectByteOffsets.push([[objectNumber, +entry[1], 'obj'], +entry[0]]);
          objectNumber++;

        }

      }
      if (/\s*\/Prev/.test(trailerDictionary)) {

        xrefByteOffset = +(/\s*\d+\s/.exec(/\s*\/Prev\s*\d+\s/.exec(trailerDictionary)[0])[0]);

      }

    } while (/\s*\/Prev/.test(trailerDictionary));
    var rootObject = getObject(rootAddress, objectByteOffsets, bytes);
    var pagesAddress = /\d+\s+\d+\s+R/.exec(/\/Pages\s*\d+\s+\d+\s+R/.exec(rootObject)[0])[0].replace('R', 'obj');
    var pagesObject = getObject(pagesAddress, objectByteOffsets, bytes);
    var objects = getDependencies(pagesObject, objectByteOffsets, bytes);
    var newObjectsInsertionIndex = newObjects.length;
    for (var objectIndex = 0; objectIndex < objects.length; objectIndex++) {

      var newObjectAddress = [(newObjects.length + 3) + '', 0 + '', 'obj'];
      if (!Array.isArray(objects[objectIndex])) {

        objects[objectIndex] = [objects[objectIndex]];

      }
      objects[objectIndex].unshift(newObjectAddress);
      var objectAddress = objects[objectIndex][1].match(/\d+\s+\d+\s+obj/)[0].split(/\s+/);
      objects[objectIndex].splice(1, 0, objectAddress);
      if (/\/Type\s*\/Page[^s]/.test(objects[objectIndex][2])) {

        objects[objectIndex][2] = objects[objectIndex][2].replace(/\/Parent\s*\d+\s+\d+\s+R/.exec(objects[objectIndex][2])[0], '/Parent 2 0 R');
        pageAddresses.push(newObjectAddress.join(' ').replace('obj', 'R'));

      }
      var addressRegExp = new RegExp(objectAddress[0] + '\\s+' + objectAddress[1] + '\\s+' + 'obj');
      objects[objectIndex][2] = objects[objectIndex][2].replace(addressRegExp.exec(objects[objectIndex][2])[0], newObjectAddress.join(' '));
      newObjects.push(objects[objectIndex]);

    }
    for (var referencingObjectIndex = newObjectsInsertionIndex; referencingObjectIndex < newObjects.length; referencingObjectIndex++) {

      var references = newObjects[referencingObjectIndex][2].match(/\d+\s+\d+\s+R/g);
      if (references != null) {

        var string = newObjects[referencingObjectIndex][2];
        var referenceIndices = [];
        var currentIndex = 0;
        for (var referenceIndex = 0; referenceIndex < references.length; referenceIndex++) {

          referenceIndices.push([]);
          referenceIndices[referenceIndex].push(string.slice(currentIndex).indexOf(references[referenceIndex]) + currentIndex);
          referenceIndices[referenceIndex].push(references[referenceIndex].length);
          currentIndex += string.slice(currentIndex).indexOf(references[referenceIndex]);

        }
        for (var referenceIndex = 0; referenceIndex < references.length; referenceIndex++) {

          var objectAddress = references[referenceIndex].replace('R', 'obj').split(/\s+/);
          for (var objectIndex = newObjectsInsertionIndex; objectIndex < newObjects.length; objectIndex++) {

            if (arrayEquals(objectAddress, newObjects[objectIndex][1])) {

              var length = string.length;
              newObjects[referencingObjectIndex][2] = string.slice(0, referenceIndices[referenceIndex][0]) + newObjects[objectIndex][0].join(' ').replace('obj', 'R') +
                string.slice(referenceIndices[referenceIndex][0] + referenceIndices[referenceIndex][1]);
              string = newObjects[referencingObjectIndex][2];
              var newLength = string.length;
              if (!(length == newLength)) {

                for (var subsequentReferenceIndex = referenceIndex + 1; subsequentReferenceIndex < references.length; subsequentReferenceIndex++) {

                  referenceIndices[subsequentReferenceIndex][0] += (newLength - length);

                }

              }
              break;

            }

          }

        }

      }

    }
    for (var objectIndex = newObjectsInsertionIndex; objectIndex < newObjects.length; objectIndex++) {

      if (Array.isArray(newObjects[objectIndex])) {

        if (newObjects[objectIndex][3] != undefined) {

          newObjects[objectIndex] = newObjects[objectIndex].slice(2);

        } else {

          newObjects[objectIndex] = newObjects[objectIndex][2];

        }

      }

    }

  }
  newObjects.splice(1, 0, '2 0 obj\r\n<</Type/Pages/Count ' + pageAddresses.length + ' /Kids [' + pageAddresses.join(' ') + ' ]>>\r\nendobj');
  newObjects.splice(2, 0, '3 0 obj\r\n<</Title (' + name + ') /CreationDate (D:' +
       Utilities.formatDate(new Date(), CalendarApp.getDefaultCalendar().getTimeZone(), 'yyyyMMddHHmmssZ').slice(0, -2) + "'00) /ModDate (D:" + Utilities.formatDate(new Date(),
       CalendarApp.getDefaultCalendar().getTimeZone(), 'yyyyMMddHHmmssZ').slice(0, -2) + "'00)>>\r\nendobj");
  var byteOffsets = [0];
  var bytes = [];
  var header = '%PDF-1.3\r\n';
  for (var headerIndex = 0; headerIndex < header.length; headerIndex++) {

    bytes.push(header.charCodeAt(headerIndex));

  }
  bytes.push('%'.charCodeAt(0));
  for (var characterCode = -127; characterCode < -123; characterCode++) {

    bytes.push(characterCode);

  }
  bytes.push('\r'.charCodeAt(0));
  bytes.push('\n'.charCodeAt(0));
  while (newObjects.length > 0) {

    byteOffsets.push(bytes.length);
    var object = newObjects.shift();
    if (Array.isArray(object)) {

      var streamKeyword = /stream\s*\n/.exec(object[0])[0];
      if (streamKeyword.indexOf('\n\n') > streamKeyword.length - 3) {

        streamKeyword = streamKeyword.slice(0, -1);

      } else if (streamKeyword.indexOf('\r\n\r\n') > streamKeyword.length - 5) {

        streamKeyword = streamKeyword.slice(0, -2);

      }
      var streamIndex = object[0].indexOf(streamKeyword) + streamKeyword.length;
      for (var objectIndex = 0; objectIndex < streamIndex; objectIndex++) {

        bytes.push(object[0].charCodeAt(objectIndex));

      }
      bytes = bytes.concat(object[1]);
      for (var objectIndex = streamIndex; objectIndex < object[0].length; objectIndex++) {

        bytes.push(object[0].charCodeAt(objectIndex));

      }

    } else {

      for (var objectIndex = 0; objectIndex < object.length; objectIndex++) {

        bytes.push(object.charCodeAt(objectIndex));

      }

    }
    bytes.push('\r'.charCodeAt(0));
    bytes.push('\n'.charCodeAt(0));

  }
  var xrefByteOffset = bytes.length;
  var xrefHeader = 'xref\r\n';
  for (var xrefHeaderIndex = 0; xrefHeaderIndex < xrefHeader.length; xrefHeaderIndex++) {

    bytes.push(xrefHeader.charCodeAt(xrefHeaderIndex));

  }
  var xrefSectionHeader = '0 ' + byteOffsets.length + '\r\n';
  for (var xrefSectionHeaderIndex = 0; xrefSectionHeaderIndex < xrefSectionHeader.length; xrefSectionHeaderIndex++) {

    bytes.push(xrefSectionHeader.charCodeAt(xrefSectionHeaderIndex));

  }
  for (var byteOffsetIndex = 0; byteOffsetIndex < byteOffsets.length; byteOffsetIndex++) {

    for (var byteOffsetStringIndex = 0; byteOffsetStringIndex < 10; byteOffsetStringIndex++) {

      bytes.push(Utilities.formatString('%010d', byteOffsets[byteOffsetIndex]).charCodeAt(byteOffsetStringIndex));

    }
    bytes.push(' '.charCodeAt(0));
    if (byteOffsetIndex == 0) {

      for (var generationStringIndex = 0; generationStringIndex < 5; generationStringIndex++) {

        bytes.push('65535'.charCodeAt(generationStringIndex));

      }
      for (var keywordIndex = 0; keywordIndex < 2; keywordIndex++) {

        bytes.push(' f'.charCodeAt(keywordIndex));

      }

    } else {

      for (var generationStringIndex = 0; generationStringIndex < 5; generationStringIndex++) {

        bytes.push('0'.charCodeAt(0));

      }
      for (var keywordIndex = 0; keywordIndex < 2; keywordIndex++) {

        bytes.push(' n'.charCodeAt(keywordIndex));

      }

    }
    bytes.push('\r'.charCodeAt(0));
    bytes.push('\n'.charCodeAt(0));

  }
  for (var trailerHeaderIndex = 0; trailerHeaderIndex < 9; trailerHeaderIndex++) {

    bytes.push('trailer\r\n'.charCodeAt(trailerHeaderIndex));

  }
  var idBytes = Utilities.computeDigest(Utilities.DigestAlgorithm.MD5, (new Date).toString());
  var id = '';
  for (var idByteIndex = 0; idByteIndex < idBytes.length; idByteIndex++) {

    id = id + ('0' + (idBytes[idByteIndex] & 0xFF).toString(16)).slice(-2);

  }
  var trailer = '<</Size ' + (byteOffsets.length) + ' /Root 1 0 R /Info 3 0 R /ID [<' + id + '> <' + id + '>]>>\r\nstartxref\r\n' + xrefByteOffset + '\r\n%%EOF'; // the Info dictionary was written as object 3 above
  for (var trailerIndex = 0; trailerIndex < trailer.length; trailerIndex++) {

    bytes.push(trailer.charCodeAt(trailerIndex));

  }
  return directory.createFile(Utilities.newBlob(bytes, 'application/pdf', name));
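  // Reads the object at the given address ('N G obj') out of the raw byte array.
  // Plain objects come back as a string; stream objects come back as a two-element
  // array of [object text, raw stream bytes].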
  function getObject(objectAddress, objectByteOffsets, bytes) {

    objectAddress = objectAddress.split(/\s+/);
    for (var addressIndex = 0; addressIndex < 2; addressIndex++) {

      objectAddress[addressIndex] = +objectAddress[addressIndex];

    }
    var object = [];
    var byteIndex = 0;
    // Indexed loop (the Rhino-only 'for each...in' syntax is rejected by the V8 runtime).
    for (var offsetIndex = 0; offsetIndex < objectByteOffsets.length; offsetIndex++) {

      var offset = objectByteOffsets[offsetIndex];

      if (arrayEquals(objectAddress, offset[0])) {

        byteIndex = offset[1];
        break;

      }

    }
    object.push('');
    while (object[0].indexOf('endobj') <= -1) {

      if (/stream\s*\n/.test(object[0])) {

        var streamLength;
        var lengthFinder = object[0].slice(object[0].indexOf(/\/Length/.exec(object[0])[0]));
        if (/\/Length\s*\d+\s+\d+\s+R/.test(lengthFinder)) {

          var lengthObjectAddress = /\d+\s+\d+\s+R/.exec(/\/Length\s*\d+\s+\d+\s+R/.exec(lengthFinder)[0])[0].split(/\s+/);
          lengthObjectAddress[2] = 'obj';
          for (var addressIndex = 0; addressIndex < 2; addressIndex++) {

            lengthObjectAddress[addressIndex] = +lengthObjectAddress[addressIndex];

          }
          var lengthObject = '';
          var lengthByteIndex = 0;
          for (var lengthOffsetIndex = 0; lengthOffsetIndex < objectByteOffsets.length; lengthOffsetIndex++) {

            var offset = objectByteOffsets[lengthOffsetIndex];

            if (arrayEquals(lengthObjectAddress, offset[0])) {

              lengthByteIndex = offset[1];
              break;

            }

          }
          while (lengthObject.indexOf('endobj') <= -1) {

            lengthObject = lengthObject + String.fromCharCode(bytes[lengthByteIndex]);
            lengthByteIndex++;

          }
          streamLength = +(lengthObject.match(/obj\s*\n\s*\d+\s*\n\s*endobj/)[0].match(/\d+/)[0]);

        } else {

          streamLength = +(/\d+/.exec(lengthFinder)[0]);

        }
        var streamBytes = bytes.slice(byteIndex, byteIndex + streamLength);
        object.push(streamBytes);
        byteIndex += streamLength;
        while (object[0].indexOf('endobj') <= -1) {

          object[0] = object[0] + String.fromCharCode(bytes[byteIndex]);
          byteIndex++;

        }
        return object;

      }
      object[0] = object[0] + String.fromCharCode(bytes[byteIndex]);
      byteIndex++;

    }
    return object[0];

  }
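  // Deep-compares two (possibly nested) arrays element by element.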
  function arrayEquals(array1, array2) {

    if (array1 == array2) {

      return true;

    }
    if (array1 == null && array2 == null) {

      return true;

    } else if (array1 == null || array2 == null) {

      return false;

    }
    if (array1.length != array2.length) {

      return false;

    }
    for (var index = 0; index < array1.length; index++) {

      if (Array.isArray(array1[index])) {

        if (!arrayEquals(array1[index], array2[index])) {

          return false;

        }
        continue;

      }
      if (array1[index] != array2[index]) {

        return false;

      }

    }
    return true;

  }
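  // Recursively collects every object referenced ('N G R') from the given object
  // string, skipping /Parent back-references so it does not walk back up the page tree.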
  function getDependencies(objectString, objectByteOffsets, bytes) {

    var dependencies = [];
    var references = objectString.match(/\d+\s+\d+\s+R/g);
    if (references != null) {

      while (references.length > 0) {

        if (/\/Parent/.test(objectString.slice(objectString.indexOf(references[0]) - 8, objectString.indexOf(references[0])))) {

          references.shift();
          continue;

        }
        var dependency = getObject(references.shift().replace('R', 'obj'), objectByteOffsets, bytes);
        var dependencyExists = false;
        for (var entryIndex = 0; entryIndex < dependencies.length; entryIndex++) {

          var entry = dependencies[entryIndex];

          dependencyExists = (arrayEquals(dependency, entry)) ? true : dependencyExists;

        }
        if (!dependencyExists) {

          dependencies.push(dependency);

        }
        if (Array.isArray(dependency)) {

          dependencies = dependencies.concat(getDependencies(dependency[0], objectByteOffsets, bytes));

        } else {

          dependencies = dependencies.concat(getDependencies(dependency, objectByteOffsets, bytes));

        }

      }

    }
    return dependencies;

  }

}

Essentially, what is happening here is that for each file the script identifies the objects containing its pages, along with their contents and resources. Those objects are then renumbered and given new "packaging" (a fresh catalog, page tree, cross-reference table, and trailer) for the merged file.
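
As a rough illustration (the object numbers here are invented), a page object copied from a source file gets a new object number, its /Parent is re-pointed at the merged file's page tree (always object 2 here), and the references it holds are remapped to the new numbering:

5 0 obj <</Type /Page /Parent 3 0 R /Contents 6 0 R>> endobj   (as it appeared in the source file)
4 0 obj <</Type /Page /Parent 2 0 R /Contents 7 0 R>> endobj   (after renumbering for the merged file)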

I wrote this code to be used with two files, but since I could imagine needing more, I made it accept any number of them. To be effective for the original question (merging everything in a folder), the beginning of the function,

function mergePdfs(directory, name, pdf1, pdf2, opt_pdf3) {

  if (name.slice(-4) != '.pdf') {

    name = name + '.pdf';

  }
  var newObjects = ['1 0 obj\r\n<</Type/Catalog/Pages 2 0 R >>\r\nendobj'];
  var pageAddresses = [];
  for (var argumentIndex = 2; argumentIndex < arguments.length; argumentIndex++) {

    var bytes = arguments[argumentIndex].getBlob().getBytes();

should be replaced by

function mergePdfs(directory, name) {

  if (name.slice(-4) != '.pdf') {

    name = name + '.pdf';

  }
  var newObjects = ['1 0 obj\r\n<</Type/Catalog/Pages 2 0 R >>\r\nendobj'];
  var pageAddresses = [];
  // DriveApp's Folder.getFiles() returns a FileIterator rather than an array,
  // so iterate with hasNext()/next() instead of an index.
  var files = directory.getFiles();
  while (files.hasNext()) {

    var bytes = files.next().getBlob().getBytes();
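
For reference, a call to this folder-based version might look like the following sketch (the folder ID is a placeholder; the merged file is written back into the same folder):

function runMerge() {

  var folder = DriveApp.getFolderById('myFolderID'); // placeholder: folder containing the source PDFs

  mergePdfs(folder, 'merged.pdf');

}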
Supererogation answered 17/8, 2019 at 3:59 Comment(13)
This code is functional for all the tests I've given it. I also have a method for splitting a pdf. You can see this and any updates I make to this code here. I have made this publicly available, because I think it is a travesty that there is no API for this.Supererogation
Muchas gracias, vielen Dank, many thanks. This is exactly what I've been looking for. It worked perfectly on my test run. This PDF stuff is a totally new jungle for me; I will be printing it out and reading it to try to follow it for my own sake. One oddity initially noted: around line 362 there's a return statement, no closing }, and then a new function. Do you know something I don't about ending a JS function with a )? Without printing it out it's tough to figure out what transpired there, but it passes muster and works. Can you give any pointers for orientation on that? Again, many thanks.Endopeptidase
[correction] It was line 303 in the version I was looking at, just prior to function getObject. And I FULLY AGREE with you on the travesty.Endopeptidase
The function is not ended at that point. The function that starts the next line is part of the function. Nesting functions limits which can be used by scripts that import this code. In this case, there is no security concern; I simply don't want the extra functions showing up as options in the editor when I am calling one of these functions. Hope that makes sense.Supererogation
If the goal is merely to reorder pages all contained within a single PDF, would the code be substantially simpler and easier to keep compliant with later versions of the spec? My goal is very specific: imagine scanning a two-sided document in an automatic sheet feeder. If you scan it forwards and then scan the back sides in reverse, you'd have pages in the order 1, 3, 5, 6, 4, 2 (odds ascending followed by evens descending). I'd like to write a script which reorders those into the proper order.Thomism
I'm sure if that's all you're doing, you could pare the code down somewhat; however, that won't help with the later versions, as the problem is that the trailer dictionary is a compressed data stream. Reading that is simply outside my wheelhouse. That being said, if you're using a standard scanner that produces PDFs, it almost certainly does not use later than PDF-1.4. Hope that helps.Supererogation
Thanks so much for your work and for sharing the code! Exactly what I needed. Trying to Save/Run on Apps Script V8 Runtime kept failing with Syntax Errors. I switched back to the Rhino Runtime and it's running fine.Caron
Another user informs me that my 'for' loops are what crash the V8 runtime. Evidently switching them to an iterated counter instead of a 'for each' solves the problem... I don't know as I've never tried it, but there it is.Supererogation
I can now confirm that the 'for each' loops are incompatible with V8. I have updated my personal code, so the linked version should work now.Supererogation
Thanks a lot for this solution, it works wonders. But now I'm trying to print the resulting PDF and I can't in Chrome or Adobe Reader, any hints?Snuggle
I haven't had that problem. If the PDF renders correctly on-screen, then I'm afraid I have no clue. If not, it seems likely you're dealing with one of a few known issues. One is that so far, PDFs generated with Google Drive's camera-scanning tool fail. The other is if the PDF uses a version number of 1.4 or higher.Supererogation
I know it's been a while, yet I'd like to ask a couple of questions: 1) what's the error in case PDFs are of the wrong format? Background: I encountered a directory.createFile is not a function, but AFAIK it should be (directory is one of the args and it is an open PDF file) 2) Does it accept PDFs version 1.4, or is 1.4 the first unsupported one? Because it happened with that one.Bouffard
1.4 is not accepted.Supererogation
5

I also ran into the same problem, and as a workaround I'm using a RESTful API to merge the PDFs: https://www.convertapi.com/pdf-to-merge

function merge() {
  var folder = DriveApp.getFolderById('<ID FOLDER>'); // folder containing the PDF files
  var files = folder.getFiles(); // iterator over all files in the folder
  
  var formData = {};
  var index = 0;
  while(files.hasNext()) {
    var file = files.next();
    formData['Files[' + index + ']'] = file.getBlob();
    index++;
  }
  
  var options = {
    'method' : 'post',
    'payload' : formData,
    'muteHttpExceptions': true
  };
 
  var response = UrlFetchApp.fetch('https://v2.convertapi.com/pdf/to/merge?Secret=<YOUR SECRET>', options);
  
  if(response.getResponseCode() == 200) {
    var contentText = JSON.parse(response.getContentText());
    var blob = Utilities.base64Decode(contentText.Files[0].FileData);
    folder.createFile(Utilities.newBlob(blob, 'application/pdf', 'merge.pdf'));
  }
}
Monogram answered 13/6, 2018 at 17:7 Comment(2)
Thanks for posting this answer. It's the best merge solution!Subeditor
This worked for me after changing file.getBlob() to file.getBlob().getAs("application/pdf");Subeditor
5

Hopefully this is useful to those looking to use an external JavaScript library to merge PDF documents. I have separated out the merge-PDF function so it is easier to reuse for others who may be referencing this question. Note that because this relies on an external library, it may need to be modified to keep working in the future.

eval(UrlFetchApp.fetch("https://unpkg.com/pdf-lib/dist/pdf-lib.js").getContentText()); // load pdf-lib into the global scope
setTimeout = (func, sleep) => (Utilities.sleep(sleep), func()) // Apps Script has no setTimeout, so provide a blocking shim for pdf-lib

/**
 * Returns the merged PDF; blobs are merged in the same order they are provided.
 * @param {Blob[]} blobs Blob array
 * @param {String} fileName output PDF name
 * @return {Promise} Promise object, blob of merged blobs
 */
async function mergeAllPDFs(blobs, fileName) {
  const pdf = await PDFLib.PDFDocument.create();
  for (let i = 0; i < blobs.length; i++) {
    const tempBytes = await new Uint8Array(blobs[i].getBytes());
    const tempPdf = await PDFLib.PDFDocument.load(tempBytes);
    const pages = tempPdf.getPageCount();
    for (let p = 0; p < pages; p++) {
      const [tempPage] = await pdf.copyPages(tempPdf, [p]);
      pdf.addPage(tempPage);
    }
  }
  const pdfDoc = await pdf.save()
  return Utilities.newBlob(pdfDoc).setName(fileName)
}

async function MergeFiles() {
  var folder = DriveApp.getFolderById('myFolderID');
  var files = folder.getFiles();
  var blobs = [];
  while (files.hasNext()) {
    var file = files.next();
    blobs.push(file.getBlob());
  }
  var myPDF = await mergeAllPDFs(blobs, "newPDF.pdf")
  folder.createFile(myPDF);
}

Useful resources:

External library being utilized: pdf-lib.js

How to use an external JavaScript library

Hausmann answered 18/11, 2022 at 3:30 Comment(3)
This is amazing! Thank you. I'd recommend moving the fetch call for pdf-lib inside of the mergeAllPDFs() function because if you want to deploy this app script as a library to use in other scripts then pdf-lib will be fetched even when you are running a function that is not dependent on this package, whereas if you move it inside of mergeAllPDFs at the top of the function then pdf-lib will only be downloaded when it is needed.Porbeagle
I also recommend using the minified version which is 1/3 of the size: unpkg.com/pdf-lib/dist/pdf-lib.min.jsPorbeagle
one more optimization, some of my merged pdfs were 2-3x the size of the input pdfs. I narrowed down the issue to calling pdf.copyPages multiple times inside the for loop. To fix this I used the following: const pageCount = tempPdf.getPageCount() const pageIndicesArray = [...Array(pageCount).keys()] const pages = await pdf.copyPages(tempPdf, pageIndicesArray) pages.forEach(page => pdf.addPage(page))Porbeagle
3

A multipage PDF is definitely not a simple concatenation of multiple PDF files' contents... I doubt that you could get any result with this approach, even if I admit it seems tempting...

I've also been looking for something like that, but without success so far.

Fates answered 15/3, 2013 at 9:37 Comment(1)
So were you able to come up with anything? I can't figure out how to concatenate the blob array.Affiant
0

Your code is behaving exactly as you've coded it to: blobs is an array, and when you call blobs.pop(), you get only the last item in the array (which is your last PDF).

What you should do is concatenate the blobs, not in an array but in a single blob object. However, I'm not sure how this can be done in GAS.

Dialectology answered 14/3, 2013 at 23:58 Comment(0)
0

The .getBytes() method returns a binary array for each file, so what you've created is an array of arrays with:

blobs.push(files[i].getBlob().getBytes());

Instead, I'd concatenate the array for the current item in the loop with an accumulator array that grows with each loop iteration. Then, after exiting the loop, the contents of the accumulator array can be passed to .newBlob() or .setBytes().
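
A literal byte-accumulator version of that idea would look something like the sketch below (the folder ID is a placeholder). Note, though, that as the other answers here point out, simply gluing the raw bytes of several PDFs together still does not produce a valid PDF:

function concatenateAllBytes() {
  var folder = DriveApp.getFolderById('myFolderID'); // placeholder: folder containing the PDFs
  var files = folder.getFiles();
  var allBytes = []; // accumulator array that grows with each file
  while (files.hasNext()) {
    allBytes = allBytes.concat(files.next().getBlob().getBytes());
  }
  // Viewers will reject the result as a corrupt PDF; see the other answers for why.
  folder.createFile(Utilities.newBlob(allBytes, 'application/pdf', 'combined.pdf'));
}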

Fionnula answered 15/3, 2013 at 2:9 Comment(1)
I am not coming up with a way to concantenate the blob array. Any additional thoughts/code snippets?Affiant
-1

Here's the code to concatenate the blobs, but like Serge mentioned, it doesn't actually work, and the PDF comes out corrupt.

function MergeFiles(){
  var folder = DocsList.getFolderById('myFolderID');
  var files = folder.getFiles();

  var blobs = "";
  for (var i in files) {
    blobs = blobs + (files[i].getBlob().getBytes());
  }
  Logger.log(blobs);
  var myPDF = Utilities.newBlob(blobs, "application/pdf", "newPDF.pdf");
  folder.createFile(myPDF);
}
Spat answered 10/11, 2013 at 22:30 Comment(0)
