Success! I had to install/use the following tools:
npm install html-pdf-chrome --save-dev
npm install pdfreader --save-dev
html-pdf-chrome drives Chrome to convert a given piece of HTML into a PDF, the same way Chrome would when printing the page. pdfreader is a package that reads that PDF and provides the text inside it.
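After using webdriver to browse to the page I want to print, I can call the step below (here HtmlPdf is the imported html-pdf-chrome module and DEFAULT_PRINT_PATH is the directory the PDFs are written to):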
// Step definition: renders the HTML of the page currently loaded in the
// browser session to a PDF and saves it under DEFAULT_PRINT_PATH using the
// file name captured from the step text.
this.When(/^I print the page to a PDF named "([^"]*)"$/,
  async function(outputFilename) {
    console.log("Getting the html...");
    const pageSource = await browser.getSource();

    console.log("Printing the html using Chrome...");
    const renderedPdf = await HtmlPdf.create(pageSource);

    // Only the bare file name is logged; the file itself lands in DEFAULT_PRINT_PATH.
    console.log(`Saving the PDF to ${outputFilename}...`);
    const destination = path.join(DEFAULT_PRINT_PATH, outputFilename);
    await renderedPdf.toFile(destination);
  });
Then, to get the text in the PDF, I call this function:
/**
 * Collects the text items of a PDF stored under DEFAULT_PRINT_PATH.
 *
 * Every text item is appended on a new line (so the result starts with "\n"
 * whenever the first item is not a colon), except an item whose trimmed text
 * is exactly ":", which is appended directly after the preceding text.
 *
 * @param {string} filename - Name of the PDF file, relative to DEFAULT_PRINT_PATH.
 * @returns {Promise<string>} Resolves with the accumulated text once the parser
 *   signals the end of the file (a falsy item); rejects with the parser's error.
 */
function readPdfText(filename) {
  return new Promise((resolve, reject) => {
    let collected = "";
    const reader = new pdfReader.PdfReader();
    const pdfLocation = path.join(DEFAULT_PRINT_PATH, filename);

    reader.parseFileItems(pdfLocation, (err, item) => {
      if (err) {
        console.log("Error received on parsing PDF: " + err, err.stack);
        reject(err);
        return;
      }

      // A falsy item marks the end of the document.
      if (!item) {
        resolve(collected);
        return;
      }

      // Items without text (or with empty text) contribute nothing.
      if (!item.text) {
        return;
      }

      // A lone colon stays on the same line as the text that precedes it.
      const separator = item.text.trim() === ":" ? "" : "\n";
      collected += separator + item.text;
    });
  });
}