I'm developing a web screen-capture app with Node.js and Google Puppeteer. I have to capture 38,000 pages, and most of the functions work fine, but errors occur at some points and I don't know where they are coming from.
I have two assumptions. First, I used the headless option to investigate the problem and found that some pages contain lots of GIF files, so they take too long to load and a timeout error is thrown. Second, the website sometimes fails to load, which also produces an error.
Here's my full code
const puppeteer = require("puppeteer");
const fs = require('fs');
const galleryName = "frozen"; // Enter gallery name

/**
 * Batch capture script.
 *
 * Reads post ids (one per line) from `db.txt`, opens each post of the
 * configured gallery in a single Puppeteer page, and saves a screenshot of
 * the `.view_content_wrap` element to `./result/<id>.png`.
 *
 * A failure on one page (navigation timeout, missing element, ...) is logged
 * and recorded, and the run continues with the next id. The browser is always
 * closed at the end so the process can exit.
 */
(async () => {
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();

    // Adjustments particular to this page to ensure we hit desktop breakpoint.
    // NOTE(review): the very tall viewport presumably exists so a whole post
    // fits without scrolling (the clip rectangle below is computed from
    // viewport-relative coordinates) — confirm Chromium copes with this size.
    await page.setViewport({
      width: 1000,
      height: 10000000,
      deviceScaleFactor: 1,
    });

    /**
     * Screenshot the first element matching `opts.selector`.
     *
     * @param {object} [opts]
     * @param {string} opts.selector - CSS selector of the element to capture.
     * @param {string|null} [opts.path=null] - Output file path handed to `page.screenshot`.
     * @param {number} [opts.padding=0] - Extra pixels captured on every side of the element.
     * @returns {Promise<*>} Whatever `page.screenshot` resolves with.
     * @throws {Error} If no selector is given or no element matches it.
     */
    const screenshotDOMElement = async ({ padding = 0, path = null, selector } = {}) => {
      if (!selector) throw new Error("Please provide a selector.");

      // Runs inside the page: measure the element there and send back plain numbers.
      const rect = await page.evaluate((sel) => {
        const element = document.querySelector(sel);
        if (!element) return null;
        const { x, y, width, height } = element.getBoundingClientRect();
        return { left: x, top: y, width, height, id: element.id };
      }, selector);

      if (!rect) {
        throw new Error(`Could not find element that matches selector: ${selector}.`);
      }

      return page.screenshot({
        path,
        clip: {
          x: rect.left - padding,
          y: rect.top - padding,
          // The padding is added on both sides; without `* 2` on the width the
          // right edge of the element would be cropped by `padding` pixels.
          width: rect.width + padding * 2,
          height: rect.height + padding * 2,
        },
      });
    };

    // Make sure the output directory exists before the first screenshot is written.
    await fs.promises.mkdir("./result", { recursive: true });

    const data = await fs.promises.readFile("db.txt");
    // Tolerate CRLF line endings and skip blank lines (e.g. a trailing newline),
    // which would otherwise turn into requests for an empty post id.
    const postIds = data
      .toString()
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter((line) => line !== "");

    // Ids that could not be captured, reported once the run is over.
    const failedIds = [];

    for (const [index, postId] of postIds.entries()) {
      console.log(`Now Processing : ${postId} | ${postIds.length - index - 1} items left`);
      try {
        // The options object must be an argument of goto(); placed after the
        // closing parenthesis it is a discarded expression and has no effect.
        await page.goto(`https://gall.dcinside.com/${galleryName}/${postId}`, {
          waitUntil: "networkidle2",
        });
        // `waitUntil` is a navigation option and does not apply to waitForSelector().
        await page.waitForSelector(".view_content_wrap");

        await screenshotDOMElement({
          path: `./result/${postId}.png`,
          selector: ".view_content_wrap",
          padding: 10,
        });
      } catch (err) {
        // Deliberately best-effort: one slow or broken page must not abort
        // the remaining captures.
        failedIds.push(postId);
        console.error(`Failed to capture ${postId}: ${err.message}`);
      }
    }

    if (failedIds.length > 0) {
      console.error(`${failedIds.length} page(s) failed: ${failedIds.join(", ")}`);
    }
  } finally {
    await browser.close();
  }
})().catch((err) => {
  // Setup-level failures (browser launch, unreadable db.txt, ...) end up here.
  console.error(err);
  process.exitCode = 1;
});
page.goto(url, {waitUntil: "domcontentloaded"})
which resolves the goto()
as fast as possible, without waiting for the default "load"
event. – Gaitskell