Why is puppeteer reporting "UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!"?
Asked Answered
B

4

35

I've got a simple node.js script to capture screenshots of a few web pages. It appears I'm getting tripped up somewhere along the line with my use of async/await, but I can't figure out where. I'm currently using puppeteer v1.11.0.

const puppeteer = require('puppeteer');

//a list of sites to screenshot
//(keys are also used as the base name of each output file, see `paper + '.pdf'` below)
const papers =
{
     nytimes: "https://www.nytimes.com/",
     wapo: "https://www.washingtonpost.com/"
};

//launch puppeteer, do everything in .then() handler
//NOTE: the handler is a plain (non-async) function and the chain has no .catch()
puppeteer.launch({devtools:false}).then(function(browser){

//create a load_page function that returns a promise which resolves when screenshot is taken
//(the output is actually a PDF written with page.pdf(), not an image screenshot)
//paper: a key of `papers`; the promise is resolved only from inside the "console" handler,
//and reject() is never called anywhere in this function
async function load_page(paper){

    const url = papers[paper];

    return new Promise(async function(resolve, reject){

        const page = await browser.newPage();
        await page.setViewport({width:1024, height: 768});

        //screenshot on first console message
        //the listener is registered before navigation starts (goto is called further down)
        page.once("console", async console_msg => {        
            await page.pdf({path: paper + '.pdf',
                            printBackground:true,
                            width:'1024px',
                            height:'768px',
                            margin: {top:"0px", right:"0px", bottom:"0px", left:"0px"}
                        });

            //close page
            await page.close();

            //resolve promise
            //NOTE(review): nothing here waits for the page.goto() call below, so the
            //outer promise can resolve while that navigation is still pending
            resolve();
        });

        //go to page
        //a rejection of this await is not passed to reject(); it only rejects the
        //promise returned by the async executor, which nobody handles
        await page.goto(url, {"waitUntil":["load", "networkidle0"]});

    })     
}

//step through the list of papers, calling the above load_page()
async function stepThru(){
    for(var p in papers){
        if(papers.hasOwnProperty(p)){
            //wait to load page and screenshot before loading next page
            await load_page(p);
        }
    }

    //close browser after loop has finished (and all promises resolved)
    //NOTE(review): "all promises" only covers the load_page() promises; a page.goto()
    //still in flight for the last page is not awaited before the browser is closed
    await browser.close();  
}

//kick it off
//the promise returned by stepThru() is neither awaited nor given a .catch()
stepThru();

//getting this error message:
//UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!

});
Bekki answered 5/2, 2019 at 5:8 Comment(0)
B
35

The "Navigation failed because browser has disconnected" error usually means that the Node script that launched Puppeteer ended (or closed the browser) without waiting for the Puppeteer actions to complete. So, as you suspected, it's a problem with how the promises are awaited.

About your script, I made some changes to make it work:

  1. First of all, you're not awaiting the completion of the (async) stepThru function, so change
stepThru();

to

await stepThru();

and

puppeteer.launch({devtools:false}).then(function(browser){

to

puppeteer.launch({devtools:false}).then(async function(browser){

(I added async)

  2. I changed the way you manage the goto and page.once promises.

The PDF promise is now:

//Promise that settles once the page has been printed to PDF.
//It resolves after page.pdf() finishes for the first "console" event and rejects if
//page.pdf() fails, so a surrounding Promise.all sees the error instead of waiting forever.
//NOTE: if the page never emits a console message, this promise stays pending.
new Promise(function(resolve, reject){
    //print to PDF on first console message
    page.once("console", function(){
      page.pdf({
        path: paper + '.pdf', 
        printBackground:true, 
        width:'1024px', 
        height:'768px', 
        margin: {
          top:"0px", 
          right:"0px", 
          bottom:"0px", 
          left:"0px"
        } 
      }).then(function(){
        resolve();
      }, reject); //forward a failed pdf() rather than leaving it as an unhandled rejection
    });
})

and it has a single responsibility, just the PDF creation.

  3. Then I managed both the page.goto and PDF promises with a Promise.all:
await Promise.all([
    page.goto(url, {"waitUntil":["load", "networkidle2"]}),
    new Promise(async function(resolve, reject){
        // ... pdf creation as above        
    })
]);
  4. I moved the page.close after the Promise.all:
await Promise.all([
    // page.goto
    // PDF creation
]);

await page.close();
resolve();

And now it works, here the full working script:

const puppeteer = require('puppeteer');

//a list of sites to capture; each key is also the base name of the generated PDF
const papers =
{
  nytimes: "https://www.nytimes.com/",
  wapo: "https://www.washingtonpost.com/"
};

//launch puppeteer, do everything in .then() handler
puppeteer.launch({devtools:false}).then(async function(browser){

  /**
   * Opens papers[paper] in a new page and writes "<paper>.pdf" when the page
   * emits its first console message.
   *
   * The returned promise settles only after BOTH the navigation and the PDF
   * creation have finished, and the page is closed in every case.
   * NOTE: a page that never logs to the console keeps the PDF promise pending.
   *
   * @param {string} paper - a key of `papers`
   * @returns {Promise<void>} rejects if navigation or PDF creation fails
   */
  async function load_page(paper){
    const url = papers[paper];
    const page = await browser.newPage();

    try {
      await page.setViewport({width:1024, height: 768});

      await Promise.all([
        page.goto(url, {"waitUntil":["load", "networkidle2"]}),
        //new Promise is used here only to adapt the event-based "console" API
        new Promise(function(resolve, reject){

          //print to PDF on first console message
          page.once("console", function(){
            page.pdf({path: paper + '.pdf', printBackground:true, width:'1024px', height:'768px', margin: {top:"0px", right:"0px", bottom:"0px", left:"0px"} })
              .then(function(){ resolve(); }, reject); //forward pdf() failures to Promise.all
          });
        })
      ]);
    } finally {
      //close the page even when navigation or PDF creation failed
      await page.close();
    }
  }

  /**
   * Steps through the list of papers one at a time, calling load_page() for
   * each, then closes the browser (also when one of the pages failed).
   *
   * @returns {Promise<void>}
   */
  async function stepThru(){
    try {
      for(const p of Object.keys(papers)){
        //wait to load page and create the PDF before loading next page
        await load_page(p);
      }
    } finally {
      await browser.close();
    }
  }

  await stepThru();
}).catch(function(err){
  //report launch/navigation/PDF errors instead of an unhandled promise rejection
  console.error(err);
  process.exitCode = 1;
});

Please note that:

  • I changed networkidle0 to networkidle2 because the nytimes.com website takes a very long time to reach a state with 0 network requests (because of ads, etc.). You can of course wait for networkidle0 instead, but that's up to you and outside the scope of your question (increase the page.goto timeout in that case).

  • The www.washingtonpost.com site fails with a TOO_MANY_REDIRECTS error, so I changed it to washingtonpost.com, but I think you should investigate that further. To test the script I used the nytimes site several times, along with other websites. Again: it's outside the scope of your question.

Let me know if you need some more help 😉

Bouzoun answered 5/2, 2019 at 7:34 Comment(3)
This was a really nice, detailed answer, thanks! One question, is there a reason you chose to resolve() the PDF promise immediately, rather than placing resolve inside of page.once() callback? – Bekki
It was a mistake, I fixed it. Both work but I think that the first one was just a lucky case 😉 – Bouzoun
you don't need to use async if you are not awaiting in the function, e.g. async function load_page(paper) become function load_page(paper) – Enervated
H
8

I had the same error when the system disk was full.

Hatchment answered 2/10, 2020 at 10:3 Comment(1)
How do you check this? – Scylla
M
6

If you are using Puppeteer in AWS lambda, then roll back your run time configuration, and this issue will be resolved.

Mcneill answered 2/5, 2024 at 5:9 Comment(3)
I'm on Node 16, can you share the runtime version that's working for you please? I haven't been able to find a list and it's very likely this is the root cause in my case too. Appreciate it. – Stilwell
Thanks for your appreciation, My Node Run time version is Node 18.x . – Mcneill
more specifically my run time version is 18.v26 – Mcneill
L
0

This happened to me after updating @aws-sdk packages. It seems that there was an incompatibility with the Puppeteer version. Upgrading Puppeteer to the latest version solved the issue.

Lustrate answered 10/6, 2024 at 14:51 Comment(0)

© 2022 - 2025 — McMap. All rights reserved.