errorMessage":"spawn /opt/chromedriver/chromedriver ENOENT
Asked Answered
O

0

0

Context: I plan to run a selenium script as an Amazon lambda function. My deployment package will be in the form of a container image. Before that I wanted to test locally if everything is working as expected.

I have a Dockerfile as shown below:

FROM public.ecr.aws/lambda/nodejs:18

# Install unzip separately
RUN yum install -y unzip

# Install Chrome dependencies
RUN yum install -y atk at-spi2-atk gtk3 cups-libs pango libdrm \ 
    libXcomposite libXcursor libXdamage libXext libXtst libXt \
    libXrandr libXScrnSaver alsa-lib -y

# Set your desired versions
ARG CHROMIUM_VERSION="118.0.5993.70"
ARG CHROME_DRIVER_VERSION="118.0.5993.70"

# Download and install Chromium and Chromedriver with your specified versions
RUN curl -Lo "/tmp/chromedriver-linux64.zip" "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/${CHROME_DRIVER_VERSION}/linux64/chromedriver-linux64.zip" && \
    curl -Lo "/tmp/chrome-linux64.zip" "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/${CHROMIUM_VERSION}/linux64/chrome-linux64.zip" && \
    unzip /tmp/chromedriver-linux64.zip -d /opt/chromedriver && \
    unzip /tmp/chrome-linux64.zip -d /opt/chromium && \
    rm -rf /tmp/chromedriver-linux64.zip /tmp/chrome-linux64.zip

# Install selenium-webdriver version 4.14.0
RUN npm install [email protected]

# Copy your Node.js code
COPY index.js ./

# Set the CMD directive
CMD [ "index.handler" ]

My index.js file is as follows:

const { Builder, By } = require("selenium-webdriver");
const chrome = require("selenium-webdriver/chrome");
const { ServiceBuilder } = require("selenium-webdriver/chrome");

async function validateReviewCount(driver, reviewCount, url) {
    // Navigate to the specified URL
    await driver.get(url);

    // Locate the parent div with id filter-info-section
    const filterInfoSection = await driver.findElement(By.id('filter-info-section'));

    // Locate and extract the review count text within the parent div
    const reviewCountTextElement = await filterInfoSection.findElement(By.css('[data-hook="cr-filter-info-review-rating-count"]'));
    const reviewCountText = await reviewCountTextElement.getText();

    // Extract the second number from the review count text
    const match = reviewCountText.match(/\d+/g);
    const actualReviewCount = match && match[1] ? Number(match[1]) : null;

    console.log("Actual Review Count:", actualReviewCount);

    // Compare reviewCount and actualReviewCount
    if (actualReviewCount === null) {
        throw new Error('Error fetching review count');
    } else if (actualReviewCount === 0) {
        throw new Error('The product has 0 reviews');
    } else if (actualReviewCount < reviewCount) {
        throw new Error(`The product has less than ${reviewCount} reviews`);
    }
}

async function fetchReviews(driver, url) {
    const reviewData = [];
    await driver.get(url);

    const reviewSection = await driver.findElement(By.css(".reviews-content"));
    const reviewViews = await reviewSection.findElement(By.css(".review-views"));
    const allReviews = await reviewViews.findElements(By.css('[data-hook="review"]'));

    for (const review of allReviews) {
        const reviewDateElement = await review.findElement(By.css('[data-hook="review-date"]'));
        const reviewDateText = await reviewDateElement.getText();

        const reviewBodyElement = await review.findElement(By.css('[data-hook="review-body"]'));
        const reviewBodyText = await reviewBodyElement.getText();

        reviewData.push({
            reviewDate: reviewDateText,
            reviewBody: reviewBodyText,
        });
    }

    return reviewData;
}


// Lambda entry point

exports.handler = async (event, context) => {
    const reviewCount = event.reviewCount;

    const options = new chrome.Options();
    options.addArguments("--headless");
    options.addArguments("--no-sandbox");
    options.addArguments("--disable-gpu");
    options.addArguments("--window-size=1280x1696");
    options.addArguments("--single-process");
    options.addArguments("--disable-dev-shm-usage");
    options.addArguments("--disable-dev-tools");
    options.addArguments("--no-zygote");
    options.addArguments(`--user-data-dir=/tmp/chromium`);
    options.setChromeBinaryPath("/opt/chromium/chrome");

    const urls = [
        "https://www.amazon.com/product-reviews/B000FH2Y9S/ref=acr_dp_hist_1?ie=UTF8&filterByStar=one_star&reviewerType=all_reviews#reviews-filter-bar",
        "https://www.amazon.com/product-reviews/B000FH2Y9S/ref=cm_cr_arp_d_paging_btm_next_2?ie=UTF8&filterByStar=one_star&reviewerType=all_reviews&pageNumber=2#reviews-filter-bar",
        "https://www.amazon.com/product-reviews/B000FH2Y9S/ref=cm_cr_getr_d_paging_btm_next_3?ie=UTF8&filterByStar=one_star&reviewerType=all_reviews&pageNumber=3#reviews-filter-bar",
    ];

    const serviceBuilder = new ServiceBuilder('/opt/chromedriver/chromedriver');

    const driver = await new Builder()
        .forBrowser("chrome")
        .setChromeOptions(options)
        .setChromeService(serviceBuilder)
        .build();

    const reviewDataArray = [];

    try {
        driver.manage().setTimeouts({ implicit: 60000 });

        await validateReviewCount(driver, reviewCount, urls[0]);

        for (const url of urls) {
            const reviews = await fetchReviews(driver, url);
            reviewDataArray.push(...reviews);
        }

        return {
            statusCode: 200,
            body: JSON.stringify({
                message: "Scraping completed",
                data: reviewDataArray,
            }),
        };
    } catch (error) {
        const errorMessage = error instanceof Error ? error.message : "Internal Server Error";
        return {
            statusCode: 500,
            body: JSON.stringify({
                error: errorMessage
            }),
        };
    } finally {
        await driver.quit();
    }
};

First I created a Docker image:

enter image description here

Then I ran it using the command below:

docker run -p 9000:8080 reviews:test

Then I posted an event to the local end point using the command below:

curl "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"reviewCount": 100}'

When I did this, I got the following error:

{"errorType":"Error","errorMessage":"spawn /opt/chromedriver/chromedriver ENOENT","trace":["Error: spawn /opt/chromedriver/chromedriver ENOENT","    at /var/task/node_modules/selenium-webdriver/remote/index.js:259:24","    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)"]}% 

enter image description here

What's the issue? Need help in debugging this issue.

Objective answered 31/10, 2023 at 11:29 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.