I created a web scraping app that checks for a certain problem on an e-commerce website.
What it does:
- Loops through an array of pages
- checks for a condition on every page
- if condition is met - pushes page to temparray
- sends an email with temparray as body
I wrapped that function in a cronjob function. On my local machine it runs fine.
Deployed like this:
- headless: true
- '--no-sandbox',
- '--disable-setuid-sandbox'
- Added the Puppeteer (pptr) buildpack link to the app settings on Heroku
- Slug size is 259.6 MiB of 500 MiB
It didn't work.
- set boot timeout to 120s (instead of 60s)
It worked. But only ran once.
Since I want to run that function several times per day, I need to fix the issue.
I have another app running which uses the same cronjob and notification function and it works on heroku.
Here's my code, if anyone is interested.
const puppeteer = require('puppeteer');
const nodemailer = require("nodemailer");
const CronJob = require('cron').CronJob;
// Module-level state shared by the functions below.

// Description of the most recently flagged product; overwritten by boxLookUp.
let articleInfo = '';
// Descriptions of all flagged products; boxLookUp appends, checkBoxes reads.
const mailArr = [];
// HTML body of the notification mail; set by checkBoxes, read by sendNotification.
let body = '';
// Product pages to check (URLs are redacted placeholders).
// Every entry must be a complete, quoted string literal, otherwise the file
// fails to parse.
const testArr = [
  'https://bxxxx..',
  'https://b.xxx..',
  'https://b.xxxx..',
];
/**
 * Sends the notification e-mail.
 *
 * The HTML part of the message is whatever the module-level `body` variable
 * holds at call time; the plain-text part is a fixed heading. The SMTP
 * password is read from the `heyBfPW2` environment variable.
 *
 * @returns {Promise<void>} Resolves once the mail was handed to the SMTP
 *   server and its message id was logged; rejects if sending fails.
 */
async function sendNotification() {
  const smtpOptions = {
    host: 'mail.brxxxxx.dxx',
    port: 587,
    secure: false,
    auth: {
      user: '[email protected]',
      pass: process.env.heyBfPW2,
    },
  };

  const message = {
    from: '"BB Checker" <hey@baxxxxx>',
    to: "[email protected]",
    subject: 'Hi there',
    text: 'This is the heading',
    html: body,
  };

  const mailer = nodemailer.createTransport(smtpOptions);
  const { messageId } = await mailer.sendMail(message);
  console.log("Message sent: %s", messageId);
}
/**
 * Checks a single product page and records it for the notification mail
 * when it looks affected.
 *
 * A page is recorded (pushed onto the module-level `mailArr`, and mirrored in
 * `articleInfo`) when BOTH of the following hold:
 *   - the `.delivery--text-more-is-coming` element does not show up before
 *     Puppeteer's waitForSelector timeout, and
 *   - the `.set-article-info` text is not one of the "all parts selected"
 *     strings.
 *
 * @param {string} item - URL of the product page to inspect.
 * @returns {Promise<void>}
 * @throws Rejects when the browser cannot be launched, the page cannot be
 *   loaded, or one of the required elements is missing. The browser is
 *   closed in every case.
 */
async function boxLookUp (item) {
  // Texts shown when every part of a set is available.
  const completeSetTexts = [
    '3 von 3 Artikeln ausgewählt',
    '4 von 4 Artikeln ausgewählt',
    '5 von 5 Artikeln ausgewählt',
  ];

  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
    ],
  });

  // Everything after launch runs inside try/finally: without it a failed
  // navigation or a missing selector would leave a Chrome process running
  // for every failed page.
  try {
    const page = await browser.newPage();
    await page.goto(item);
    const content = await page.$eval('.set-article-info', div => div.textContent);
    const title = await page.$eval('.product--title', div => div.textContent);
    const orderNumber = await page.$eval('.entry--content', div => div.textContent);

    // Check if deliveryTime is already updated.
    try {
      await page.waitForSelector('.delivery--text-more-is-coming');
      return; // already updated, nothing to report
    } catch (e) {
      if (!(e instanceof puppeteer.errors.TimeoutError)) {
        // Not the expected "element never appeared" case: as before, the
        // page is not reported, but the error is no longer hidden.
        console.error(`Unexpected error while checking ${item}:`, e);
        return;
      }
    }

    // Not updated: report the page unless all parts of the set are available.
    if (!completeSetTexts.includes(content)) {
      articleInfo = `${title} ${orderNumber} ${item}`;
      mailArr.push(articleInfo);
    }
  } finally {
    await browser.close();
  }
}
/**
 * Runs boxLookUp for every URL in `arr` (one after another), then mails the
 * collected findings.
 *
 * Side effects: empties and refills the module-level `mailArr`, overwrites
 * the module-level `body`, and sends one e-mail via sendNotification.
 *
 * @param {string[]} arr - URLs of the product pages to check.
 * @returns {Promise<void>} Resolves after the mail attempt has finished.
 *   Lookup and mail failures are logged rather than propagated.
 */
const checkBoxes = async (arr) => {
  // Start from a clean slate; otherwise every scheduled run would re-send
  // the findings of all earlier runs as well.
  mailArr.length = 0;

  for (const url of arr) {
    try {
      await boxLookUp(url);
    } catch (err) {
      // One broken page must not prevent the remaining pages from being checked.
      console.error(`Lookup failed for ${url}:`, err);
    }
  }

  console.log(mailArr);
  body = mailArr.toString();

  // Await the mail so a failure is handled here instead of surfacing as an
  // unhandled promise rejection.
  try {
    await sendNotification();
  } catch (err) {
    console.error('Sending the notification failed:', err);
  }
};
/**
 * Schedules the page check.
 *
 * The cron pattern '0 * /10 8-23 * * *' (written without the space) has six
 * fields including seconds: it fires at second 0 of every 10th minute during
 * hours 8 through 23. No time zone is passed to CronJob.
 *
 * CronJob arguments, in order: pattern, onTick, onComplete (null),
 * start (true), timeZone (null), context (null), runOnInit (true). Because
 * `start` is true the job is already running after construction, so no
 * separate job.start() call is needed; `runOnInit` triggers one check
 * immediately.
 *
 * @returns {Promise<void>}
 */
async function startCron() {
  // Guards against overlapping runs: checkBoxes works on shared module-level
  // state, so a tick that fires while the previous run is still busy is skipped.
  let isRunning = false;

  const onTick = async () => {
    if (isRunning) {
      console.warn('Previous check is still running; skipping this tick.');
      return;
    }
    isRunning = true;
    try {
      await checkBoxes(testArr);
    } catch (err) {
      // Keep the process (and thereby the schedule) alive if a run fails.
      console.error('Scheduled check failed:', err);
    } finally {
      isRunning = false;
    }
  };

  new CronJob('0 */10 8-23 * * *', onTick, null, true, null, null, true);
}

startCron().catch((err) => {
  console.error('Could not start the cron job:', err);
  process.exitCode = 1;
});