CasperJS - How to open up all links in an array of links
Asked Answered
L

7

8

I'm trying to make it so that CasperJS will open up every link in an array of links. I have it so that after I open a link, it will display the title of that page. Yet when I run it, nothing is displayed.

I can use a for loop to display the links and it works perfectly.

This is the code for what I just explained:

// Attempt 1: queue one thenOpen() step per link from inside the start callback.
var x;

casper.start(URL, function() {

    x = links.split(" "); // now x is an array of links

    // NOTE(review): the condition below reads `j`, which is never declared in
    // this snippet; the asker later reports it was a typo for `i`.
    for (var i = 0; j < x.length; i++) // for every link...
    {
        casper.thenOpen(partialURL + x[i], function() { // open that link
            console.log(this.getTitle() + '\n'); // display the title of page
        });
    }

    // NOTE(review): the asker later reports that this exit() call made the
    // script print nothing, and that removing it or moving it into a
    // casper.then(...) step fixed the output.
    this.exit();
});

casper.run();

This is another method I tried:

// Attempt 2: same loop as attempt 1, but placed at the top level of the script
// after the casper.start() call instead of inside its callback.
var x;

casper.start(URL, function() {
    x = links.split(" "); // now x is an array of links
    // NOTE(review): same exit() call that the asker later reports as
    // suppressing all output.
    this.exit();
});

// NOTE(review): x is only assigned inside the start callback above. The asker
// reports "x is undefined" here, and an answer below attributes that to this
// loop executing before the start callback has run. The condition also reads
// the undeclared `j` (reported by the asker as a typo for `i`).
for (var i = 0; j < x.length; i++) // for every link...
{
    casper.thenOpen(partialURL + x[i], function() { // open that link
        console.log(this.getTitle() + '\n'); // display the title of page
    });
}

casper.run();

It says that 'x' is undefined, even though I declared x as a global variable. Any modifications that you could make would be great. Thanks.

Librium answered 29/7, 2013 at 14:22 Comment(2)
I just realized the only problem I had was this line: for (var i = 0; j < x.length; i++) <-- I accidentally wrote j instead of i in the middle condition.Librium
Also, this.exit(); seems to make the script output nothing (I could take it away or just change it to casper.then(function() { this.exit(); } );). If I change those, the code in the question works.Librium
L
8
var x; // array of link suffixes, filled in by the start callback

casper.start(URL, function() {
    x = links.split(" "); // now x is an array of links
});

// The per-link steps are queued from inside a then() step so that x has
// already been assigned by the start callback when each() iterates over it.
casper.then(function() {
    // each() passes the casper instance and the current array item to the
    // callback, so the link is read from the argument instead of from a
    // separately maintained counter.
    this.each(x, function(self, link) {
        self.thenOpen(partialURL + link, function() {
            this.echo(this.getTitle()); // display the title of page
        });
    });
});

casper.run();
Librium answered 29/7, 2013 at 22:20 Comment(1)
Struggling with something similar but there aren't exactly any links available to select and open. Instead there are divs with links. Here's the problem: #50578013Simoneaux
C
8
// Run the step once per entry of x, advancing the position counter each time.
var nTimes = x.length;
var i = 0;

casper.repeat(nTimes, function() {
    // ... per-iteration work goes here (x[i] is the current entry)
    i += 1;
});

worked for me.

Cortico answered 1/5, 2014 at 23:9 Comment(0)
L
5
casper.start('about:blank');

var urls = ['http://google.fr', 'http://yahoo.fr', 'http://amazon.fr'];

// Queue one thenOpen() step per URL. The first callback argument is the
// casper instance itself; it is named `self` here so that it does not shadow
// the outer `casper` variable.
casper.each(urls, function(self, url) {
    self.thenOpen(url, function() {
        this.echo("I'm in your " + url + ".");
    });
});

// The steps queued above only execute once run() is called.
casper.run();
Liver answered 9/5, 2016 at 15:52 Comment(0)
L
2

In my case, I had to scrape a site that had an unknown number of pages. Each page (except the last) had a <a class="next-page" href="/page/N">Next page</a> link (where N is the page number). There was no way for the scraper to know when it was finished except when the "Next Page" link was no longer present.

Of course you'll have to make adjustments depending on what type of pagination links might exist on your page.

Here's what I did. Ymmv.

// casper instance that drives the scrape
var casper = require("casper").create();

// filesystem module
var fs = require('fs');

// scraper state: current page number and everything collected so far
var state = {
  page: 1,
  data: []
};

// scraper function
/**
 * Scrape the currently loaded page, then queue the next page if there is one.
 *
 * Intended to run as a casper step callback: `this` must provide `echo` and
 * `evaluate`. Whatever the in-page extractor returns is appended to
 * `state.data`. When an `a.next-page` link is found, `state.page` is
 * incremented and this same function is scheduled against that link's URL;
 * when no such link is found, nothing further is queued.
 */
function scrape() {
  this.echo('Scraping page ' + state.page + '...', 'INFO');

  state.data = state.data.concat(this.evaluate(function() {
    // get some stuff from the page
    return someData;
  }));

  // Yields the next-page link's href, or a falsy value when the link is absent.
  var nextUrl = this.evaluate(function() {
    var nextLink = document.querySelector("a.next-page");
    return nextLink && nextLink.href;
  });

  if (nextUrl) {
    state.page = state.page + 1;
    casper.thenOpen(nextUrl, scrape); // <- recursion
  }
}

// run
// NOTE(review): placeholder start URL — replace with the first page to scrape.
var startUrl = 'http://example.com/page/1';

// run() requires a prior start(); starting with scrape as the step callback
// is also what kicks off the page-by-page recursion.
casper.start(startUrl, scrape);

casper.run(function() {
  fs.write('./data.json', JSON.stringify(state.data, null, '\t'), 'w');
  this.echo('Done!', 'INFO');
  // When run() is given a completion callback, casper does not exit on its
  // own, so the script has to end the process explicitly.
  this.exit();
});

Hope this helps someone. If you have other questions, I'll be happy to try to help.

Loehr answered 8/7, 2015 at 20:9 Comment(1)
Could you please point me in the right direction here: #50578013Simoneaux
S
1
casper.start();
// Walk the keys of `array` and queue a thenOpen() step for partialURL plus the
// value stored under each key. The casper instance passed as the first
// callback argument is used in place of `this`.
casper.each(Object.keys(array), function(casper, array_elem) {
    casper.thenOpen(partialURL + array[array_elem], function() {
        // ...
    });
});

As for the "undefined" error: try not to use this too much. I run into this error with CasperJS too often, so I prefer to write casper instead of this.

Smarmy answered 8/1, 2015 at 17:1 Comment(1)
If array is an actual array, then it is better to use array.forEach(function(item){casper.thenOpen(partial+item)}).Demoss
A
0

Try something like this.

var x;

// Step run after each link has been opened: print that page's title.
function showTitle() {
    this.echo(this.getTitle()); // display the title of page
}

// Step run once per entry of x; the current entry is read from response.data.
function openLink(response) {
    this.thenOpen((partialURL + response.data), showTitle);
}

casper.start(URL, function() {
    x = links.split(" "); // now x is an array of links
});

// eachThen() is called from inside a then() step so that it runs after the
// start callback has assigned x.
casper.then(function() {
    this.eachThen(x, openLink);
});

casper.run();

x was undefined because the for loop was being executed before casper.start. In the above code, the eachThen() block is nested inside of a casper.then block in order to delay its execution.

Auscultation answered 29/7, 2013 at 17:18 Comment(7)
Nothing happens still.Librium
I tried editing your answer and I guess it got denied for some weird reason. Could you please add var i; beside var x;, and could you also add i++; below console.log...Librium
I changed it to use eachThen instead of repeat. It should be working nowAuscultation
TypeError: 'undefined' is not a function <evaluating 'this.eachThen'>Librium
What version of CasperJS are you using? If it's before 1.1, then try changing this.eachThen to this.eachAuscultation
It works but a few edits need to be made to your answer. response.data didn't work for me. I used x[i] and added 1 to i each time.Librium
@mikeyaworski, you likely got that error from CasperJS 1.0, whereas 1.1 adds eachThen functionAstragal
R
0

I have solved the same issue with this code:

casper.then(function () {
    var index = -1; // advanced once per entry of locations

    // Empty placeholder callbacks handed to waitForSelector() below.
    var onFound = function () {};
    var onTimeout = function () {};

    this.eachThen(locations, function () {
        index += 1;
        // Do stuff here, for example:
        this.thenOpen(YOUR_URL, function () {
            this.waitForSelector("MYSELECTOR", onFound, onTimeout);
        });
    });
});
Rolanderolando answered 2/5, 2016 at 13:32 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.