Group nested links by first-level iframe using casperjs
Asked Answered
A

1

7

I have the following code:

// Casper instance used by the whole script.
var casper = require("casper").create({
    // Debugging aids; uncomment to get verbose output:
    // verbose: true,
    // logLevel: "debug",

    // webSecurityEnabled is a PhantomJS page setting: CasperJS only applies it
    // when it is nested under pageSettings. Given at the top level of the
    // options object it is not a recognised Casper option.
    pageSettings: {
        webSecurityEnabled: false
    }
});

// Every link collected so far, shared by all visited frames.
var links = [];
/**
 * Collects the href of every <a> element found by running a query in the
 * page context of `obj`.
 *
 * @param {Object} obj - object exposing evaluate(fn), e.g. a Casper instance.
 * @returns {string[]} the href values, in document order (whatever
 *     obj.evaluate hands back from the page context).
 */
function get_links(obj) {
    return obj.evaluate(function () {
        var anchors = document.querySelectorAll("a");
        // querySelectorAll yields an array-like, so borrow Array's map.
        return Array.prototype.map.call(anchors, function (anchor) {
            return anchor.href;
        });
    });
}
/**
 * Returns the distinct http/https URLs of `arr`, in first-seen order.
 *
 * Entries that do not start with an http:// or https:// scheme (mailto:,
 * javascript:, ftp:, ...) are dropped. The scheme check is anchored to the
 * start of the string so that a URL which merely contains "http" somewhere
 * (e.g. "ftp://host/http") is not mistaken for a web link.
 *
 * @param {Array} arr - candidate URLs.
 * @returns {string[]} de-duplicated http(s) URLs.
 */
function unique(arr) {
    var httpUrl = /^https?:\/\//i;
    var seen = {};
    var result = [];
    var i;
    var url;
    for (i = 0; i < arr.length; i++) {
        url = arr[i];
        if (typeof url !== "string" || !httpUrl.test(url)) {
            continue;
        }
        if (!Object.prototype.hasOwnProperty.call(seen, url)) {
            seen[url] = true;
            result.push(url);
        }
    }
    return result;
}

/**
 * Walks every iframe reachable from the current page (recursively, through
 * nested iframes), prints the unique links of each one and appends them to
 * the shared `links` list. Must be called with a Casper instance as `this`.
 *
 * @param {Function} callback - invoked (with the Casper instance as `this`)
 *     from a step scheduled after the walk has been set up.
 */
function getLinksFromIframes(callback) {
    this.echo("Here we come: " + this.getCurrentUrl() + "\n");

    // Returns [0, 1, ..., n - 1] for the n <iframe> elements of the frame
    // `obj` currently evaluates against.
    function to_evaluate(obj) {
        return obj.evaluate(function () {
            var frames = document.querySelectorAll("iframe");
            return Array.prototype.map.call(frames, function (frame, position) {
                return position;
            });
        });
    }

    // Enters each child iframe of `obj` in turn, records its links, then
    // recurses so that iframes nested inside it are visited as well.
    function to_frame(obj) {
        to_evaluate(obj).forEach(function (frameIndex) {
            obj.withFrame(frameIndex, function () {
                var frame = this;
                frame.echo("We are here: " + frame.getCurrentUrl());
                unique(get_links(frame)).forEach(function (href) {
                    console.log(href);
                    links.push(href);
                });
                // The same link may appear in several frames; keep one copy.
                links = unique(links);
                console.log("");
                to_frame(frame);
            });
        });
    }

    to_frame(this);
    this.then(function () {
        callback.call(this);
    });
}

// Completion handler: prints every link gathered in the shared `links` list.
function printCollectedLinks() {
    console.log("Done!\n");
    links.forEach(function (href) {
        console.log(href);
    });
}

// Entry point: open the page, collect the links of all its iframes, then
// print the aggregated list.
casper.start("http://domu-test-2/node/1", function () {
    getLinksFromIframes.call(this, printCollectedLinks);
}).then(function () {}).run();

And now the question is:

If I want to group the links by first-level iframe, how should I refactor the getLinksFromIframes() function? Currently all frames share a 'global' variable, links. I think links should definitely become a list of link lists: a new list would be initialized within the withFrame function, and a reference to it passed down to the child iframes. So how should I pass it and 'backtrace' all the links in the nested iframes?

Armond answered 10/12, 2016 at 20:33 Comment(0)
A
1

If I understand correctly, you want to choose which iframe to get the links from, and to get them only from that iframe. If that's the case, you can simply use switchToChildFrame to switch to the desired iframe and then get the links by calling get_links(obj).

My example has 3 pages: an index.html that loads iframe1.html inside an iframe, and iframe1.html, which in turn has another iframe that loads iframe2.html. Each file has 3 links inside:

index.html

<a href="link1/from/index">Link 1 from index</a>
<a href="link2/from/index">Link 2 from index</a>
<a href="link3/from/index">Link 3 from index</a>

<iframe src="iframe1.html"></iframe>

iframe1.html

<a href="link1/from/iframe1">Link 1 from iframe 1</a>
<a href="link2/from/iframe1">Link 2 from iframe 1</a>
<a href="link3/from/iframe1">Link 3 from iframe 1</a>

<iframe src="iframe2.html"></iframe>

iframe2.html

<a href="link1/from/iframe2">Link 1 from iframe 2</a>
<a href="link2/from/iframe2">Link 2 from iframe 2</a>
<a href="link3/from/iframe2">Link 3 from iframe 2</a>

and the refactored getLinksFromIframes function would be like this:

/**
 * Prints the unique links of ONE selected frame and appends them to the
 * shared `links` list. Which frame is read is chosen by how many
 * switchToChildFrame(0) calls are active below. Must be called with a Casper
 * instance as `this`.
 *
 * @param {Function} callback - invoked (with the Casper instance as `this`)
 *     from a step scheduled after the links have been collected.
 */
function getLinksFromIframes(callback) {
    this.echo("Here we come: " + this.getCurrentUrl() + "\n");

    // Prints and records every unique link of the frame `obj` is focused on.
    function to_frame(obj) {
        obj.echo("We are here: " + obj.getCurrentUrl());
        var l = unique(get_links(obj));
        var i;
        for (i = 0; i < l.length; i++) {
            console.log(l[i]);
            links.push(l[i]);
        }
        // Drop links that were already collected earlier.
        links = unique(links);
        console.log("");
    }

    // Frame selection:
    //   - both calls commented out   -> links of "index.html"
    //   - first call active only     -> links of "iframe1.html"
    //   - both calls active          -> links of "iframe2.html"
    this.page.switchToChildFrame(0);
    //this.page.switchToChildFrame(0);
    to_frame(this);

    // Hand focus back to the top-level page so that the callback and any
    // later step do not silently keep running inside the child frame.
    this.page.switchToMainFrame();

    this.then(function () {
        callback.call(this);
    });
}

RESULTS

If you comment out both switchToChildFrame calls, you'll get the links of index.html:

casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/

We are here: http://pjs.lytrax.net/node/1/
http://pjs.lytrax.net/node/1/link1/from/index
http://pjs.lytrax.net/node/1/link2/from/index
http://pjs.lytrax.net/node/1/link3/from/index

Done!

http://pjs.lytrax.net/node/1/link1/from/index
http://pjs.lytrax.net/node/1/link2/from/index
http://pjs.lytrax.net/node/1/link3/from/index

If you uncomment the first switchToChildFrame, you'll get the links of the first level iframe1.html:

casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/

We are here: http://pjs.lytrax.net/node/1/iframe1.html
http://pjs.lytrax.net/node/1/link1/from/iframe1
http://pjs.lytrax.net/node/1/link2/from/iframe1
http://pjs.lytrax.net/node/1/link3/from/iframe1

Done!

http://pjs.lytrax.net/node/1/link1/from/iframe1
http://pjs.lytrax.net/node/1/link2/from/iframe1
http://pjs.lytrax.net/node/1/link3/from/iframe1

And if you uncomment both the first and the second switchToChildFrame, you'll get the links of the second level iframe2.html:

casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/

We are here: http://pjs.lytrax.net/node/1/iframe2.html
http://pjs.lytrax.net/node/1/link1/from/iframe2
http://pjs.lytrax.net/node/1/link2/from/iframe2
http://pjs.lytrax.net/node/1/link3/from/iframe2

Done!

http://pjs.lytrax.net/node/1/link1/from/iframe2
http://pjs.lytrax.net/node/1/link2/from/iframe2
http://pjs.lytrax.net/node/1/link3/from/iframe2
Abagael answered 27/12, 2016 at 1:54 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.