I am trying to scrape the text from YouTube live chat feeds using CasperJS. I am having trouble choosing the correct selector. There are many nested elements, and new elements are generated dynamically for each message that gets pushed out. How might one go about continually pulling the nested
<span id="message">some message</span>
as they occur? I currently can't seem to grab even one! Here's my test code. Note: you can substitute any YouTube URL that has a live chat feed.
// Test script: open a YouTube watch page, pause, then report whether a
// `span#message` element exists and save a screenshot of the page.
const casper = require("casper").create({
    viewportSize: { width: 1080, height: 724 }
});
const ua = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0';
const url = "https://www.youtube.com/watch?v=NksKCLsMUsI";

casper.start();
casper.userAgent(ua);

// Invoked by Casper with the casper instance as `this`: echoes whether the
// chat message selector matched, then captures the page to test.png.
function reportMessagePresence() {
    var found = this.exists("span#message");
    this.echo(found ? "found the a message!" : "can't find a message");
    casper.capture("test.png");
}

casper.thenOpen(url, function () {
    // Pause for 3000 ms before probing so the page has had time to load.
    this.wait(3000, reportMessagePresence);
});

casper.run();
My question is exactly this: 1) How do I properly select the messages? And 2) how might I continually listen for new ones?
UPDATE: I have been playing with Nightmare (an Electron-based testing suite), and it looks promising; however, I still can't seem to select the chat elements. I know I'm missing something simple.
EDIT / UPDATE (using cadabra's fine example)
// Polls a YouTube live-chat page once per second and echoes the newest chat
// message whenever its text differs from the previously echoed one.
var casper = require("casper").create({
    viewportSize: {
        width: 1024,
        height: 768
    }
});
// Declared with `var` so they are not created as implicit globals.
var url = 'https://www.youtube.com/live_chat?continuation=0ofMyAMkGiBDZzhLRFFvTFJVRTFVVlkwZEV4MFRFVWdBUSUzRCUzRDAB';
var ua = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0';

casper.start(url);
casper.userAgent(ua);

// Text of the most recently echoed message; used to suppress repeats.
var currentMessage = '';

// Schedules one polling round (wait -> read -> echo) and then re-schedules
// itself from the final step, so polling continues until the process is stopped.
// Only the newest row is read per round, so messages that arrive in a burst
// between two polls are not reported individually.
(function getPosts() {
    var post = null;

    casper.wait(1000, function () {
        casper.capture('test.png');
        // Runs inside the page; returns {author, message} for the newest chat
        // row, or null when no complete row is available yet.
        post = this.evaluate(function () {
            var nodes = document.querySelectorAll('yt-live-chat-text-message-renderer');
            if (nodes.length === 0) {
                return null;
            }
            var last = nodes[nodes.length - 1];
            var authorNode = last.querySelector('#author-name');
            var messageNode = last.querySelector('#message');
            if (!authorNode || !messageNode) {
                return null;
            }
            return {
                author: authorNode.textContent,
                message: messageNode.textContent
            };
        });
    });

    casper.then(function () {
        // Nothing readable this round (empty chat or evaluate failed):
        // skip instead of dereferencing a null result.
        if (!post) {
            return;
        }
        if (currentMessage !== post.message) {
            currentMessage = post.message;
            this.echo(post.author + ' - ' + post.message);
        }
    });

    casper.then(function () {
        getPosts();
    });
})();

casper.run();