programmatically access google search watchlist

There is still no API to access Google's watchlists, but you can use a tool like Axios to access the watchlist page and then jsdom to walk down the document tree and scrape the data.

The trick is finding a spot where Google will accurately show you your entire watchlist, because when accessing it from a search, Google will truncate long lists no matter what. Here are a few key steps to find the full list:

1. Find and open your Google watchlist at this page: https://www.google.com/interests/saved
2. Click the "Share" button, turn on sharing, choose "View only link", and click "Continue". Copy the resulting link.

Here's some example code that returns your watchlist as an array:

// Paste the "View only link" copied from your watchlist's Share dialog here.
// scrape() adds a pageNumber query parameter to this URL for every request,
// so the 'https://' placeholder must be replaced before running the script.
const GOOGLE_WATCHLIST_URL = 'https://'; 

const jsdom = require('jsdom');
const { JSDOM } = jsdom;
const axios = require('axios');

/**
 * Scrape the item names from a shared Google watchlist.
 *
 * Pages are requested one at a time through the `pageNumber` query parameter.
 * Paging stops as soon as a page is empty or repeats the first item of the
 * previously collected page, which is how the end of the list shows up.
 * A failed request is logged and skipped so the remaining pages are still
 * attempted (best effort).
 *
 * @param {number} [maxPages=6] - Upper bound on the number of pages requested.
 *     The default matches the original hard-coded loop (page numbers 1-6).
 * @returns {Promise<string[]>} The `aria-label` of every matched link, in
 *     page order.
 */
async function scrape(maxPages = 6) {
    const titles = [];
    let prevFirstItem = null;

    console.log('Scraping new data.');

    for (let page = 1; page <= maxPages; page++) {
        let pageTitles;

        try {
            // Build the query with URL so a shared link that already carries
            // a query string is extended instead of corrupted.
            const url = new URL(GOOGLE_WATCHLIST_URL);
            url.searchParams.set('pageNumber', String(page));

            const response = await axios.request({
                method: 'GET',
                url: url.href,
                headers: {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
                }
            });

            const { document } = new JSDOM(response.data).window;

            // Important: This selector could change in future, but it works as of July 29, 2024
            // It grabs the link that carries the name of each movie on the watchlist.
            const links = document.querySelectorAll('[data-hveid] a[aria-label]');
            pageTitles = Array.from(links, (link) => link.getAttribute('aria-label'));
        } catch (error) {
            // Each page is collected into its own array, so a failure here
            // cannot disturb the bookkeeping of the pages that follow.
            console.error(error);
            continue;
        }

        if (pageTitles.length === 0) {
            console.log(`Stopping because page ${page} has no items.`);
            break;
        }

        // Seeing the previous page's first item again means this response
        // repeats content that was already collected: keep only what came
        // before the repeat and stop paging.
        const repeatAt = pageTitles.indexOf(prevFirstItem);
        if (repeatAt !== -1) {
            titles.push(...pageTitles.slice(0, repeatAt));
            console.log(`Stopping because prev equals current: ${pageTitles[repeatAt]} = ${prevFirstItem}`);
            break;
        }

        titles.push(...pageTitles);

        // Remember the first item so the next iteration can detect a repeat.
        prevFirstItem = pageTitles[0];
        console.log(`${page}: ${prevFirstItem}`);
    }

    return titles;
}

// Entry point: run the scraper and print the collected watchlist. Without
// this the array returned by scrape() would be silently discarded.
(async () => {
    try {
        const watchlist = await scrape();
        console.log(watchlist);
    } catch (error) {
        // Report unexpected failures instead of leaving an unhandled rejection.
        console.error(error);
        process.exitCode = 1;
    }
})();

This code works by accessing your watchlist page, reading the items, then looping through pages using the pageNumber URL parameter and watching for the point where a page repeats items it has already seen. Technically this code only requests up to 6 pages (page numbers 1 through 6), but you can raise that limit by changing the loop bound, for example:

for (let i = 0; i <= 10; i++)

It may even be possible to exit the loop early if there are no more items, but this does the trick.

Recommended topics

Hot tags