Logging in to LinkedIn with python requests sessions

Asked 20/9, 2013 at 1:22 Answered 22/8, 2020 at 21:18

I'm trying to log into LinkedIn using Python requests:

import sys
import requests
from BeautifulSoup import BeautifulSoup


# Form fields sent in the body of the login POST.
payload={
    'session-key' : '[email protected]',
    'session-password' : 'password'
}

# Address the login form is posted to.
URL='https://www.linkedin.com/uas/login-submit'
# One session object is used for both the login POST and the follow-up GET.
s=requests.session()
s.post(URL,data=payload)

# Fetch a page after the POST and print its <title>; the title is used to
# tell whether the login succeeded.
r=s.get('http://www.linkedin.com/nhome')
soup = BeautifulSoup(r.text)
print soup.find('title')

I can't seem to log in using this method. I even tried playing with csrf etc. in the payload, but aren't sessions supposed to take care of that for you?

Note about the last line: I use the title to check whether I've successfully logged in. (I should see "Welcome! | LinkedIn" if I have signed in; instead I see "World's Largest Professional Network | LinkedIn".)

Am I missing something?

Riorsson answered 20/9, 2013 at 1:22 Comment(1)

developer.linkedin.com/apis Anything wrong with using their API? – Brinkema 20/9, 2013 at 1:32

I modified a web-scraping template I use for most of my Python-based scraping needs to fit your needs. Verified it worked with my own login info.

It works by mimicking a browser and maintaining a cookie jar that stores your user session. I got it to work with BeautifulSoup for you as well.

Note: This is a Python2 version. I added a working Python3 example further below by request.

import cookielib
import os
import urllib
import urllib2
import re
import string
from BeautifulSoup import BeautifulSoup

# Credentials handed to LinkedInParser at the bottom of the script;
# replace these placeholder values with your own.
username = "[email protected]"
password = "password"

# File the MozillaCookieJar inside LinkedInParser loads from and saves to.
cookie_filename = "parser.cookies.txt"

class LinkedInParser(object):
    """
    Log in to LinkedIn through a cookie-aware urllib2 opener (Python 2).

    Creating an instance loads any cookies already stored in
    ``cookie_filename``, performs the login, prints the <title> of the feed
    page and saves the cookie jar back to disk.
    """

    def __init__(self, login, password):
        """
        Build the opener, log in, print the feed title and save the cookies.

        login    -- account e-mail address, sent as ``session_key``
        password -- account password, sent as ``session_password``
        """
        self.login = login
        self.password = password

        # Simulate browser with cookies enabled
        self.cj = cookielib.MozillaCookieJar(cookie_filename)
        if os.access(cookie_filename, os.F_OK):
            self.cj.load()
        self.opener = urllib2.build_opener(
            urllib2.HTTPRedirectHandler(),
            urllib2.HTTPHandler(debuglevel=0),
            urllib2.HTTPSHandler(debuglevel=0),
            urllib2.HTTPCookieProcessor(self.cj)
        )
        self.opener.addheaders = [
            ('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; '
                           'Windows NT 5.2; .NET CLR 1.1.4322)'))
        ]

        # Login
        self.loginPage()

        title = self.loadTitle()
        # With a single argument the parenthesised form prints exactly what
        # the Python 2 print statement printed.
        print(title)

        self.cj.save()

    def loadPage(self, url, data=None, retries=3):
        """
        Fetch ``url`` through the opener and return the response body.

        url     -- address to open
        data    -- url-encoded POST body; None sends a GET
        retries -- number of attempts before giving up (at least one is made)

        A failed attempt that raises IOError (the base class of
        urllib2.URLError and urllib2.HTTPError) is retried. The error from
        the last attempt is re-raised, so a page that never loads can no
        longer recurse without end.
        """
        attempts = max(1, retries)
        for attempt in range(attempts):
            try:
                # open(url, None) is a plain GET, so one call covers both cases.
                response = self.opener.open(url, data)
                try:
                    return response.read()
                finally:
                    response.close()
            except IOError:
                if attempt == attempts - 1:
                    raise

    def loginPage(self):
        """
        Handle login. This should populate our cookie jar.

        Raises RuntimeError when the homepage has no element with id
        ``loginCsrfParam-login`` to read the CSRF token from.
        """
        html = self.loadPage("https://www.linkedin.com/")
        soup = BeautifulSoup(html)

        # find() returns None when the element is missing; report that
        # clearly instead of failing on the subscript below.
        csrf_input = soup.find(id="loginCsrfParam-login")
        if csrf_input is None:
            raise RuntimeError(
                "No element with id 'loginCsrfParam-login' was found on the "
                "LinkedIn homepage; the login form has probably changed."
            )
        csrf = csrf_input['value']

        login_data = urllib.urlencode({
            'session_key': self.login,
            'session_password': self.password,
            'loginCsrfParam': csrf,
        })

        self.loadPage("https://www.linkedin.com/uas/login-submit", login_data)

    def loadTitle(self):
        """ Return the <title> element of the feed page. """
        html = self.loadPage("https://www.linkedin.com/feed/")
        soup = BeautifulSoup(html)
        return soup.find("title")

# Creating the parser runs the whole flow: __init__ logs in and prints the page title.
parser = LinkedInParser(username, password)

Update June 19, 2014: Added parsing for CSRF token from homepage for use in updated login process.

Update July 23, 2015: Adding a Python 3 example here. Basically it requires substituting library locations and removing deprecated methods. It's not perfectly formatted or anything, but it functions. Sorry for the rush job. In the end the principles and steps are identical.

import http.cookiejar as cookielib
import os
import urllib
import re
import string
from bs4 import BeautifulSoup

# Credentials handed to LinkedInParser at the bottom of the script;
# replace these placeholder values with your own.
username = "[email protected]"
password = "password"

# File the MozillaCookieJar inside LinkedInParser loads from and saves to.
cookie_filename = "parser.cookies.txt"

class LinkedInParser(object):
    """
    Log in to LinkedIn through a cookie-aware urllib opener (Python 3).

    Creating an instance loads any cookies already stored in
    ``cookie_filename``, performs the login, prints the <title> of the feed
    page and saves the cookie jar back to disk.
    """

    def __init__(self, login, password):
        """
        Build the opener, log in, print the feed title and save the cookies.

        login    -- account e-mail address, sent as ``session_key``
        password -- account password, sent as ``session_password``
        """
        # Import the submodule explicitly: a bare ``import urllib`` does not
        # guarantee that ``urllib.request`` has been loaded.
        import urllib.request

        self.login = login
        self.password = password

        # Simulate browser with cookies enabled
        self.cj = cookielib.MozillaCookieJar(cookie_filename)
        if os.access(cookie_filename, os.F_OK):
            self.cj.load()
        self.opener = urllib.request.build_opener(
            urllib.request.HTTPRedirectHandler(),
            urllib.request.HTTPHandler(debuglevel=0),
            urllib.request.HTTPSHandler(debuglevel=0),
            urllib.request.HTTPCookieProcessor(self.cj)
        )
        self.opener.addheaders = [
            ('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; '
                           'Windows NT 5.2; .NET CLR 1.1.4322)'))
        ]

        # Login
        self.loginPage()

        title = self.loadTitle()
        print(title)

        self.cj.save()

    def loadPage(self, url, data=None, retries=3):
        """
        Fetch ``url`` through the opener and return the body as text.

        url     -- address to open
        data    -- url-encoded POST body as bytes; None sends a GET
        retries -- number of attempts before giving up (at least one is made)

        The body is decoded as UTF-8, with undecodable bytes replaced.
        Calling str() on the raw bytes would instead return their
        ``b'...'`` repr and corrupt the HTML.

        A failed attempt that raises OSError (the base class of
        urllib.error.URLError and HTTPError) is retried. The error from the
        last attempt is re-raised, so a page that never loads can no longer
        recurse without end.
        """
        attempts = max(1, retries)
        for attempt in range(attempts):
            try:
                # open(url, None) is a plain GET, so one call covers both cases.
                with self.opener.open(url, data) as response:
                    return response.read().decode("utf-8", errors="replace")
            except OSError:
                if attempt == attempts - 1:
                    raise

    def loadSoup(self, url, data=None):
        """
        Combine loading of URL, HTML, and parsing with BeautifulSoup
        """
        html = self.loadPage(url, data)
        return BeautifulSoup(html, "html5lib")

    def loginPage(self):
        """
        Handle login. This should populate our cookie jar.

        Raises RuntimeError when the homepage has no element with id
        ``loginCsrfParam-login`` to read the CSRF token from.
        """
        # Explicit submodule import, for the same reason as in __init__.
        import urllib.parse

        soup = self.loadSoup("https://www.linkedin.com/")

        # find() returns None when the element is missing; report that
        # clearly instead of failing on the subscript below.
        csrf_input = soup.find(id="loginCsrfParam-login")
        if csrf_input is None:
            raise RuntimeError(
                "No element with id 'loginCsrfParam-login' was found on the "
                "LinkedIn homepage; the login form has probably changed."
            )
        csrf = csrf_input['value']

        login_data = urllib.parse.urlencode({
            'session_key': self.login,
            'session_password': self.password,
            'loginCsrfParam': csrf,
        }).encode('utf8')

        self.loadPage("https://www.linkedin.com/uas/login-submit", login_data)

    def loadTitle(self):
        """ Return the <title> element of the feed page. """
        soup = self.loadSoup("https://www.linkedin.com/feed/")
        return soup.find("title")

# Creating the parser runs the whole flow: __init__ logs in and prints the page title.
parser = LinkedInParser(username, password)

Mores answered 20/9, 2013 at 3:18 Comment(18)

I'm a bit late to the show here, but this isn't working for me? I just added my email and password, didn't change anything else but I'm still just getting back <title>World's Largest Professional Network | LinkedIn</title> - anyone able to help me out? – Receptacle 11/6, 2014 at 11:55

@Cian, where you see the lines: title = self.loadTitle() and print title, you'll want to add your own code. Is it that the title that's printing isn't the one you expect? – Mores 12/6, 2014 at 19:45

@Mores Yes, I'm still getting the logged-out homepage (title I pasted above), instead of the one when someone has logged in. I don't think the login is going through for me? – Receptacle 13/6, 2014 at 10:54

@garromark, I am having the same problem as Cian. Is it possible that something changed about the LinkedIn authentication in the recent months? – Wow 17/6, 2014 at 18:14

@Receptacle Updated the code to accommodate changes to LinkedIn's login process that now requires a CSRF token. – Mores 19/6, 2014 at 6:7

@Alexandra Updated the code to accommodate changes to LinkedIn's login process that now requires a CSRF token. – Mores 19/6, 2014 at 6:7

@Mores - I could not find the "loginCsrfParam-login" id in the html obtained from linkedin, but your code works if I search for the "csrfToken-postModuleForm" id instead. Thank you! – Wow 20/6, 2014 at 2:46

@Mores Do you have a working version in Python 3? – Haversack 19/7, 2015 at 15:59

@kodingralph Added a functioning Python3 example for you. Note it requires BeautifulSoup4 to be installed. – Mores 23/7, 2015 at 23:4

@Mores print soup.find(id="loginCsrfParam-login")['value'] TypeError: 'NoneType' object has no attribute 'getitem' Unable to get csrf value. Can you help? – Lynwoodlynx 3/10, 2016 at 12:54

@MansoorAkram they seem to have moved their initial login form. I believe changing the URL on the line before it from html = self.loadPage("https://www.linkedin.com/") to html = self.loadPage("https://www.linkedin.com/uas/login-submit") should resolve that issue. Could you let me know if it works so I can modify my answer? What it's doing is loading an initial login form so the server will issue a CSRF token that we are pulling for our cookie jar. They seem to have moved that form to a new page. – Mores 4/10, 2016 at 16:48

How can I add chrome as user agent? – Cyclostome 16/2, 2017 at 16:19

@Mores Thanks for posting this. The code is not working anymore, so I suspect LinkedIn changed login homepage again. Any insight on this? Best :) – Vaules 21/11, 2017 at 21:18

https://www.linkedin.com/uas/login-submit doesn't work anymore. I got it working with https://www.linkedin.com/uas/login. – Langbehn 14/2, 2018 at 5:48

The python3 version is unusable. It does not work at all. All libraries are still python2 – Vogt 25/1, 2020 at 15:14

I get with python 3 csrf = soup.find(id='loginCsrfParam-login')['value'] TypeError: 'NoneType' object is not subscriptable – Cailean 30/1, 2021 at 15:13

@Alex, did you try the other answers from this post? You are not correct with the libs, bs4 is for python3 – Cailean 30/1, 2021 at 15:13

@Mores this works great for me with minor adjustments. I would like to upload this to heroku to be able to call it from another project I have without having to open up ports on my home... can that be done? I've tried with Heroku but it seems to be unable to login. I don't get an error but it just won't login and on my personal computer it works great. – Cloven 20/5, 2022 at 21:17

This is a much simpler version.

import requests
from bs4 import BeautifulSoup

# One session for every request, so the same session object that loaded the
# homepage also performs the login and the later page fetches.
client = requests.Session()

HOMEPAGE_URL = 'https://www.linkedin.com'
LOGIN_URL = 'https://www.linkedin.com/uas/login-submit'

# Load the homepage first to read the hidden CSRF field of the login form.
html = client.get(HOMEPAGE_URL).content
soup = BeautifulSoup(html, "html.parser")

# find() returns None when the element is missing; report that clearly
# instead of failing with "'NoneType' object is not subscriptable".
csrf_input = soup.find(id="loginCsrfParam-login")
if csrf_input is None:
    raise RuntimeError(
        "No element with id 'loginCsrfParam-login' was found on the "
        "LinkedIn homepage; the login form has probably changed."
    )
csrf = csrf_input['value']

# 'Login' and 'Password' are placeholders for your own credentials.
login_information = {
    'session_key':'Login',
    'session_password':'Password',
    'loginCsrfParam': csrf,
}

client.post(LOGIN_URL, data=login_information)

# Placeholder: substitute the full URL of the LinkedIn page you want to fetch.
client.get('Any_Linkedin_URL')

Chifley answered 10/9, 2014 at 0:20 Comment(4)

it Indeed is simple, Thank you. But I am not able to find where you got csrf = soup.find(id="loginCsrfParam-login")['value'] from. I cant find it in the request headers while checking in Chrome? – Inordinate 23/9, 2014 at 18:18

Let me rephrase --- I am curious why we need this -- csrf = soup.find(id="loginCsrfParam-login")['value'] from. I cant find it in the request headers while checking in Chrome? – Inordinate 23/9, 2014 at 18:32

@morgan-allen print soup.find(id="loginCsrfParam-login")['value'] TypeError: 'NoneType' object has no attribute 'getitem' Unable to get csrf value. Can you help? – Lynwoodlynx 3/10, 2016 at 12:56

csrf = soup.find('input', {'name': 'loginCsrfParam'}).get('value'): 'NoneType' object has no attribute 'get' – Cailean 30/1, 2021 at 15:20

2019 Version.

Slightly revised working version that takes into account the new structure of the page to find the connection cookie and adds the trk parameter.

import requests
from bs4 import BeautifulSoup

# Fill in your LinkedIn e-mail address and password before running.
email = ""
password = ""

# One session for every request, so the same session object that loaded the
# homepage also performs the login and the later page fetches.
client = requests.Session()

HOMEPAGE_URL = 'https://www.linkedin.com'
LOGIN_URL = 'https://www.linkedin.com/uas/login-submit'
# Page fetched after logging in; change it to whichever page you need.
FEED_URL = 'https://www.linkedin.com/feed/'

# Load the homepage first to read the hidden CSRF field of the login form.
html = client.get(HOMEPAGE_URL).content
soup = BeautifulSoup(html, "html.parser")

# find() returns None when the field is missing; report that clearly
# instead of failing with "'NoneType' object has no attribute 'get'".
csrf_input = soup.find('input', {'name': 'loginCsrfParam'})
if csrf_input is None:
    raise RuntimeError(
        "No <input name='loginCsrfParam'> was found on the LinkedIn "
        "homepage; the login form has probably changed."
    )
csrf = csrf_input.get('value')

login_information = {
    'session_key': email,
    'session_password': password,
    'loginCsrfParam': csrf,
    'trk': 'guest_homepage-basic_sign-in-submit'
}

client.post(LOGIN_URL, data=login_information)

# An empty URL makes requests raise MissingSchema, so fetch a real page.
response = client.get(FEED_URL)

Parshall answered 6/12, 2019 at 21:52 Comment(2)

hello, your client works but when I do the same thing in js it doesn't. If you know js/nodejs would you look at my question at #62455537 – Quay 19/6, 2020 at 13:11

@AkilDemir your question does not exist. – Cailean 30/1, 2021 at 14:53

2020 version of @garromark's accepted solution:

import http.cookiejar as cookielib
import os
import urllib
import re
import string
from bs4 import BeautifulSoup

# Fill in your LinkedIn e-mail address and password before running.
username = ""
password = ""

# File name handed to the MozillaCookieJar inside LinkedInParser.
cookie_filename = "parser.cookies.txt"


class LinkedInParser(object):
    """
    Log in to LinkedIn through a cookie-aware urllib opener (Python 3).

    Creating an instance loads any cookies already stored in
    ``cookie_filename``, performs the login through the /login form and
    prints the <title> of the feed page. Every page load is echoed to
    stdout for debugging.
    """

    def __init__(self, login, password):
        """
        Build the opener, log in and print the feed title.

        login    -- account e-mail address, sent as ``session_key``
        password -- account password, sent as ``session_password``
        """
        # Import the submodule explicitly: a bare ``import urllib`` does not
        # guarantee that ``urllib.request`` has been loaded.
        import urllib.request

        self.login = login
        self.password = password

        # Simulate browser with cookies enabled
        self.cj = cookielib.MozillaCookieJar(cookie_filename)
        if os.access(cookie_filename, os.F_OK):
            self.cj.load()
        self.opener = urllib.request.build_opener(
            urllib.request.HTTPRedirectHandler(),
            urllib.request.HTTPHandler(debuglevel=0),
            urllib.request.HTTPSHandler(debuglevel=0),
            urllib.request.HTTPCookieProcessor(self.cj)
        )
        self.opener.addheaders = [
            ('User-agent', 'Mozilla/5.0')
        ]

        # Login
        self.loginPage()

        title = self.loadTitle()
        print(title)

        # Saving the cookie jar is disabled in this version.
        # self.cj.save()

    def loadPage(self, url, data=None):
        """
        Fetch ``url`` through the opener and return the body as text.

        url  -- address to open
        data -- url-encoded POST body as bytes; None sends a GET

        The body is decoded as UTF-8, with undecodable bytes replaced.
        Calling str() on the raw bytes would instead return their
        ``b'...'`` repr and corrupt the HTML.

        This is best effort: any exception is printed and None is returned
        instead of the page text.
        """
        try:
            # open(url, None) is a plain GET, so one call covers both cases.
            with self.opener.open(url, data) as response:
                content = response.read().decode("utf-8", errors="replace")
            print("Page loaded: %s \n Content: %s \n" % (url, content))
            return content
        except Exception as e:
            print("Exception on %s load: %s" % (url, e))
            return None

    def loadSoup(self, url, data=None):
        """
        Combine loading of URL, HTML, and parsing with BeautifulSoup

        Raises RuntimeError when the page could not be loaded.
        """
        html = self.loadPage(url, data)
        if html is None:
            # loadPage has already printed the cause; stop here rather than
            # hand None to the parser.
            raise RuntimeError("Could not load %s" % url)
        return BeautifulSoup(html, "html5lib")

    @staticmethod
    def _hiddenInputValue(soup, name):
        """ Return the ``value`` of the <input> called ``name`` in ``soup``. """
        field = soup.find("input", {"name": name})
        if field is None:
            raise RuntimeError(
                "No <input name='%s'> was found on the LinkedIn login page; "
                "the login form has probably changed." % name
            )
        return field['value']

    def loginPage(self):
        """
        Handle login. This should populate our cookie jar.

        Reads the hidden form fields from the login page and posts them back
        together with the credentials. Raises RuntimeError when the login
        page cannot be loaded or one of the hidden fields is missing.
        """
        # Explicit submodule import, for the same reason as in __init__.
        import urllib.parse

        soup = self.loadSoup("https://www.linkedin.com/login")
        loginCsrfParam = self._hiddenInputValue(soup, "loginCsrfParam")
        csrfToken = self._hiddenInputValue(soup, "csrfToken")
        sIdString = self._hiddenInputValue(soup, "sIdString")
        print("loginCsrfParam: %s" % loginCsrfParam)
        print("csrfToken: %s" % csrfToken)
        print("sIdString: %s" % sIdString)
        login_data = urllib.parse.urlencode({
            'session_key': self.login,
            'session_password': self.password,
            'loginCsrfParam': loginCsrfParam,
            'csrfToken': csrfToken,
            'sIdString': sIdString
        }).encode('utf8')

        self.loadPage("https://www.linkedin.com/checkpoint/lg/login-submit", login_data)

    def loadTitle(self):
        """ Return the <title> element of the feed page. """
        soup = self.loadSoup("https://www.linkedin.com/feed/")
        return soup.find("title")


# Creating the parser runs the whole flow: __init__ logs in and prints the page title.
parser = LinkedInParser(username, password)

Matildamatilde answered 22/8, 2020 at 21:18 Comment(2)

This won't work with sales nav premium accounts as some cookies are missing like li_a . Any idea how to get those? I tried loading sales navigator search URL but it always gives 403. – Booby 5/9, 2020 at 9:8

Thanks, I get as title <title>LinkedIn</title> although the title should show my personal feeds.This is not needed: 'csrfToken': csrfToken, 'sIdString': sIdString and self.opener.addheaders = [('User-agent', 'Mozilla/5.0')] and not needed: if os.access(cookie_filename, os.F_OK): self.cj.load() – Cailean 30/1, 2021 at 16:31

The OP's solution worked for me with only a very slight modification.

Change 'session-key' to 'session_key' and change 'session-password' to 'session_password'.

Other than that, the code is good as it stands.

Malcolm answered 24/1, 2014 at 11:27 Comment(0)

Hot tags

Godot Unity Godot Help Programming Godot 4.X GUI GDScript 3D 2D Physics CSharp Godot 3.X VR XR Projects C++

Recommended topics

Hot tags