Not sure how this will compare with the other packages, but to a certain extent you need to parse the arguments being passed to cursor.execute
. This bit of pyparsing code looks for:
arguments using string interpolation
arguments using string concatenation with variable names
arguments that are just variable names
But sometimes arguments use string concatenation just to break up a long string into - if all the strings in the expression are literals being added together, there is no risk of SQL injection.
This pyparsing snippet will look for calls to cursor.execute, and then look for the at-risk argument forms:
from pyparsing import *
import re
identifier = Word(alphas, alphanums+'_')
integer = Word(nums)
LPAR,RPAR,PLUS,PERCENT = map(Literal, '()+%')
stringInterpRE = re.compile(r"%-?\d*\*?\.?\d*\*?s")
def containsStringInterpolation(s,l,tokens):
if not stringInterpRE.search(tokens[0]):
raise ParseException(s,l,"No string interpolation")
tupleContents = identifier | integer
tupleExpr = LPAR + delimitedList(tupleContents) + RPAR
stringInterpArg = identifier | tupleExpr
interpolatedString = originalTextFor(quotedString.copy().setParseAction(containsStringInterpolation) +
PERCENT + stringInterpArg)
stringTerm = interpolatedString | OneOrMore(quotedString.copy()) | identifier
stringTerm.setName("stringTerm")
unsafeStringExpr = (stringTerm + OneOrMore(PLUS + stringTerm)) | identifier | interpolatedString
def unsafeExpr(s,l,tokens):
if not any(term == interpolatedString or term == identifier
for term in tokens):
raise ParseException(s,l,"No unsafe string terms")
unsafeStringExpr.setParseAction(unsafeExpr)
unsafeStringExpr.setName("unsafeExpr")
func = Literal("cursor.execute")
statement = func + LPAR + unsafeStringExpr + RPAR
statement.setName("execute stmt")
#statement.ignore(pythonComment)
for tokens in statement.searchString(sample):
print ' '.join(tokens.asList())
This will scan through the following sample:
sample = """
import MySQLdb
def get_data(id):
db = MySQLdb.connect(db='TEST')
cursor = db.cursor()
cursor.execute("SELECT * FROM TEST WHERE ID = '%s' -- UNSAFE" % id)
cursor.execute("SELECT * FROM TEST WHERE ID = '" + id + "' -- UNSAFE")
cursor.execute(sqlVar + " -- UNSAFE")
cursor.execute("SELECT * FROM TEST WHERE ID = 'FRED' -- SAFE")
cursor.execute("SELECT * FROM TEST WHERE ID = " +
"'FRED' -- SAFE")
cursor.execute("SELECT * FROM TEST "
"WHERE ID = "
"'FRED' -- SAFE")
cursor.execute("SELECT * FROM TEST "
"WHERE ID = " +
"'%s' -- UNSAFE" % name)
return cursor.fetchall()
print(get_data(1))"""
and report these unsafe statements:
cursor.execute ( "SELECT * FROM TEST WHERE ID = '%s' -- UNSAFE" % id )
cursor.execute ( "SELECT * FROM TEST WHERE ID = '" + id + "' -- UNSAFE" )
cursor.execute ( sqlVar + " -- UNSAFE" )
cursor.execute ( "SELECT * FROM TEST " "WHERE ID = " + "'%s' -- UNSAFE" % name )
You can also have pyparsing report the location of the found lines, using scanString instead of searchString.