This is the solution I went with. For a discussion of the security of this approach, see
Thanks to arifwn, I got into exploring Python's ast
(abstract syntax tree) module. This module provides a class ast.NodeVisitor
for traversing the tree. This code subclasses NodeVisitor
to create a syntax checker that whitelists the code necessary for basic math. Function calls and names are specially monitored, as only certain functions should be allowed and only unused names should be permitted.
import ast
allowed_functions = set([
#math library
'acos', 'acosh', 'asin', 'asinh', 'atan', 'atan2', 'atanh',
'ceil', 'copysign', 'cos', 'cosh', 'degrees', 'e', 'erf',
'erfc', 'exp', 'expm1', 'fabs', 'factorial', 'floor', 'fmod',
'frexp', 'fsum', 'gamma', 'hypot', 'isinf', 'isnan', 'ldexp',
'lgamma', 'log', 'log10', 'log1p', 'modf', 'pi', 'pow', 'radians',
'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'trunc',
#builtins
'abs', 'max', 'min', 'range', 'xrange'
])
allowed_node_types = set([
#Meta
'Module', 'Assign', 'Expr',
#Control
'For', 'If', 'Else',
#Data
'Store', 'Load', 'AugAssign', 'Subscript',
#Datatypes
'Num', 'Tuple', 'List',
#Operations
'BinOp', 'Add', 'Sub', 'Mult', 'Div', 'Mod', 'Compare'
])
safe_names = set([
'True', 'False', 'None'
])
class SyntaxChecker(ast.NodeVisitor):
def check(self, syntax):
tree = ast.parse(syntax)
self.passed=True
self.visit(tree)
def visit_Call(self, node):
if node.func.id not in allowed_functions:
raise SyntaxError("%s is not an allowed function!"%node.func.id)
else:
ast.NodeVisitor.generic_visit(self, node)
def visit_Name(self, node):
try:
eval(node.id)
except NameError:
ast.NodeVisitor.generic_visit(self, node)
else:
if node.id not in safe_names and node.id not in allowed_functions:
raise SyntaxError("%s is a reserved name!"%node.id)
else:
ast.NodeVisitor.generic_visit(self, node)
def generic_visit(self, node):
if type(node).__name__ not in allowed_node_types:
raise SyntaxError("%s is not allowed!"%type(node).__name__)
else:
ast.NodeVisitor.generic_visit(self, node)
if __name__ == '__main__':
x = SyntaxChecker()
while True:
try:
x.check(raw_input())
except Exception as e:
print e
Note that this is designed to accept only the mathematical part of the code, the function definition and return statement are provided.
This method of whitelisting all the required safe constructs and specifically whitelisting required unsafe constructs, could be modified to produce many useful subsets of Python; excellent for user scripts!
Note that in order for this to be executed securely, it should be in it's own thread with a timeout, to reduce name collisions and time out if the user code generates an infinite loop or similar.