Evaluating a mathematical expression in a string
Asked Answered
A

15

169
stringExp = "2^4"
intVal = int(stringExp)      # Expected value: 16

This returns the following error:

Traceback (most recent call last):  
File "<stdin>", line 1, in <module>
ValueError: invalid literal for int()
with base 10: '2^4'

I know that eval can work around this, but isn't there a better and - more importantly - safer method to evaluate a mathematical expression that is being stored in a string?

Absentee answered 3/3, 2010 at 13:10 Comment(5)
^ is the XOR operator. Expected value is 6. You probably want pow(2,4).Tullusus
or more pythonically 2**4Lockridge
If you don't want to use eval, then the only solution is to implement the appropriate grammar parser. Have a look at pyparsing.Tullusus
For simple operations you can check out this code github.com/louisfisch/mathematical-expressions-parserLara
Either you should take @fortran's approach, or you need to have your own parser and evaluator for custom operators.Croak
S
133

Pyparsing can be used to parse mathematical expressions. In particular, fourFn.py shows how to parse basic arithmetic expressions. Below, I've rewrapped fourFn into a numeric parser class for easier reuse.

from __future__ import division
from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional,
                       ZeroOrMore, Forward, nums, alphas, oneOf)
import math
import operator

__author__ = 'Paul McGuire'
__version__ = '$Revision: 0.0 $'
__date__ = '$Date: 2009-03-20 $'
__source__ = '''http://pyparsing.wikispaces.com/file/view/fourFn.py
http://pyparsing.wikispaces.com/message/view/home/15549426
'''
__note__ = '''
All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
more easily in other places.
'''


class NumericStringParser(object):
    '''
    Most of this code comes from the fourFn.py pyparsing example

    '''

    def pushFirst(self, strg, loc, toks):
        self.exprStack.append(toks[0])

    def pushUMinus(self, strg, loc, toks):
        if toks and toks[0] == '-':
            self.exprStack.append('unary -')

    def __init__(self):
        """
        expop   :: '^'
        multop  :: '*' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
        """
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        expr = Forward()
        atom = ((Optional(oneOf("- +")) +
                 (ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
                | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
                ).setParseAction(self.pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
        # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + \
            ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
        term = factor + \
            ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
        expr << term + \
            ZeroOrMore((addop + term).setParseAction(self.pushFirst))
        # addop_term = ( addop + term ).setParseAction( self.pushFirst )
        # general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
        # expr <<  general_term
        self.bnf = expr
        # map operator symbols to corresponding arithmetic operations
        epsilon = 1e-12
        self.opn = {"+": operator.add,
                    "-": operator.sub,
                    "*": operator.mul,
                    "/": operator.truediv,
                    "^": operator.pow}
        self.fn = {"sin": math.sin,
                   "cos": math.cos,
                   "tan": math.tan,
                   "exp": math.exp,
                   "abs": abs,
                   "trunc": lambda a: int(a),
                   "round": round,
                   "sgn": lambda a: abs(a) > epsilon and cmp(a, 0) or 0}

    def evaluateStack(self, s):
        op = s.pop()
        if op == 'unary -':
            return -self.evaluateStack(s)
        if op in "+-*/^":
            op2 = self.evaluateStack(s)
            op1 = self.evaluateStack(s)
            return self.opn[op](op1, op2)
        elif op == "PI":
            return math.pi  # 3.1415926535
        elif op == "E":
            return math.e  # 2.718281828
        elif op in self.fn:
            return self.fn[op](self.evaluateStack(s))
        elif op[0].isalpha():
            return 0
        else:
            return float(op)

    def eval(self, num_string, parseAll=True):
        self.exprStack = []
        results = self.bnf.parseString(num_string, parseAll)
        val = self.evaluateStack(self.exprStack[:])
        return val

You can use it like this

nsp = NumericStringParser()
result = nsp.eval('2^4')
print(result)
# 16.0

result = nsp.eval('exp(2^4)')
print(result)
# 8886110.520507872
Surinam answered 3/3, 2010 at 13:52 Comment(0)
M
265

eval is evil

eval("__import__('os').remove('important file')") # arbitrary commands
eval("9**9**9**9**9**9**9**9", {'__builtins__': None}) # CPU, memory

Note: even if you use set __builtins__ to None it still might be possible to break out using introspection:

eval('(1).__class__.__bases__[0].__subclasses__()', {'__builtins__': None})

Evaluate arithmetic expression using ast

import ast
import operator as op

# supported operators
operators = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
             ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor,
             ast.USub: op.neg}

def eval_expr(expr):
    """
    >>> eval_expr('2^6')
    4
    >>> eval_expr('2**6')
    64
    >>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
    -5.0
    """
    return eval_(ast.parse(expr, mode='eval').body)

def eval_(node):
    match node:
        case ast.Constant(value) if isinstance(value, int):
            return value  # integer
        case ast.BinOp(left, op, right):
            return operators[type(op)](eval_(left), eval_(right))
        case ast.UnaryOp(op, operand):  # e.g., -1
            return operators[type(op)](eval_(operand))
        case _:
            raise TypeError(node)

You can easily limit allowed range for each operation or any intermediate result, e.g., to limit input arguments for a**b:

def power(a, b):
    if any(abs(n) > 100 for n in [a, b]):
        raise ValueError((a,b))
    return op.pow(a, b)
operators[ast.Pow] = power

Or to limit magnitude of intermediate results:

import functools

def limit(max_=None):
    """Return decorator that limits allowed returned values."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            ret = func(*args, **kwargs)
            try:
                mag = abs(ret)
            except TypeError:
                pass # not applicable
            else:
                if mag > max_:
                    raise ValueError(ret)
            return ret
        return wrapper
    return decorator

eval_ = limit(max_=10**100)(eval_)

Example

>>> evil = "__import__('os').remove('important file')"
>>> eval_expr(evil) #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError:
>>> eval_expr("9**9")
387420489
>>> eval_expr("9**9**9**9**9**9**9**9") #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
ValueError:
Magel answered 4/3, 2012 at 19:15 Comment(15)
Very cool post, Thanks. I've taken that concept, and tried to make a library which should be easy to use: github.com/danthedeckie/simpleevalPredicate
can this be extended for functions of import math?Eckblad
@Eckblad yes, it can be extended to anything that uses Python syntax.Magel
@J.F. Sebastian: If you know this by heart, could you extend your example?Eckblad
@Hotschke: Just look at ast.dump(ast.parse(python_exression, mode='eval')) output and extend eval_() to handle ast.Call type. Note: if something can be done; it doesn't mean that it should be done.Magel
@J.F.Sebastian: What do you mean in particular by 'doesn't mean that it should be done' ?Eckblad
@Hotschke: it means that there could be better alternatives. Solution that is appropriate to evaluate a simple arithmetic expression might not be the best one in a more complex case. What is better depends on a particular use-case.Magel
Here is also my blog post about the matter which expands the answer a bit to cover functions, etc. opensourcehacker.com/2014/10/29/…Footlights
Note that ast.parse is not safe. For example ast.parse('()' * 1000000, '<string>', 'single') crashes the interpreter.Septuple
@AnttiHaapala good example. Is it a bug in Python interpreter? Anyway, large input is trivially handled e.g., using if len(expr) > 10000: raise ValueError.Magel
It is just an example. There are other critical parser bugs all the time, and these are not considered security bugs, and will never be fixed in old minor versions.Septuple
@AnttiHaapala could you provide an example that can't be fixed using the len(expr) check? Or your point is that there are bugs in Python implementation and therefore it is impossible to write safe code in general?Magel
I would like to incorporate code derived from this in the MIT licensed typhon library. Would you be willing to relicense your code under an MIT-compatible license so that I can legally do so?Thais
@DanielFairhead Regarding your decision not to include bitwise xor by default in your parser - would you consider adding an optional parameter to SimpleEval()? So that for instance, we could write s = SimpleEval(includeBitwiseXor=True) instead of s = SimpleEval(); s.operators[ast.BitXor] = operator.xorStratfordonavon
I like the ast solution, but it should be noted, that this works for Python>=3.10. As I had to run my code with Python-3.9, I got a syntax error because of case statement. I had to change that to if isinstance(node, ast.xxx) blocks. Otherwise a very elegant solution.Oosphere
S
133

Pyparsing can be used to parse mathematical expressions. In particular, fourFn.py shows how to parse basic arithmetic expressions. Below, I've rewrapped fourFn into a numeric parser class for easier reuse.

from __future__ import division
from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional,
                       ZeroOrMore, Forward, nums, alphas, oneOf)
import math
import operator

__author__ = 'Paul McGuire'
__version__ = '$Revision: 0.0 $'
__date__ = '$Date: 2009-03-20 $'
__source__ = '''http://pyparsing.wikispaces.com/file/view/fourFn.py
http://pyparsing.wikispaces.com/message/view/home/15549426
'''
__note__ = '''
All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
more easily in other places.
'''


class NumericStringParser(object):
    '''
    Most of this code comes from the fourFn.py pyparsing example

    '''

    def pushFirst(self, strg, loc, toks):
        self.exprStack.append(toks[0])

    def pushUMinus(self, strg, loc, toks):
        if toks and toks[0] == '-':
            self.exprStack.append('unary -')

    def __init__(self):
        """
        expop   :: '^'
        multop  :: '*' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
        """
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        expr = Forward()
        atom = ((Optional(oneOf("- +")) +
                 (ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
                | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
                ).setParseAction(self.pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
        # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + \
            ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
        term = factor + \
            ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
        expr << term + \
            ZeroOrMore((addop + term).setParseAction(self.pushFirst))
        # addop_term = ( addop + term ).setParseAction( self.pushFirst )
        # general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
        # expr <<  general_term
        self.bnf = expr
        # map operator symbols to corresponding arithmetic operations
        epsilon = 1e-12
        self.opn = {"+": operator.add,
                    "-": operator.sub,
                    "*": operator.mul,
                    "/": operator.truediv,
                    "^": operator.pow}
        self.fn = {"sin": math.sin,
                   "cos": math.cos,
                   "tan": math.tan,
                   "exp": math.exp,
                   "abs": abs,
                   "trunc": lambda a: int(a),
                   "round": round,
                   "sgn": lambda a: abs(a) > epsilon and cmp(a, 0) or 0}

    def evaluateStack(self, s):
        op = s.pop()
        if op == 'unary -':
            return -self.evaluateStack(s)
        if op in "+-*/^":
            op2 = self.evaluateStack(s)
            op1 = self.evaluateStack(s)
            return self.opn[op](op1, op2)
        elif op == "PI":
            return math.pi  # 3.1415926535
        elif op == "E":
            return math.e  # 2.718281828
        elif op in self.fn:
            return self.fn[op](self.evaluateStack(s))
        elif op[0].isalpha():
            return 0
        else:
            return float(op)

    def eval(self, num_string, parseAll=True):
        self.exprStack = []
        results = self.bnf.parseString(num_string, parseAll)
        val = self.evaluateStack(self.exprStack[:])
        return val

You can use it like this

nsp = NumericStringParser()
result = nsp.eval('2^4')
print(result)
# 16.0

result = nsp.eval('exp(2^4)')
print(result)
# 8886110.520507872
Surinam answered 3/3, 2010 at 13:52 Comment(0)
H
14

Some safer alternatives to eval() and sympy.sympify().evalf()*:

*SymPy sympify is also unsafe according to the following warning from the documentation.

Warning: Note that this function uses eval, and thus shouldn’t be used on unsanitized input.

Hoax answered 7/8, 2013 at 6:28 Comment(0)
U
12

The reason eval and exec are so dangerous is that the default compile function will generate bytecode for any valid python expression, and the default eval or exec will execute any valid python bytecode. All the answers to date have focused on restricting the bytecode that can be generated (by sanitizing input) or building your own domain-specific-language using the AST.

Instead, you can easily create a simple eval function that is incapable of doing anything nefarious and can easily have runtime checks on memory or time used. Of course, if it is simple math, than there is a shortcut.

c = compile(stringExp, 'userinput', 'eval')
if c.co_code[0]==b'd' and c.co_code[3]==b'S':
    return c.co_consts[ord(c.co_code[1])+ord(c.co_code[2])*256]

The way this works is simple, any constant mathematic expression is safely evaluated during compilation and stored as a constant. The code object returned by compile consists of d, which is the bytecode for LOAD_CONST, followed by the number of the constant to load (usually the last one in the list), followed by S, which is the bytecode for RETURN_VALUE. If this shortcut doesn't work, it means that the user input isn't a constant expression (contains a variable or function call or similar).

This also opens the door to some more sophisticated input formats. For example:

stringExp = "1 + cos(2)"

This requires actually evaluating the bytecode, which is still quite simple. Python bytecode is a stack oriented language, so everything is a simple matter of TOS=stack.pop(); op(TOS); stack.put(TOS) or similar. The key is to only implement the opcodes that are safe (loading/storing values, math operations, returning values) and not unsafe ones (attribute lookup). If you want the user to be able to call functions (the whole reason not to use the shortcut above), simple make your implementation of CALL_FUNCTION only allow functions in a 'safe' list.

from dis import opmap
from Queue import LifoQueue
from math import sin,cos
import operator

globs = {'sin':sin, 'cos':cos}
safe = globs.values()

stack = LifoQueue()

class BINARY(object):
    def __init__(self, operator):
        self.op=operator
    def __call__(self, context):
        stack.put(self.op(stack.get(),stack.get()))

class UNARY(object):
    def __init__(self, operator):
        self.op=operator
    def __call__(self, context):
        stack.put(self.op(stack.get()))


def CALL_FUNCTION(context, arg):
    argc = arg[0]+arg[1]*256
    args = [stack.get() for i in range(argc)]
    func = stack.get()
    if func not in safe:
        raise TypeError("Function %r now allowed"%func)
    stack.put(func(*args))

def LOAD_CONST(context, arg):
    cons = arg[0]+arg[1]*256
    stack.put(context['code'].co_consts[cons])

def LOAD_NAME(context, arg):
    name_num = arg[0]+arg[1]*256
    name = context['code'].co_names[name_num]
    if name in context['locals']:
        stack.put(context['locals'][name])
    else:
        stack.put(context['globals'][name])

def RETURN_VALUE(context):
    return stack.get()

opfuncs = {
    opmap['BINARY_ADD']: BINARY(operator.add),
    opmap['UNARY_INVERT']: UNARY(operator.invert),
    opmap['CALL_FUNCTION']: CALL_FUNCTION,
    opmap['LOAD_CONST']: LOAD_CONST,
    opmap['LOAD_NAME']: LOAD_NAME
    opmap['RETURN_VALUE']: RETURN_VALUE,
}

def VMeval(c):
    context = dict(locals={}, globals=globs, code=c)
    bci = iter(c.co_code)
    for bytecode in bci:
        func = opfuncs[ord(bytecode)]
        if func.func_code.co_argcount==1:
            ret = func(context)
        else:
            args = ord(bci.next()), ord(bci.next())
            ret = func(context, args)
        if ret:
            return ret

def evaluate(expr):
    return VMeval(compile(expr, 'userinput', 'eval'))

Obviously, the real version of this would be a bit longer (there are 119 opcodes, 24 of which are math related). Adding STORE_FAST and a couple others would allow for input like 'x=5;return x+x or similar, trivially easily. It can even be used to execute user-created functions, so long as the user created functions are themselves executed via VMeval (don't make them callable!!! or they could get used as a callback somewhere). Handling loops requires support for the goto bytecodes, which means changing from a for iterator to while and maintaining a pointer to the current instruction, but isn't too hard. For resistance to DOS, the main loop should check how much time has passed since the start of the calculation, and certain operators should deny input over some reasonable limit (BINARY_POWER being the most obvious).

While this approach is somewhat longer than a simple grammar parser for simple expressions (see above about just grabbing the compiled constant), it extends easily to more complicated input, and doesn't require dealing with grammar (compile take anything arbitrarily complicated and reduces it to a sequence of simple instructions).

Uniformed answered 11/3, 2016 at 0:34 Comment(3)
Thanks for this amazing shortcut! But there's a bug, at least in Python 3.6: c.co_code[0]==b'd' always evaluates as False, because, oddly, b'foo'[0] evaluates as int, not bytes. A fix is to either use c.co_code[0:1]==b'd' or chr(c.co_code[0])=='d'Elsaelsbeth
While trying to make the shortcut work I've stumbled on other issues, so I've created a function that works on Python 3.6+ and tries to cover some corner cases: https://mcmap.net/q/41518/-evaluating-a-mathematical-expression-in-a-stringElsaelsbeth
I wrote this almost 5 years ago... I don't even remember what python version I was targeting, quite possibly 2.7. The faster comparison would be c.co_code[0]==100. I pretty much never need just math input, so the second approach is what I've actually continued using.Uniformed
U
11

Okay, so the problem with eval is that it can escape its sandbox too easily, even if you get rid of __builtins__. All the methods for escaping the sandbox come down to using getattr or object.__getattribute__ (via the . operator) to obtain a reference to some dangerous object via some allowed object (''.__class__.__bases__[0].__subclasses__ or similar). getattr is eliminated by setting __builtins__ to None. object.__getattribute__ is the difficult one, since it cannot simply be removed, both because object is immutable and because removing it would break everything. However, __getattribute__ is only accessible via the . operator, so purging that from your input is sufficient to ensure eval cannot escape its sandbox.
In processing formulas, the only valid use of a decimal is when it is preceded or followed by [0-9], so we just remove all other instances of ..

import re
inp = re.sub(r"\.(?![0-9])","", inp)
val = eval(inp, {'__builtins__':None})

Note that while python normally treats 1 + 1. as 1 + 1.0, this will remove the trailing . and leave you with 1 + 1. You could add ),, and EOF to the list of things allowed to follow ., but why bother?

Uniformed answered 21/8, 2014 at 23:59 Comment(4)
A related question with interesting discussion can be found here.Heinrich
Whether or not the argument about removing . is correct at the moment, this leaves the potential for security vulnerabilities if future versions of Python to introduce new syntax allowing unsafe objects or functions to be accessed some other way. This solution is already unsafe in Python 3.6 because of f-strings, which allow the following attack: f"{eval('()' + chr(46) + '__class__')}". A solution based on whitelisting rather than blacklisting will be safer, but really it's better to solve this problem without eval at all.Cilo
That's an excellent point about future language features introducing new security issues.Uniformed
That said, f-strings don't presently break this. f-strings are syntactic sugar around an explicit string format call, which is compiled into multi-step bytecode inside .py files. While it might be possible to exploit in some way, trying that f-string as input to the eval function above dies on a key error, because it relies on a function call to get the .. I expect an explot taking advantage of python's unicode library to obtain the '.' is more likely.Uniformed
P
9

You can use the ast module and write a NodeVisitor that verifies that the type of each node is part of a whitelist.

import ast, math

locals =  {key: value for (key,value) in vars(math).items() if key[0] != '_'}
locals.update({"abs": abs, "complex": complex, "min": min, "max": max, "pow": pow, "round": round})

class Visitor(ast.NodeVisitor):
    def visit(self, node):
       if not isinstance(node, self.whitelist):
           raise ValueError(node)
       return super().visit(node)

    whitelist = (ast.Module, ast.Expr, ast.Load, ast.Expression, ast.Add, ast.Sub, ast.UnaryOp, ast.Num, ast.BinOp,
            ast.Mult, ast.Div, ast.Pow, ast.BitOr, ast.BitAnd, ast.BitXor, ast.USub, ast.UAdd, ast.FloorDiv, ast.Mod,
            ast.LShift, ast.RShift, ast.Invert, ast.Call, ast.Name)

def evaluate(expr, locals = {}):
    if any(elem in expr for elem in '\n#') : raise ValueError(expr)
    try:
        node = ast.parse(expr.strip(), mode='eval')
        Visitor().visit(node)
        return eval(compile(node, "<string>", "eval"), {'__builtins__': None}, locals)
    except Exception: raise ValueError(expr)

Because it works via a whitelist rather than a blacklist, it is safe. The only functions and variables it can access are those you explicitly give it access to. I populated a dict with math-related functions so you can easily provide access to those if you want, but you have to explicitly use it.

If the string attempts to call functions that haven't been provided, or invoke any methods, an exception will be raised, and it will not be executed.

Because this uses Python's built in parser and evaluator, it also inherits Python's precedence and promotion rules as well.

>>> evaluate("7 + 9 * (2 << 2)")
79
>>> evaluate("6 // 2 + 0.0")
3.0

The above code has only been tested on Python 3.

If desired, you can add a timeout decorator on this function.

Pyrognostics answered 28/5, 2015 at 20:13 Comment(1)
Looking back on this, it would be better to use ast.walk than to (ab)use ast.NodeVisitor in this way. Just mentioning this in a comment since I'm not sure on the etiquette of changing answers in this way after the fact.Pyrognostics
E
7

Based on Perkins' amazing approach, I've updated and improved his "shortcut" for simple algebraic expressions (no functions or variables). Now it works on Python 3.6+ and avoids some pitfalls:

import re

# Kept outside simple_eval() just for performance
_re_simple_eval = re.compile(rb'd([\x00-\xFF]+)S\x00')

def simple_eval(expr):
    try:
        c = compile(expr, 'userinput', 'eval')
    except SyntaxError:
        raise ValueError(f"Malformed expression: {expr}")
    m = _re_simple_eval.fullmatch(c.co_code)
    if not m:
        raise ValueError(f"Not a simple algebraic expression: {expr}")
    try:
        return c.co_consts[int.from_bytes(m.group(1), sys.byteorder)]
    except IndexError:
        raise ValueError(f"Expression not evaluated as constant: {expr}")

Testing, using some of the examples in other answers:

for expr, res in (
    ('2^4',                         6      ),
    ('2**4',                       16      ),
    ('1 + 2*3**(4^5) / (6 + -7)',  -5.0    ),
    ('7 + 9 * (2 << 2)',           79      ),
    ('6 // 2 + 0.0',                3.0    ),
    ('2+3',                         5      ),
    ('6+4/2*2',                    10.0    ),
    ('3+2.45/8',                    3.30625),
    ('3**3*3/3+3',                 30.0    ),
):
    result = simple_eval(expr)
    ok = (result == res and type(result) == type(res))
    print("{} {} = {}".format("OK!" if ok else "FAIL!", expr, result))
OK! 2^4 = 6
OK! 2**4 = 16
OK! 1 + 2*3**(4^5) / (6 + -7) = -5.0
OK! 7 + 9 * (2 << 2) = 79
OK! 6 // 2 + 0.0 = 3.0
OK! 2+3 = 5
OK! 6+4/2*2 = 10.0
OK! 3+2.45/8 = 3.30625
OK! 3**3*3/3+3 = 30.0

Testing bad input:

for expr in (
    'foo bar',
    'print("hi")',
    '2*x',
    'lambda: 10',
    '2**1234',
):
    try:
        result = simple_eval(expr)
    except ValueError as e:
        print(e)
        continue
    print("OK!")  # will never happen
Malformed expression: foo bar
Not a simple algebraic expression: print("hi")
Expression not evaluated as constant: 2*x
Expression not evaluated as constant: lambda: 10
Expression not evaluated as constant: 2**1234
Elsaelsbeth answered 28/1, 2021 at 22:11 Comment(3)
This is excellent. Much cleaner than my approach was. Note that feeding lambda: 10 as the input causes an unhandled exception, as your regex finds a return statement, but it doesn't match a constant return. There are a few solutions to it (look for MAKE_FUNCTION opcode, detect too-long of LOAD_CONST). Also worth noting that this is still suscpetible to a DoS attack (which needs external mitigation).Uniformed
It's worth noting that this relies on the fact that whatever Python implementation is used, it must implement constant folding (which, admittedly, the popular ones do). However, not all constant expressions are optimized that way, for example 2**12345 is left as is for evaluation at runtime. In this case your function encounters a IndexError: tuple index out of range.Guria
Thanks @a_guest! I've updated the solution to catch this case!Elsaelsbeth
P
6

I think I would use eval(), but would first check to make sure the string is a valid mathematical expression, as opposed to something malicious. You could use a regex for the validation.

eval() also takes additional arguments which you can use to restrict the namespace it operates in for greater security.

Papist answered 3/3, 2010 at 13:22 Comment(6)
But, of course, don't rely on regular expressions to validate arbitrary mathematical expressions.Clown
@High-Performance Mark: Yes, I guess it depends on what sort of mathematical expressions he has in mind . . . e.g., just simple arithmetic with numbers and +,-,*,/,**,(,) or something more complicatedPapist
@Tim -- it's the () I'm worried about, or rather the (((((())))))). In truth, I think OP should worry about them, my brow is unfurrowed by OP's problems.Clown
@Mark: Yeah, that could get kind of hairy . . . . Maybe it'd be simpler to just make sure that the string contains only those characters (in whatever order), and then run it in a try-except. Of course one could also use a parser for mathematical expressions, as others have suggested.Papist
Don't use eval() if you don't control input even if you restrict the namespace e.g., eval("9**9**9**9**9**9**9**9", {'__builtins__': None}) consumes CPU, memory.Magel
Restricting the namespace of eval does not add to security.Septuple
C
6

[I know this is an old question, but it is worth pointing out new useful solutions as they pop up]

Since python3.6, this capability is now built into the language, coined "f-strings".

See: PEP 498 -- Literal String Interpolation

For example (note the f prefix):

f'{2**4}'
=> '16'
Comedic answered 16/12, 2016 at 16:12 Comment(3)
Very interesting link. But I guess f-strings are here to make writing source code easier, while the question seems to be about working with strings inside of variables (possibly from untrusted sources). f-strings cannot be used in that case.Shanleigh
is there any way to do something to the effect of f'{2{operator}4}' where you can now assign the operator to do 2+4 or 2*4 or 2-4 or etcNissen
This is practically equivalent to just doing str(eval(...)), so it is certainly not safer than eval.Cilo
D
4

This is a massively late reply, but I think useful for future reference. Rather than write your own math parser (although the pyparsing example above is great) you could use SymPy. I don't have a lot of experience with it, but it contains a much more powerful math engine than anyone is likely to write for a specific application and the basic expression evaluation is very easy:

>>> import sympy
>>> x, y, z = sympy.symbols('x y z')
>>> sympy.sympify("x**3 + sin(y)").evalf(subs={x:1, y:-3})
0.858879991940133

Very cool indeed! A from sympy import * brings in a lot more function support, such as trig functions, special functions, etc., but I've avoided that here to show what's coming from where.

Derwon answered 4/3, 2012 at 16:8 Comment(3)
Is sympy "safe"? There seem to be numerous posts that suggest it is a wrapper around eval() that could be exploited in the same way. Also evalf doesn't take numpy ndarrays.Hoax
No sympy is not safe for untrusted input. Try sympy.sympify("""[].__class__.__base__.__subclasses__()[158]('ls')""") this calls subprocess.Popen() which I passed ls instead of rm -rf /. The index will probably be different on other computers. This is a variant of the Ned Batchelder exploitHoax
Indeed, it does not add to safety at all.Septuple
S
3

Using lark parser library

from operator import add, sub, mul, truediv, neg, pow
from lark import Lark, Transformer, v_args

calc_grammar = f"""
    ?start: sum
    ?sum: product
        | sum "+" product   -> {add.__name__}
        | sum "-" product   -> {sub.__name__}
    ?product: power
        | product "*" power  -> {mul.__name__}
        | product "/" power  -> {truediv.__name__}
    ?power: atom
        | power "^" atom -> {pow.__name__}
    ?atom: NUMBER           -> number
         | "-" atom         -> {neg.__name__}
         | "(" sum ")"

    %import common.NUMBER
    %import common.WS_INLINE

    %ignore WS_INLINE
"""


@v_args(inline=True)
class CalculateTree(Transformer):
    add = add
    sub = sub
    neg = neg
    mul = mul
    truediv = truediv
    pow = pow
    number = float


calc_parser = Lark(calc_grammar, parser="lalr", transformer=CalculateTree())
calc = calc_parser.parse


def eval_expr(expression: str) -> float:
    return calc(expression)


print(eval_expr("2^4"))
print(eval_expr("-1*2^4"))
print(eval_expr("-2^3 + 1"))
print(eval_expr("2**4"))  # Error

Slavey answered 11/5, 2021 at 17:18 Comment(0)
I
1

Use eval in a clean namespace:

>>> ns = {'__builtins__': None}
>>> eval('2 ** 4', ns)
16

The clean namespace should prevent injection. For instance:

>>> eval('__builtins__.__import__("os").system("echo got through")', ns)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<string>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute '__import__'

Otherwise you would get:

>>> eval('__builtins__.__import__("os").system("echo got through")')
got through
0

You might want to give access to the math module:

>>> import math
>>> ns = vars(math).copy()
>>> ns['__builtins__'] = None
>>> eval('cos(pi/3)', ns)
0.50000000000000011
Impenitent answered 3/3, 2010 at 14:42 Comment(4)
eval("(1).__class__.__bases__[0].__subclasses__()[81]('echo got through'.split())",{'builtins':None}) #escapes your sandboxUniformed
Python 3.4: eval("""[i for i in (1).__class__.__bases__[0].__subclasses__() if i.__name__.endswith('BuiltinImporter')][0]().load_module('sys').modules['sys'].modules['os'].system('/bin/sh')""", {'__builtins__': None}) executes the bourne shell...Septuple
This is not safe. Malicious code can still be executed.Aton
This is not safe - well, I reckon it is just as safe as using bash overall. BTW: eval('math.sqrt(2.0)') <- "math." is required as written above.Argolis
T
0

Here's my solution to the problem without using eval. Works with Python2 and Python3. It doesn't work with negative numbers.

$ python -m pytest test.py

test.py

from solution import Solutions

class SolutionsTestCase(unittest.TestCase):
    def setUp(self):
        self.solutions = Solutions()

    def test_evaluate(self):
        expressions = [
            '2+3=5',
            '6+4/2*2=10',
            '3+2.45/8=3.30625',
            '3**3*3/3+3=30',
            '2^4=6'
        ]
        results = [x.split('=')[1] for x in expressions]
        for e in range(len(expressions)):
            if '.' in results[e]:
                results[e] = float(results[e])
            else:
                results[e] = int(results[e])
            self.assertEqual(
                results[e],
                self.solutions.evaluate(expressions[e])
            )

solution.py

class Solutions(object):
    def evaluate(self, exp):
        def format(res):
            if '.' in res:
                try:
                    res = float(res)
                except ValueError:
                    pass
            else:
                try:
                    res = int(res)
                except ValueError:
                    pass
            return res
        def splitter(item, op):
            mul = item.split(op)
            if len(mul) == 2:
                for x in ['^', '*', '/', '+', '-']:
                    if x in mul[0]:
                        mul = [mul[0].split(x)[1], mul[1]]
                    if x in mul[1]:
                        mul = [mul[0], mul[1].split(x)[0]]
            elif len(mul) > 2:
                pass
            else:
                pass
            for x in range(len(mul)):
                mul[x] = format(mul[x])
            return mul
        exp = exp.replace(' ', '')
        if '=' in exp:
            res = exp.split('=')[1]
            res = format(res)
            exp = exp.replace('=%s' % res, '')
        while '^' in exp:
            if '^' in exp:
                itm = splitter(exp, '^')
                res = itm[0] ^ itm[1]
                exp = exp.replace('%s^%s' % (str(itm[0]), str(itm[1])), str(res))
        while '**' in exp:
            if '**' in exp:
                itm = splitter(exp, '**')
                res = itm[0] ** itm[1]
                exp = exp.replace('%s**%s' % (str(itm[0]), str(itm[1])), str(res))
        while '/' in exp:
            if '/' in exp:
                itm = splitter(exp, '/')
                res = itm[0] / itm[1]
                exp = exp.replace('%s/%s' % (str(itm[0]), str(itm[1])), str(res))
        while '*' in exp:
            if '*' in exp:
                itm = splitter(exp, '*')
                res = itm[0] * itm[1]
                exp = exp.replace('%s*%s' % (str(itm[0]), str(itm[1])), str(res))
        while '+' in exp:
            if '+' in exp:
                itm = splitter(exp, '+')
                res = itm[0] + itm[1]
                exp = exp.replace('%s+%s' % (str(itm[0]), str(itm[1])), str(res))
        while '-' in exp:
            if '-' in exp:
                itm = splitter(exp, '-')
                res = itm[0] - itm[1]
                exp = exp.replace('%s-%s' % (str(itm[0]), str(itm[1])), str(res))

        return format(exp)
Trona answered 3/4, 2019 at 0:41 Comment(0)
N
0

I came here looking for a mathematic expression parser as well. Reading through some of the answers and looking up libraries, I came across py-expression which I am now using. It basically handles a lot of operators and formula constructs, but if you're missing something you can easily add new operators/functions to it.

The basic syntax is:

from py_expression.core import Exp
exp = Exp()

parsed_formula = exp.parse('a+4')

result = exp.eval(parsed_formula, {"a":2})

The only issue that I've had with it so far is that it doesn't come with built-in mathematical constants nor a mechanism to add them in. I just proposed a solution to that however: https://github.com/FlavioLionelRita/py-expression/issues/7

Ninnette answered 22/1, 2022 at 16:33 Comment(0)
E
0

What about implementing your own expression solver tailored on your particular problem. With scinumtools expression solver you can either use preexisting operators, or create your own from scratch.

from scinumtools.solver import *

with ExpressionSolver(AtomBase) as es:
    es.solve("sin(23) < 1 && 3*2 == 6 || !(23 > 43) && cos(0) == 1")

Here is a small example how to build your own operators:

class OperatorSquare(OperatorBase):   # operate from left side
    symbol: str = '~'
    def operate_unary(self, tokens):
        right = tokens.get_right()
        tokens.put_left(right*right)
class OperatorCube(OperatorBase):     # operate from right side
    symbol: str = '^'
    def operate_unary(self, tokens):
        left = tokens.get_left()
        tokens.put_left(left*left*left)
operators ={'square':OperatorSquare,'cube':OperatorCube,'add':OperatorAdd}
steps = [
    dict(operators=['square','cube'], otype=Otype.UNARY),
    dict(operators=['add'],           otype=Otype.BINARY),
]
with ExpressionSolver(AtomBase, operators, steps) as es:
    es.solve('~3 + 2^')
# will result in: Atom(17)

You can even change the behaviour of the atom:

class AtomCustom(AtomBase):
    value: str
    def __init__(self, value:str):
        self.value = str(value)
    def __add__(self, other):
        return AtomCustom(self.value + other.value)
    def __gt__(self, other):
        return AtomCustom(len(self.value) > len(other.value))
operators = {'add':OperatorAdd,'gt':OperatorGt,'par':OperatorPar}
steps = [
    dict(operators=['par'],  otype=Otype.ARGS),
    dict(operators=['add'],  otype=Otype.BINARY),
    dict(operators=['gt'],   otype=Otype.BINARY),
]
with ExpressionSolver(AtomCustom, operators, steps) as es:
    es.solve("(limit + 100 km/s) > (limit + 50000000000 km/s)")
# will result in: Atom('False')

By the way, I am the author and I would be very interested to hear your comments and suggestion to the scinumtools project. Check it out on GitHub, or PyPi

Euhemerism answered 21/11, 2023 at 16:29 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.