view expr.py @ 3:5ac8eed85684

consolidate token classes
author Ted Mielczarek <ted.mielczarek@gmail.com>
date Thu, 02 Jun 2011 07:47:33 -0400
parents 94a293b914af
children a42bb6dc2fa7
line wrap: on
line source

#!/usr/bin/env python

# Implements a top-down parser/evaluator for simple boolean expressions.
# ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm
#
# Rough grammar:
# expr := literal
#       | '(' expr ')'
#       | expr '&&' expr
#       | expr '||' expr
#       | expr '==' expr
#       | expr '!=' expr
# literal := BOOL
#          | INT
#          | STRING
#          | IDENT
# BOOL   := true|false
# INT    := [0-9]+
# STRING := "[^"]*"
# IDENT  := [A-Za-z_]\w*

# Identifiers take their values from a mapping dictionary passed as the second
# argument.

__all__ = ['parse', 'ParseError']
import re, unittest

# token classes
class ident_token:
    def __init__(self, value):
        self.value = value
    def nud(self, parser):
        # identifiers take their value from the value mappings passed
        # to the parser
        return parser.value(self.value)

class literal_token:
    def __init__(self, value):
        self.value = value
    def nud(self, parser):
        return self.value

class eq_op_token:
    "=="
    lbp = 20
    def led(self, parser, left):
        return left == parser.expression(self.lbp)
    
class neq_op_token:
    "!="
    lbp = 20
    def led(self, parser, left):
        return left != parser.expression(self.lbp)

class and_op_token:
    "&&"
    lbp = 11
    def led(self, parser, left):
        right = parser.expression(self.lbp)
        return left and right
    
class or_op_token:
    "||"
    lbp = 10
    def led(self, parser, left):
        right = parser.expression(self.lbp)
        return left or right

class lparen_token:
    "("
    lbp = 50
    def nud(self, parser):
        expr = parser.expression()
        parser.advance(rparen_token)
        return expr

class rparen_token:
    ")"
    lbp = 0

class end_token:
    # lowest left binding power, always ends parsing
    lbp = 0

class ParseError(Exception):
    pass

class ExpressionParser(object):
    def __init__(self, text, valuemapping):
        """
        Initialize the parser with input |text|, and |valuemapping| as
        a dict mapping identifier names to values.
        """
        self.text = text
        self.valuemapping = valuemapping

    def _tokenize(self):
        """
        Lex the input text into tokens and yield them in sequence.
        """
        # scanner callbacks
        def bool_(scanner, t): return literal_token({'true':True, 'false':False}[t])
        def identifier(scanner, t): return ident_token(t)
        def integer(scanner, t): return literal_token(int(t))
        def eq(scanner, t): return eq_op_token()
        def neq(scanner, t): return neq_op_token()
        def or_(scanner, t): return or_op_token()
        def and_(scanner, t): return and_op_token()
        def lparen(scanner, t): return lparen_token()
        def rparen(scanner, t): return rparen_token()
        def string_(scanner, t): return literal_token(t[1:-1])

        scanner = re.Scanner([
            (r"true|false", bool_),
            (r"[a-zA-Z_]\w*", identifier),
            (r"[0-9]+", integer),
            (r'"[^"]*"', string_),
            (r"==", eq),
            (r"!=", neq),
            (r"\|\|", or_),
            (r"&&", and_),
            (r"\(", lparen),
            (r"\)", rparen),
            (r"\s+", None), # skip whitespace
            ])
        tokens, remainder = scanner.scan(self.text)
        for t in tokens:
            yield t
        yield end_token()

    def value(self, ident):
        """
        Look up the value of |ident| in the value mapping passed in the
        constructor.
        """
        return self.valuemapping[ident]

    def advance(self, expected):
        """
        Assert that the next token is an instance of |expected|, and advance
        to the next token.
        """
        if not isinstance(self.token, expected):
            raise Exception, "Unexpected token!"
        self.token = self.iter.next()
        
    def expression(self, rbp=0):
        """
        Parse and return the value of an expression until a token with
        right binding power greater than rbp is encountered.
        """
        t = self.token
        self.token = self.iter.next()
        left = t.nud(self)
        while rbp < self.token.lbp:
            t = self.token
            self.token = self.iter.next()
            left = t.led(self, left)
        return left

    def parse(self):
        """
        Parse and return the value of the expression in the text
        passed to the constructor. Raises a ParseError if the expression
        could not be parsed.
        """
        try:
            self.iter = self._tokenize()
            self.token = self.iter.next()
            return self.expression()
        except:
            raise ParseError

def parse(text, values):
    """
    Parse and evaluate a boolean expression in |text|. Use |values| to look
    up the value of identifiers referenced in the expression. Returns the final
    value of the expression. A ParseError will be raised if parsing fails.
    """
    return ExpressionParser(text, values).parse()

class ExpressionParserUnittest(unittest.TestCase):
    def test_BasicValues(self):
        self.assertEqual(1, parse("1", {}))
        self.assertEqual(100, parse("100", {}))
        self.assertEqual(True, parse("true", {}))
        self.assertEqual(False, parse("false", {}))
        self.assertEqual("", parse('""', {}))
        self.assertEqual("foo bar", parse('"foo bar"', {}))
        self.assertEqual(1, parse("foo", {'foo':1}))
        self.assertEqual(True, parse("bar", {'bar':True}))
        self.assertEqual("xyz", parse("abc123", {'abc123':"xyz"}))

    def test_Equality(self):
        self.assertTrue(parse("true == true", {}))
        self.assertTrue(parse("false == false", {}))
        self.assertTrue(parse("false == false", {}))
        self.assertTrue(parse("1 == 1", {}))
        self.assertTrue(parse("100 == 100", {}))
        self.assertTrue(parse('"some text" == "some text"', {}))
        self.assertTrue(parse("true != false", {}))
        self.assertTrue(parse("1 != 2", {}))
        self.assertTrue(parse('"text" != "other text"', {}))
        self.assertTrue(parse("foo == true", {'foo': True}))
        self.assertTrue(parse("foo == 1", {'foo': 1}))
        self.assertTrue(parse('foo == "bar"', {'foo': 'bar'}))
        self.assertTrue(parse("foo == bar", {'foo': True, 'bar': True}))
        self.assertTrue(parse("true == foo", {'foo': True}))
        self.assertTrue(parse("foo != true", {'foo': False}))
        self.assertTrue(parse("foo != 2", {'foo': 1}))
        self.assertTrue(parse('foo != "bar"', {'foo': 'abc'}))
        self.assertTrue(parse("foo != bar", {'foo': True, 'bar': False}))
        self.assertTrue(parse("true != foo", {'foo': False}))

    def test_Conjunctions(self):
        self.assertTrue(parse("true && true", {}))
        self.assertTrue(parse("true || false", {}))
        self.assertFalse(parse("false || false", {}))
        self.assertFalse(parse("true && false", {}))
        self.assertTrue(parse("true || false && false", {}))

    def test_Parens(self):
        self.assertTrue(parse("(true)", {}))
        self.assertEquals(10, parse("(10)", {}))
        self.assertEquals('foo', parse('("foo")', {}))
        self.assertEquals(1, parse("(foo)", {'foo':1}))
        self.assertTrue(parse("(true == true)", {}))
        self.assertTrue(parse("(true != false)", {}))
        self.assertTrue(parse("(true && true)", {}))
        self.assertTrue(parse("(true || false)", {}))
        self.assertTrue(parse("(true && true || false)", {}))
        self.assertFalse(parse("(true || false) && false", {}))
        self.assertTrue(parse("(true || false) && true", {}))
        self.assertTrue(parse("true && (true || false)", {}))
        self.assertTrue(parse("true && (true || false)", {}))
        
if __name__ == '__main__':
    unittest.main()