view expr.py @ 1:c45135ec8c13

slight cleanup
author Ted Mielczarek <ted.mielczarek@gmail.com>
date Wed, 01 Jun 2011 20:00:51 -0400
parents ae57e69e4b15
children 94a293b914af
line wrap: on
line source

#!/usr/bin/env python

import re, unittest

# ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm
# token classes
class ident_token:
    """Token for a bare identifier; evaluates to the identifier's mapped value."""

    def __init__(self, value):
        # |value| is the identifier's name exactly as it appeared in the input
        self.value = value

    def nud(self, parser):
        """Return the value the parser associates with this identifier."""
        # the lookup is delegated to the parser, which owns the mapping
        name = self.value
        return parser.value(name)

class int_token:
    """Token for an integer literal."""

    def __init__(self, value):
        # convert the matched text to an int once, at lex time
        number = int(value)
        self.value = number

    def nud(self, parser):
        """Return the integer value; |parser| is not used."""
        return self.value

class bool_token:
    """Token for the boolean literals ``true`` and ``false``."""

    def __init__(self, value):
        # any text other than the two literals raises KeyError
        literals = {'false': False, 'true': True}
        self.value = literals[value]

    def nud(self, parser):
        """Return the boolean value; |parser| is not used."""
        return self.value

class eq_op_token:
    """Token for the ``==`` operator."""

    # left binding power
    lbp = 20

    def led(self, parser, left):
        """Parse the right-hand operand and compare it to |left| with ==."""
        right = parser.expression(self.lbp)
        return left == right
    
class neq_op_token:
    """Token for the ``!=`` operator."""

    # left binding power
    lbp = 20

    def led(self, parser, left):
        """Parse the right-hand operand and compare it to |left| with !=."""
        right = parser.expression(self.lbp)
        return left != right

class and_op_token:
    """Token for the ``&&`` operator."""

    # left binding power
    lbp = 11

    def led(self, parser, left):
        """
        Parse the right-hand operand and return |left| if it is falsy,
        otherwise the right-hand value.
        """
        # the right side is always parsed so its tokens get consumed
        rhs = parser.expression(self.lbp)
        return rhs if left else left
    
class or_op_token:
    """Token for the ``||`` operator."""

    # left binding power
    lbp = 10

    def led(self, parser, left):
        """
        Parse the right-hand operand and return |left| if it is truthy,
        otherwise the right-hand value.
        """
        # the right side is always parsed so its tokens get consumed
        rhs = parser.expression(self.lbp)
        return left if left else rhs

class lparen_token:
    """Token for ``(``, which opens a parenthesized sub-expression."""

    # left binding power
    lbp = 50

    def nud(self, parser):
        """Evaluate the enclosed expression and consume the closing paren."""
        inner = parser.expression()
        # advance() raises unless the current token is the matching ")"
        parser.advance(rparen_token)
        return inner

class rparen_token:
    """Token for ``)``, which closes a parenthesized sub-expression."""

    # left binding power of zero: no expression loop continues past it
    lbp = 0

class string_token:
    """Token for a string literal."""

    def __init__(self, value):
        # |value| is stored unchanged
        self.value = value

    def nud(self, parser):
        """Return the stored string; |parser| is not used."""
        text = self.value
        return text

class end_token:
    """Sentinel token marking the end of the input."""

    # the lowest left binding power, so it always ends parsing
    lbp = 0

class ExpressionParser(object):
    """
    Evaluate a small expression language using a top-down operator
    precedence parser.

    The scanner table in _tokenize() defines the accepted tokens:
    ``true``/``false``, identifiers, non-negative integers, double-quoted
    strings, ``==``, ``!=``, ``||``, ``&&`` and parentheses. Whitespace
    between tokens is skipped.

    Each token object supplies the hooks the parser drives:
      nud(parser)       -- value of the token at the start of an expression
      led(parser, left) -- value of the token as an infix operator
      lbp               -- left binding power (operator precedence)
    """
    def __init__(self, text, valuemapping):
        """
        Initialize the parser with input |text|, and |valuemapping| as
        a dict mapping identifier names to values.
        """
        self.text = text
        self.valuemapping = valuemapping

    def _tokenize(self):
        """
        Lex the input text into tokens and yield them in sequence,
        followed by a final end_token.

        Raises ValueError if any part of the text cannot be lexed.
        """
        # scanner callbacks
        def bool_(scanner, t): return bool_token(t)
        def identifier(scanner, t): return ident_token(t)
        def integer(scanner, t): return int_token(t)
        def eq(scanner, t): return eq_op_token()
        def neq(scanner, t): return neq_op_token()
        def or_(scanner, t): return or_op_token()
        def and_(scanner, t): return and_op_token()
        def lparen(scanner, t): return lparen_token()
        def rparen(scanner, t): return rparen_token()
        # strip the surrounding double quotes
        def string_(scanner, t): return string_token(t[1:-1])

        scanner = re.Scanner([
            # \b keeps an identifier that merely starts with a keyword
            # (e.g. "trueish") from being split into a bool and an
            # identifier; it falls through to the identifier rule instead
            (r"(?:true|false)\b", bool_),
            (r"[a-zA-Z_]\w*", identifier),
            (r"[0-9]+", integer),
            (r'"[^"]*"', string_),
            (r"==", eq),
            (r"!=", neq),
            (r"\|\|", or_),
            (r"&&", and_),
            (r"\(", lparen),
            (r"\)", rparen),
            (r"\s+", None), # skip whitespace
            ])
        tokens, remainder = scanner.scan(self.text)
        if remainder:
            # scan() stops at the first text no rule matches; report it
            # rather than silently evaluating a truncated expression
            raise ValueError("Unexpected input: %r" % (remainder,))
        for t in tokens:
            yield t
        yield end_token()

    def value(self, ident):
        """
        Look up the value of |ident| in the value mapping passed in the
        constructor.
        """
        return self.valuemapping[ident]

    def advance(self, expected):
        """
        Assert that the current token is an instance of |expected|, and
        advance to the next token.

        Raises ValueError (a subclass of Exception) on a mismatch.
        """
        if not isinstance(self.token, expected):
            raise ValueError("Unexpected token!")
        # the next() builtin works on both Python 2.6+ and Python 3
        self.token = next(self.iter)

    def expression(self, rbp=0):
        """
        Parse and return the value of an expression, consuming infix
        operators for as long as the upcoming token's left binding power
        is greater than |rbp|.
        """
        t = self.token
        self.token = next(self.iter)
        left = t.nud(self)
        while rbp < self.token.lbp:
            t = self.token
            self.token = next(self.iter)
            left = t.led(self, left)
        return left

    def parse(self):
        """
        Parse and return the value of the expression in the text
        passed to the constructor.

        Raises ValueError if the text cannot be lexed or if tokens are
        left over after a complete expression (e.g. an unmatched ")").
        A token used where it lacks the needed nud/led/lbp hook is not
        specially handled and surfaces as an AttributeError.
        """
        self.iter = self._tokenize()
        self.token = next(self.iter)
        result = self.expression()
        # a token with lbp 0, such as a stray ")", also stops expression();
        # only the end token means all of the input was consumed
        if not isinstance(self.token, end_token):
            raise ValueError("Unexpected token!")
        return result

class ExpressionParserUnittest(unittest.TestCase):
    """Unit tests for ExpressionParser."""

    def parse(self, text, values):
        """Evaluate |text| with |values| as the identifier mapping."""
        return ExpressionParser(text, values).parse()

    def test_BasicValues(self):
        self.assertEqual(1, self.parse("1", {}))
        self.assertEqual(100, self.parse("100", {}))
        self.assertEqual(True, self.parse("true", {}))
        self.assertEqual(False, self.parse("false", {}))
        self.assertEqual("", self.parse('""', {}))
        self.assertEqual("foo bar", self.parse('"foo bar"', {}))
        self.assertEqual(1, self.parse("foo", {'foo':1}))
        self.assertEqual(True, self.parse("bar", {'bar':True}))
        self.assertEqual("xyz", self.parse("abc123", {'abc123':"xyz"}))

    def test_Equality(self):
        self.assertTrue(self.parse("true == true", {}))
        self.assertTrue(self.parse("false == false", {}))
        self.assertTrue(self.parse("1 == 1", {}))
        self.assertTrue(self.parse("100 == 100", {}))
        self.assertTrue(self.parse('"some text" == "some text"', {}))
        self.assertTrue(self.parse("true != false", {}))
        self.assertTrue(self.parse("1 != 2", {}))
        self.assertTrue(self.parse('"text" != "other text"', {}))
        self.assertTrue(self.parse("foo == true", {'foo': True}))
        self.assertTrue(self.parse("foo == 1", {'foo': 1}))
        self.assertTrue(self.parse('foo == "bar"', {'foo': 'bar'}))
        self.assertTrue(self.parse("foo == bar", {'foo': True, 'bar': True}))
        self.assertTrue(self.parse("true == foo", {'foo': True}))
        self.assertTrue(self.parse("foo != true", {'foo': False}))
        self.assertTrue(self.parse("foo != 2", {'foo': 1}))
        self.assertTrue(self.parse('foo != "bar"', {'foo': 'abc'}))
        self.assertTrue(self.parse("foo != bar", {'foo': True, 'bar': False}))
        self.assertTrue(self.parse("true != foo", {'foo': False}))

    def test_Conjunctions(self):
        self.assertTrue(self.parse("true && true", {}))
        self.assertTrue(self.parse("true || false", {}))
        self.assertTrue(self.parse("false || true", {}))
        self.assertFalse(self.parse("false || false", {}))
        self.assertFalse(self.parse("true && false", {}))
        self.assertFalse(self.parse("false && true", {}))

    def test_Precedence(self):
        # && binds tighter than ||, so this is true || (false && false)
        self.assertTrue(self.parse("true || false && false", {}))
        # == binds tighter than &&, so this is (1 == 1) && (2 == 2)
        self.assertTrue(self.parse("1 == 1 && 2 == 2", {}))

    def test_Parentheses(self):
        self.assertEqual(True, self.parse("(true)", {}))
        self.assertEqual(True, self.parse("((true))", {}))
        self.assertEqual("foo", self.parse('("foo")', {}))
        self.assertTrue(self.parse("(1 == 1)", {}))
        # parentheses override the default precedence
        self.assertFalse(self.parse("(true || false) && false", {}))
        
# Run the unit tests when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()