# HG changeset patch # User Ted Mielczarek # Date 1306972736 14400 # Node ID ae57e69e4b1568335055f6f0395fbb4596300fe1 simple expression parser diff -r 000000000000 -r ae57e69e4b15 expr.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/expr.py Wed Jun 01 19:58:56 2011 -0400 @@ -0,0 +1,207 @@ +#!/usr/bin/env python + +import re, unittest + +# ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm +# token classes +class ident_token: + def __init__(self, value): + self.value = value + def nud(self, parser): + # identifiers take their value from the value mappings passed + # to the parser + return parser.value(self.value) + +class int_token: + def __init__(self, value): + self.value = int(value) + def nud(self, parser): + return self.value + +class bool_token: + def __init__(self, value): + self.value = {'true':True, 'false':False}[value] + def nud(self, parser): + return self.value + +class eq_op_token: + "==" + lbp = 20 + def led(self, parser, left): + return left == parser.expression(self.lbp) + +class neq_op_token: + "!=" + lbp = 20 + def led(self, parser, left): + return left != parser.expression(self.lbp) + +class and_op_token: + "&&" + lbp = 11 + def led(self, parser, left): + right = parser.expression(self.lbp) + return left and right + +class or_op_token: + "||" + lbp = 10 + def led(self, parser, left): + right = parser.expression(self.lbp) + return left or right + +class lparen_token: + "(" + lbp = 50 + def nud(self, parser): + expr = parser.expression() + parser.advance(rparen_token) + return expr + +class rparen_token: + ")" + lbp = 0 + +class string_token: + def __init__(self, value): + self.value = value + def nud(self, parser): + return self.value + +class end_token: + # lowest left binding power, always ends parsing + lbp = 0 + +class ExpressionParser(object): + def __init__(self, text, valuemapping): + """ + Initialize the parser with input |text|, and |valuemapping| as + a dict mapping identifier names to values. + """ + self.text = text + self.valuemapping = valuemapping + + def _tokenize(self): + """ + Lex the input text into tokens and yield them in sequence. + """ + # scanner callbacks + def bool_(scanner, t): return bool_token(t) + def identifier(scanner, t): return ident_token(t) + def integer(scanner, t): return int_token(t) + def eq(scanner, t): return eq_op_token() + def neq(scanner, t): return neq_op_token() + def or_(scanner, t): return or_op_token() + def and_(scanner, t): return and_op_token() + def lparen(scanner, t): return lparen_token() + def rparen(scanner, t): return rparen_token() + def string_(scanner, t): return string_token(t[1:-1]) + + scanner = re.Scanner([ + (r"true|false", bool_), + (r"[a-zA-Z_]\w*", identifier), + (r"[0-9]+", integer), + (r'"[^"]*"', string_), + (r"==", eq), + (r"!=", neq), + (r"\|\|", or_), + (r"&&", and_), + (r"\(", lparen), + (r"\)", rparen), + (r"\s+", None), # skip whitespace + ]) + tokens, remainder = scanner.scan(self.text) + for t in tokens: + yield t + yield end_token() + + def value(self, ident): + """ + Look up the value of |ident| in the value mapping passed in the + constructor. + """ + return self.valuemapping[ident] + + def advance(self, expected): + """ + Assert that the next token is an instance of |expected|, and advance + to the next token. + """ + if not isinstance(self.token, expected): + raise Exception, "Unexpected token!" + self.token = self.iter.next() + + def expression(self, rbp=0): + """ + Parse and return the value of an expression until a token with + right binding power greater than rbp is encountered. + """ + t = self.token + self.token = self.iter.next() + left = t.nud(self) + while rbp < self.token.lbp: + t = self.token + self.token = self.iter.next() + left = t.led(self, left) + return left + + def parse(self): + """ + Parse and return the value of the expression in the text + passed to the constructor. + """ + self.iter = self._tokenize() + self.token = self.iter.next() + return self.expression() + +class ExpressionParserUnittest(unittest.TestCase): + def parse(self, text, values): + return ExpressionParser(text, values).parse() + + def test_BasicValues(self): + self.assertEqual(1, self.parse("1", {})) + self.assertEqual(100, self.parse("100", {})) + self.assertEqual(True, self.parse("true", {})) + self.assertEqual(False, self.parse("false", {})) + self.assertEqual("", self.parse('""', {})) + self.assertEqual("foo bar", self.parse('"foo bar"', {})) + self.assertEqual(1, self.parse("foo", {'foo':1})) + self.assertEqual(True, self.parse("bar", {'bar':True})) + self.assertEqual("xyz", self.parse("abc123", {'abc123':"xyz"})) + + def test_Equality(self): + self.assertTrue(self.parse("true == true", {})) + self.assertTrue(self.parse("false == false", {})) + self.assertTrue(self.parse("false == false", {})) + self.assertTrue(self.parse("1 == 1", {})) + self.assertTrue(self.parse("100 == 100", {})) + self.assertTrue(self.parse('"some text" == "some text"', {})) + self.assertTrue(self.parse("true != false", {})) + self.assertTrue(self.parse("1 != 2", {})) + self.assertTrue(self.parse('"text" != "other text"', {})) + self.assertTrue(self.parse("foo == true", {'foo': True})) + self.assertTrue(self.parse("foo == 1", {'foo': 1})) + self.assertTrue(self.parse('foo == "bar"', {'foo': 'bar'})) + self.assertTrue(self.parse("foo == bar", {'foo': True, 'bar': True})) + self.assertTrue(self.parse("true == foo", {'foo': True})) + self.assertTrue(self.parse("foo != true", {'foo': False})) + self.assertTrue(self.parse("foo != 2", {'foo': 1})) + self.assertTrue(self.parse('foo != "bar"', {'foo': 'abc'})) + self.assertTrue(self.parse("foo != bar", {'foo': True, 'bar': False})) + self.assertTrue(self.parse("true != foo", {'foo': False})) + + def test_Conjunctions(self): + self.assertTrue(self.parse("true && true", {})) + self.assertTrue(self.parse("true || false", {})) + self.assertFalse(self.parse("false || false", {})) + self.assertFalse(self.parse("true && false", {})) + +if __name__ == '__main__': + + + + + unittest.main() + +#parser = ExpressionParser(sys.argv[1], dict((a,int(b)) for a,b in (x.split('=') for x in sys.argv[2:]))) +#print "%s: %s" % (sys.argv[1],parser.parse())