Mercurial > hg > expressionparser
view expr.py @ 3:5ac8eed85684
consolidate token classes
author | Ted Mielczarek <ted.mielczarek@gmail.com> |
---|---|
date | Thu, 02 Jun 2011 07:47:33 -0400 |
parents | 94a293b914af |
children | a42bb6dc2fa7 |
line wrap: on
line source
#!/usr/bin/env python # Implements a top-down parser/evaluator for simple boolean expressions. # ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm # # Rough grammar: # expr := literal # | '(' expr ')' # | expr '&&' expr # | expr '||' expr # | expr '==' expr # | expr '!=' expr # literal := BOOL # | INT # | STRING # | IDENT # BOOL := true|false # INT := [0-9]+ # STRING := "[^"]*" # IDENT := [A-Za-z_]\w* # Identifiers take their values from a mapping dictionary passed as the second # argument. __all__ = ['parse', 'ParseError'] import re, unittest # token classes class ident_token: def __init__(self, value): self.value = value def nud(self, parser): # identifiers take their value from the value mappings passed # to the parser return parser.value(self.value) class literal_token: def __init__(self, value): self.value = value def nud(self, parser): return self.value class eq_op_token: "==" lbp = 20 def led(self, parser, left): return left == parser.expression(self.lbp) class neq_op_token: "!=" lbp = 20 def led(self, parser, left): return left != parser.expression(self.lbp) class and_op_token: "&&" lbp = 11 def led(self, parser, left): right = parser.expression(self.lbp) return left and right class or_op_token: "||" lbp = 10 def led(self, parser, left): right = parser.expression(self.lbp) return left or right class lparen_token: "(" lbp = 50 def nud(self, parser): expr = parser.expression() parser.advance(rparen_token) return expr class rparen_token: ")" lbp = 0 class end_token: # lowest left binding power, always ends parsing lbp = 0 class ParseError(Exception): pass class ExpressionParser(object): def __init__(self, text, valuemapping): """ Initialize the parser with input |text|, and |valuemapping| as a dict mapping identifier names to values. """ self.text = text self.valuemapping = valuemapping def _tokenize(self): """ Lex the input text into tokens and yield them in sequence. """ # scanner callbacks def bool_(scanner, t): return literal_token({'true':True, 'false':False}[t]) def identifier(scanner, t): return ident_token(t) def integer(scanner, t): return literal_token(int(t)) def eq(scanner, t): return eq_op_token() def neq(scanner, t): return neq_op_token() def or_(scanner, t): return or_op_token() def and_(scanner, t): return and_op_token() def lparen(scanner, t): return lparen_token() def rparen(scanner, t): return rparen_token() def string_(scanner, t): return literal_token(t[1:-1]) scanner = re.Scanner([ (r"true|false", bool_), (r"[a-zA-Z_]\w*", identifier), (r"[0-9]+", integer), (r'"[^"]*"', string_), (r"==", eq), (r"!=", neq), (r"\|\|", or_), (r"&&", and_), (r"\(", lparen), (r"\)", rparen), (r"\s+", None), # skip whitespace ]) tokens, remainder = scanner.scan(self.text) for t in tokens: yield t yield end_token() def value(self, ident): """ Look up the value of |ident| in the value mapping passed in the constructor. """ return self.valuemapping[ident] def advance(self, expected): """ Assert that the next token is an instance of |expected|, and advance to the next token. """ if not isinstance(self.token, expected): raise Exception, "Unexpected token!" self.token = self.iter.next() def expression(self, rbp=0): """ Parse and return the value of an expression until a token with right binding power greater than rbp is encountered. """ t = self.token self.token = self.iter.next() left = t.nud(self) while rbp < self.token.lbp: t = self.token self.token = self.iter.next() left = t.led(self, left) return left def parse(self): """ Parse and return the value of the expression in the text passed to the constructor. Raises a ParseError if the expression could not be parsed. """ try: self.iter = self._tokenize() self.token = self.iter.next() return self.expression() except: raise ParseError def parse(text, values): """ Parse and evaluate a boolean expression in |text|. Use |values| to look up the value of identifiers referenced in the expression. Returns the final value of the expression. A ParseError will be raised if parsing fails. """ return ExpressionParser(text, values).parse() class ExpressionParserUnittest(unittest.TestCase): def test_BasicValues(self): self.assertEqual(1, parse("1", {})) self.assertEqual(100, parse("100", {})) self.assertEqual(True, parse("true", {})) self.assertEqual(False, parse("false", {})) self.assertEqual("", parse('""', {})) self.assertEqual("foo bar", parse('"foo bar"', {})) self.assertEqual(1, parse("foo", {'foo':1})) self.assertEqual(True, parse("bar", {'bar':True})) self.assertEqual("xyz", parse("abc123", {'abc123':"xyz"})) def test_Equality(self): self.assertTrue(parse("true == true", {})) self.assertTrue(parse("false == false", {})) self.assertTrue(parse("false == false", {})) self.assertTrue(parse("1 == 1", {})) self.assertTrue(parse("100 == 100", {})) self.assertTrue(parse('"some text" == "some text"', {})) self.assertTrue(parse("true != false", {})) self.assertTrue(parse("1 != 2", {})) self.assertTrue(parse('"text" != "other text"', {})) self.assertTrue(parse("foo == true", {'foo': True})) self.assertTrue(parse("foo == 1", {'foo': 1})) self.assertTrue(parse('foo == "bar"', {'foo': 'bar'})) self.assertTrue(parse("foo == bar", {'foo': True, 'bar': True})) self.assertTrue(parse("true == foo", {'foo': True})) self.assertTrue(parse("foo != true", {'foo': False})) self.assertTrue(parse("foo != 2", {'foo': 1})) self.assertTrue(parse('foo != "bar"', {'foo': 'abc'})) self.assertTrue(parse("foo != bar", {'foo': True, 'bar': False})) self.assertTrue(parse("true != foo", {'foo': False})) def test_Conjunctions(self): self.assertTrue(parse("true && true", {})) self.assertTrue(parse("true || false", {})) self.assertFalse(parse("false || false", {})) self.assertFalse(parse("true && false", {})) self.assertTrue(parse("true || false && false", {})) def test_Parens(self): self.assertTrue(parse("(true)", {})) self.assertEquals(10, parse("(10)", {})) self.assertEquals('foo', parse('("foo")', {})) self.assertEquals(1, parse("(foo)", {'foo':1})) self.assertTrue(parse("(true == true)", {})) self.assertTrue(parse("(true != false)", {})) self.assertTrue(parse("(true && true)", {})) self.assertTrue(parse("(true || false)", {})) self.assertTrue(parse("(true && true || false)", {})) self.assertFalse(parse("(true || false) && false", {})) self.assertTrue(parse("(true || false) && true", {})) self.assertTrue(parse("true && (true || false)", {})) self.assertTrue(parse("true && (true || false)", {})) if __name__ == '__main__': unittest.main()