Mercurial > hg > expressionparser
view expr.py @ 0:ae57e69e4b15
simple expression parser
author | Ted Mielczarek <ted.mielczarek@gmail.com> |
---|---|
date | Wed, 01 Jun 2011 19:58:56 -0400 |
parents | |
children | c45135ec8c13 |
line wrap: on
line source
#!/usr/bin/env python import re, unittest # ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm # token classes class ident_token: def __init__(self, value): self.value = value def nud(self, parser): # identifiers take their value from the value mappings passed # to the parser return parser.value(self.value) class int_token: def __init__(self, value): self.value = int(value) def nud(self, parser): return self.value class bool_token: def __init__(self, value): self.value = {'true':True, 'false':False}[value] def nud(self, parser): return self.value class eq_op_token: "==" lbp = 20 def led(self, parser, left): return left == parser.expression(self.lbp) class neq_op_token: "!=" lbp = 20 def led(self, parser, left): return left != parser.expression(self.lbp) class and_op_token: "&&" lbp = 11 def led(self, parser, left): right = parser.expression(self.lbp) return left and right class or_op_token: "||" lbp = 10 def led(self, parser, left): right = parser.expression(self.lbp) return left or right class lparen_token: "(" lbp = 50 def nud(self, parser): expr = parser.expression() parser.advance(rparen_token) return expr class rparen_token: ")" lbp = 0 class string_token: def __init__(self, value): self.value = value def nud(self, parser): return self.value class end_token: # lowest left binding power, always ends parsing lbp = 0 class ExpressionParser(object): def __init__(self, text, valuemapping): """ Initialize the parser with input |text|, and |valuemapping| as a dict mapping identifier names to values. """ self.text = text self.valuemapping = valuemapping def _tokenize(self): """ Lex the input text into tokens and yield them in sequence. """ # scanner callbacks def bool_(scanner, t): return bool_token(t) def identifier(scanner, t): return ident_token(t) def integer(scanner, t): return int_token(t) def eq(scanner, t): return eq_op_token() def neq(scanner, t): return neq_op_token() def or_(scanner, t): return or_op_token() def and_(scanner, t): return and_op_token() def lparen(scanner, t): return lparen_token() def rparen(scanner, t): return rparen_token() def string_(scanner, t): return string_token(t[1:-1]) scanner = re.Scanner([ (r"true|false", bool_), (r"[a-zA-Z_]\w*", identifier), (r"[0-9]+", integer), (r'"[^"]*"', string_), (r"==", eq), (r"!=", neq), (r"\|\|", or_), (r"&&", and_), (r"\(", lparen), (r"\)", rparen), (r"\s+", None), # skip whitespace ]) tokens, remainder = scanner.scan(self.text) for t in tokens: yield t yield end_token() def value(self, ident): """ Look up the value of |ident| in the value mapping passed in the constructor. """ return self.valuemapping[ident] def advance(self, expected): """ Assert that the next token is an instance of |expected|, and advance to the next token. """ if not isinstance(self.token, expected): raise Exception, "Unexpected token!" self.token = self.iter.next() def expression(self, rbp=0): """ Parse and return the value of an expression until a token with right binding power greater than rbp is encountered. """ t = self.token self.token = self.iter.next() left = t.nud(self) while rbp < self.token.lbp: t = self.token self.token = self.iter.next() left = t.led(self, left) return left def parse(self): """ Parse and return the value of the expression in the text passed to the constructor. """ self.iter = self._tokenize() self.token = self.iter.next() return self.expression() class ExpressionParserUnittest(unittest.TestCase): def parse(self, text, values): return ExpressionParser(text, values).parse() def test_BasicValues(self): self.assertEqual(1, self.parse("1", {})) self.assertEqual(100, self.parse("100", {})) self.assertEqual(True, self.parse("true", {})) self.assertEqual(False, self.parse("false", {})) self.assertEqual("", self.parse('""', {})) self.assertEqual("foo bar", self.parse('"foo bar"', {})) self.assertEqual(1, self.parse("foo", {'foo':1})) self.assertEqual(True, self.parse("bar", {'bar':True})) self.assertEqual("xyz", self.parse("abc123", {'abc123':"xyz"})) def test_Equality(self): self.assertTrue(self.parse("true == true", {})) self.assertTrue(self.parse("false == false", {})) self.assertTrue(self.parse("false == false", {})) self.assertTrue(self.parse("1 == 1", {})) self.assertTrue(self.parse("100 == 100", {})) self.assertTrue(self.parse('"some text" == "some text"', {})) self.assertTrue(self.parse("true != false", {})) self.assertTrue(self.parse("1 != 2", {})) self.assertTrue(self.parse('"text" != "other text"', {})) self.assertTrue(self.parse("foo == true", {'foo': True})) self.assertTrue(self.parse("foo == 1", {'foo': 1})) self.assertTrue(self.parse('foo == "bar"', {'foo': 'bar'})) self.assertTrue(self.parse("foo == bar", {'foo': True, 'bar': True})) self.assertTrue(self.parse("true == foo", {'foo': True})) self.assertTrue(self.parse("foo != true", {'foo': False})) self.assertTrue(self.parse("foo != 2", {'foo': 1})) self.assertTrue(self.parse('foo != "bar"', {'foo': 'abc'})) self.assertTrue(self.parse("foo != bar", {'foo': True, 'bar': False})) self.assertTrue(self.parse("true != foo", {'foo': False})) def test_Conjunctions(self): self.assertTrue(self.parse("true && true", {})) self.assertTrue(self.parse("true || false", {})) self.assertFalse(self.parse("false || false", {})) self.assertFalse(self.parse("true && false", {})) if __name__ == '__main__': unittest.main() #parser = ExpressionParser(sys.argv[1], dict((a,int(b)) for a,b in (x.split('=') for x in sys.argv[2:]))) #print "%s: %s" % (sys.argv[1],parser.parse())