Mercurial > hg > expressionparser
comparison expr.py @ 0:ae57e69e4b15
simple expression parser
author | Ted Mielczarek <ted.mielczarek@gmail.com> |
---|---|
date | Wed, 01 Jun 2011 19:58:56 -0400 |
parents | |
children | c45135ec8c13 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ae57e69e4b15 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import re, unittest | |
4 | |
5 # ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm | |
6 # token classes | |
class ident_token:
    """Token for an identifier; it evaluates to the value bound to its name."""

    def __init__(self, value):
        # The identifier's name exactly as it was lexed from the input.
        self.value = value

    def nud(self, parser):
        """Return the value the parser reports for this identifier's name."""
        name = self.value
        return parser.value(name)
14 | |
class int_token:
    """Token for an integer literal."""

    def __init__(self, value):
        # Convert the lexed digits once, up front.
        number = int(value)
        self.value = number

    def nud(self, parser):
        """Return the integer value; |parser| is not consulted."""
        return self.value
20 | |
class bool_token:
    """Token for the boolean literals ``true`` and ``false``."""

    def __init__(self, value):
        # Any spelling other than the two literals raises KeyError.
        literals = {'true': True, 'false': False}
        self.value = literals[value]

    def nud(self, parser):
        """Return the boolean value; |parser| is not consulted."""
        return self.value
26 | |
class eq_op_token:
    """Token for the ``==`` operator."""

    # Left binding power; higher than && (11) and || (10).
    lbp = 20

    def led(self, parser, left):
        """Parse the right operand and return whether it equals |left|."""
        right = parser.expression(self.lbp)
        return left == right
32 | |
class neq_op_token:
    """Token for the ``!=`` operator."""

    # Left binding power; same as ==.
    lbp = 20

    def led(self, parser, left):
        """Parse the right operand and return whether it differs from |left|."""
        right = parser.expression(self.lbp)
        return left != right
38 | |
class and_op_token:
    """Token for the ``&&`` operator."""

    # Left binding power; higher than || (10), lower than == and != (20).
    lbp = 11

    def led(self, parser, left):
        """
        Parse the right operand, then combine it with |left| using Python
        ``and`` semantics: a falsy |left| is returned as-is, otherwise the
        right operand is returned.
        """
        # The right operand is always parsed so that its tokens are consumed,
        # even when |left| already decides the result.
        rhs = parser.expression(self.lbp)
        return rhs if left else left
45 | |
class or_op_token:
    """Token for the ``||`` operator."""

    # Left binding power; the lowest of the binary operators.
    lbp = 10

    def led(self, parser, left):
        """
        Parse the right operand, then combine it with |left| using Python
        ``or`` semantics: a truthy |left| is returned as-is, otherwise the
        right operand is returned.
        """
        # The right operand is always parsed so that its tokens are consumed,
        # even when |left| already decides the result.
        rhs = parser.expression(self.lbp)
        return left if left else rhs
52 | |
class lparen_token:
    """Token for ``(``, which opens a parenthesized sub-expression."""

    lbp = 50

    def nud(self, parser):
        """
        Parse the expression inside the parentheses, require the closing
        ``)`` and return the inner expression's value.
        """
        inner = parser.expression()
        # advance() raises if the current token is not a closing parenthesis.
        parser.advance(rparen_token)
        return inner
60 | |
class rparen_token:
    """Token for ``)``, which closes a parenthesized sub-expression."""

    # Zero binding power: the expression loop never continues past this token.
    lbp = 0
64 | |
class string_token:
    """Token for a string literal."""

    def __init__(self, value):
        # Stored exactly as given; no conversion is applied here.
        self.value = value

    def nud(self, parser):
        """Return the string value; |parser| is not consulted."""
        text = self.value
        return text
70 | |
class end_token:
    """Sentinel token emitted after the last real token of the input."""

    # Lowest left binding power, so reaching this token always ends parsing.
    lbp = 0
74 | |
class ExpressionParser(object):
    """
    Top-down operator-precedence parser that evaluates a simple expression
    while parsing it.

    The lexer recognizes the literals ``true``/``false``, integers,
    double-quoted strings (without escape sequences), identifiers, the
    operators ``==``, ``!=``, ``&&`` and ``||``, and parentheses.
    Whitespace between tokens is skipped.
    """

    def __init__(self, text, valuemapping):
        """
        Initialize the parser with input |text|, and |valuemapping| as
        a dict mapping identifier names to values.
        """
        self.text = text
        self.valuemapping = valuemapping

    def _tokenize(self):
        """
        Lex the input text into tokens and yield them in sequence,
        followed by a final end_token.

        Raises ValueError if part of the input cannot be lexed.
        """
        # scanner callbacks
        def bool_(scanner, t): return bool_token(t)
        def identifier(scanner, t): return ident_token(t)
        def integer(scanner, t): return int_token(t)
        def eq(scanner, t): return eq_op_token()
        def neq(scanner, t): return neq_op_token()
        def or_(scanner, t): return or_op_token()
        def and_(scanner, t): return and_op_token()
        def lparen(scanner, t): return lparen_token()
        def rparen(scanner, t): return rparen_token()
        # strip the surrounding double quotes
        def string_(scanner, t): return string_token(t[1:-1])

        scanner = re.Scanner([
            # The word boundary keeps identifiers that merely start with a
            # literal (e.g. "true_flag") from being split into a bool
            # followed by an identifier.
            (r"true\b|false\b", bool_),
            (r"[a-zA-Z_]\w*", identifier),
            (r"[0-9]+", integer),
            (r'"[^"]*"', string_),
            (r"==", eq),
            (r"!=", neq),
            (r"\|\|", or_),
            (r"&&", and_),
            (r"\(", lparen),
            (r"\)", rparen),
            (r"\s+", None), # skip whitespace
        ])
        tokens, remainder = scanner.scan(self.text)
        if remainder:
            # The scanner stops at the first character it cannot match;
            # report it rather than silently evaluating a truncated input.
            raise ValueError("Unexpected input: %r" % remainder)
        for t in tokens:
            yield t
        yield end_token()

    def value(self, ident):
        """
        Look up the value of |ident| in the value mapping passed in the
        constructor. A missing name propagates whatever the mapping raises
        (KeyError for a dict).
        """
        return self.valuemapping[ident]

    def advance(self, expected):
        """
        Assert that the current token is an instance of |expected|, and
        advance to the next token.

        Raises Exception if the current token has a different type.
        """
        if not isinstance(self.token, expected):
            raise Exception("Unexpected token!")
        self.token = next(self.iter)

    def expression(self, rbp=0):
        """
        Parse and return the value of an expression. Parsing continues for
        as long as the upcoming token's left binding power is greater than
        |rbp|.
        """
        t = self.token
        self.token = next(self.iter)
        # The first token of an expression supplies the initial value ...
        left = t.nud(self)
        # ... and every following operator that binds tighter than |rbp|
        # folds that value together with its right operand.
        while rbp < self.token.lbp:
            t = self.token
            self.token = next(self.iter)
            left = t.led(self, left)
        return left

    def parse(self):
        """
        Parse and return the value of the expression in the text
        passed to the constructor.

        Raises ValueError if the text contains input that cannot be lexed,
        and Exception if a closing parenthesis is missing.
        """
        self.iter = self._tokenize()
        self.token = next(self.iter)
        return self.expression()
156 | |
class ExpressionParserUnittest(unittest.TestCase):
    """Unit tests for evaluating expressions with ExpressionParser."""

    def parse(self, text, values):
        """Evaluate |text| with identifiers bound according to |values|."""
        return ExpressionParser(text, values).parse()

    def test_BasicValues(self):
        # literals
        self.assertEqual(1, self.parse("1", {}))
        self.assertEqual(100, self.parse("100", {}))
        self.assertEqual(True, self.parse("true", {}))
        self.assertEqual(False, self.parse("false", {}))
        self.assertEqual("", self.parse('""', {}))
        self.assertEqual("foo bar", self.parse('"foo bar"', {}))
        # identifiers take their value from the mapping
        self.assertEqual(1, self.parse("foo", {'foo':1}))
        self.assertEqual(True, self.parse("bar", {'bar':True}))
        self.assertEqual("xyz", self.parse("abc123", {'abc123':"xyz"}))

    def test_Equality(self):
        # literal == / != literal
        self.assertTrue(self.parse("true == true", {}))
        self.assertTrue(self.parse("false == false", {}))
        self.assertTrue(self.parse("1 == 1", {}))
        self.assertTrue(self.parse("100 == 100", {}))
        self.assertTrue(self.parse('"some text" == "some text"', {}))
        self.assertTrue(self.parse("true != false", {}))
        self.assertTrue(self.parse("1 != 2", {}))
        self.assertTrue(self.parse('"text" != "other text"', {}))
        # identifiers on either side of ==
        self.assertTrue(self.parse("foo == true", {'foo': True}))
        self.assertTrue(self.parse("foo == 1", {'foo': 1}))
        self.assertTrue(self.parse('foo == "bar"', {'foo': 'bar'}))
        self.assertTrue(self.parse("foo == bar", {'foo': True, 'bar': True}))
        self.assertTrue(self.parse("true == foo", {'foo': True}))
        # identifiers on either side of !=
        self.assertTrue(self.parse("foo != true", {'foo': False}))
        self.assertTrue(self.parse("foo != 2", {'foo': 1}))
        self.assertTrue(self.parse('foo != "bar"', {'foo': 'abc'}))
        self.assertTrue(self.parse("foo != bar", {'foo': True, 'bar': False}))
        self.assertTrue(self.parse("true != foo", {'foo': False}))

    def test_Conjunctions(self):
        self.assertTrue(self.parse("true && true", {}))
        self.assertTrue(self.parse("true || false", {}))
        self.assertFalse(self.parse("false || false", {}))
        self.assertFalse(self.parse("true && false", {}))

    def test_Precedence(self):
        # && binds tighter than ||
        self.assertTrue(self.parse("true || false && false", {}))
        self.assertTrue(self.parse("false && false || true", {}))
        # == and != bind tighter than && and ||
        self.assertTrue(self.parse("1 == 1 && 2 != 3", {}))
        self.assertFalse(self.parse("1 == 2 || 3 != 3", {}))

    def test_Parentheses(self):
        self.assertEqual(1, self.parse("(1)", {}))
        self.assertTrue(self.parse("((true))", {}))
        # parentheses override the default precedence
        self.assertFalse(self.parse("(true || false) && false", {}))
        self.assertFalse(self.parse("false && (false || true)", {}))
        self.assertTrue(self.parse("(foo == 1) && (bar != 2)",
                                   {'foo': 1, 'bar': 3}))
198 | |
# Run the unit tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

    # Disabled command-line driver, kept for reference:
    #parser = ExpressionParser(sys.argv[1], dict((a,int(b)) for a,b in (x.split('=') for x in sys.argv[2:])))
    #print "%s: %s" % (sys.argv[1],parser.parse())