changeset 0:ae57e69e4b15

simple expression parser
author Ted Mielczarek <ted.mielczarek@gmail.com>
date Wed, 01 Jun 2011 19:58:56 -0400
parents
children c45135ec8c13
files expr.py
diffstat 1 files changed, 207 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/expr.py	Wed Jun 01 19:58:56 2011 -0400
@@ -0,0 +1,207 @@
+#!/usr/bin/env python
+
+import re, unittest
+
+# ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm
+# token classes
+class ident_token:
+    def __init__(self, value):
+        self.value = value
+    def nud(self, parser):
+        # identifiers take their value from the value mappings passed
+        # to the parser
+        return parser.value(self.value)
+
+class int_token:
+    def __init__(self, value):
+        self.value = int(value)
+    def nud(self, parser):
+        return self.value
+
+class bool_token:
+    def __init__(self, value):
+        self.value = {'true':True, 'false':False}[value]
+    def nud(self, parser):
+        return self.value
+
+class eq_op_token:
+    "=="
+    lbp = 20
+    def led(self, parser, left):
+        return left == parser.expression(self.lbp)
+    
+class neq_op_token:
+    "!="
+    lbp = 20
+    def led(self, parser, left):
+        return left != parser.expression(self.lbp)
+
+class and_op_token:
+    "&&"
+    lbp = 11
+    def led(self, parser, left):
+        right = parser.expression(self.lbp)
+        return left and right
+    
+class or_op_token:
+    "||"
+    lbp = 10
+    def led(self, parser, left):
+        right = parser.expression(self.lbp)
+        return left or right
+
+class lparen_token:
+    "("
+    lbp = 50
+    def nud(self, parser):
+        expr = parser.expression()
+        parser.advance(rparen_token)
+        return expr
+
+class rparen_token:
+    ")"
+    lbp = 0
+
+class string_token:
+    def __init__(self, value):
+        self.value = value
+    def nud(self, parser):
+        return self.value
+
+class end_token:
+    # lowest left binding power, always ends parsing
+    lbp = 0
+
+class ExpressionParser(object):
+    def __init__(self, text, valuemapping):
+        """
+        Initialize the parser with input |text|, and |valuemapping| as
+        a dict mapping identifier names to values.
+        """
+        self.text = text
+        self.valuemapping = valuemapping
+
+    def _tokenize(self):
+        """
+        Lex the input text into tokens and yield them in sequence.
+        """
+        # scanner callbacks
+        def bool_(scanner, t): return bool_token(t)
+        def identifier(scanner, t): return ident_token(t)
+        def integer(scanner, t): return int_token(t)
+        def eq(scanner, t): return eq_op_token()
+        def neq(scanner, t): return neq_op_token()
+        def or_(scanner, t): return or_op_token()
+        def and_(scanner, t): return and_op_token()
+        def lparen(scanner, t): return lparen_token()
+        def rparen(scanner, t): return rparen_token()
+        def string_(scanner, t): return string_token(t[1:-1])
+
+        scanner = re.Scanner([
+            (r"true|false", bool_),
+            (r"[a-zA-Z_]\w*", identifier),
+            (r"[0-9]+", integer),
+            (r'"[^"]*"', string_),
+            (r"==", eq),
+            (r"!=", neq),
+            (r"\|\|", or_),
+            (r"&&", and_),
+            (r"\(", lparen),
+            (r"\)", rparen),
+            (r"\s+", None), # skip whitespace
+            ])
+        tokens, remainder = scanner.scan(self.text)
+        for t in tokens:
+            yield t
+        yield end_token()
+
+    def value(self, ident):
+        """
+        Look up the value of |ident| in the value mapping passed in the
+        constructor.
+        """
+        return self.valuemapping[ident]
+
+    def advance(self, expected):
+        """
+        Assert that the next token is an instance of |expected|, and advance
+        to the next token.
+        """
+        if not isinstance(self.token, expected):
+            raise Exception, "Unexpected token!"
+        self.token = self.iter.next()
+        
+    def expression(self, rbp=0):
+        """
+        Parse and return the value of an expression until a token with
+        right binding power greater than rbp is encountered.
+        """
+        t = self.token
+        self.token = self.iter.next()
+        left = t.nud(self)
+        while rbp < self.token.lbp:
+            t = self.token
+            self.token = self.iter.next()
+            left = t.led(self, left)
+        return left
+
+    def parse(self):
+        """
+        Parse and return the value of the expression in the text
+        passed to the constructor.
+        """
+        self.iter = self._tokenize()
+        self.token = self.iter.next()
+        return self.expression()
+
+class ExpressionParserUnittest(unittest.TestCase):
+    def parse(self, text, values):
+        return ExpressionParser(text, values).parse()
+    
+    def test_BasicValues(self):
+        self.assertEqual(1, self.parse("1", {}))
+        self.assertEqual(100, self.parse("100", {}))
+        self.assertEqual(True, self.parse("true", {}))
+        self.assertEqual(False, self.parse("false", {}))
+        self.assertEqual("", self.parse('""', {}))
+        self.assertEqual("foo bar", self.parse('"foo bar"', {}))
+        self.assertEqual(1, self.parse("foo", {'foo':1}))
+        self.assertEqual(True, self.parse("bar", {'bar':True}))
+        self.assertEqual("xyz", self.parse("abc123", {'abc123':"xyz"}))
+
+    def test_Equality(self):
+        self.assertTrue(self.parse("true == true", {}))
+        self.assertTrue(self.parse("false == false", {}))
+        self.assertTrue(self.parse("false == false", {}))
+        self.assertTrue(self.parse("1 == 1", {}))
+        self.assertTrue(self.parse("100 == 100", {}))
+        self.assertTrue(self.parse('"some text" == "some text"', {}))
+        self.assertTrue(self.parse("true != false", {}))
+        self.assertTrue(self.parse("1 != 2", {}))
+        self.assertTrue(self.parse('"text" != "other text"', {}))
+        self.assertTrue(self.parse("foo == true", {'foo': True}))
+        self.assertTrue(self.parse("foo == 1", {'foo': 1}))
+        self.assertTrue(self.parse('foo == "bar"', {'foo': 'bar'}))
+        self.assertTrue(self.parse("foo == bar", {'foo': True, 'bar': True}))
+        self.assertTrue(self.parse("true == foo", {'foo': True}))
+        self.assertTrue(self.parse("foo != true", {'foo': False}))
+        self.assertTrue(self.parse("foo != 2", {'foo': 1}))
+        self.assertTrue(self.parse('foo != "bar"', {'foo': 'abc'}))
+        self.assertTrue(self.parse("foo != bar", {'foo': True, 'bar': False}))
+        self.assertTrue(self.parse("true != foo", {'foo': False}))
+
+    def test_Conjunctions(self):
+        self.assertTrue(self.parse("true && true", {}))
+        self.assertTrue(self.parse("true || false", {}))
+        self.assertFalse(self.parse("false || false", {}))
+        self.assertFalse(self.parse("true && false", {}))
+        
+if __name__ == '__main__':
+
+
+
+
+    unittest.main()
+
+#parser = ExpressionParser(sys.argv[1], dict((a,int(b)) for a,b in (x.split('=') for x in sys.argv[2:])))
+#print "%s: %s" % (sys.argv[1],parser.parse())