view expr.py @ 7:325dccc38308

begin using subclasses for tokens; the eventual goal is that each token class knows everything it can about itself, while the parser only knows about tokens in general
author Jeff Hammel <jhammel@mozilla.com>
date Fri, 03 Jun 2011 08:31:29 -0700
parents a42bb6dc2fa7
children 9b2bf000aeed

#!/usr/bin/env python

# Implements a top-down parser/evaluator for simple boolean expressions.
# ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm
#
# Rough grammar:
# expr := literal
#       | '(' expr ')'
#       | expr '&&' expr
#       | expr '||' expr
#       | expr '==' expr
#       | expr '!=' expr
# literal := BOOL
#          | INT
#          | STRING
#          | IDENT
# BOOL   := true|false
# INT    := [0-9]+
# STRING := "[^"]*"
# IDENT  := [A-Za-z_]\w*

# Identifiers take their values from the mapping dictionary passed as the
# second argument to ExpressionParser (keyword arguments to the module-level
# parse() helper).
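#
# For example (using the parse() helper defined at the bottom of this file;
# the identifier values shown here are purely illustrative):
#
#   parse('true && false')             -> False
#   parse('1 == 1 || "a" == "b"')      -> True
#   parse('os == "win"', os='linux')   -> False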

__all__ = ['parse', 'ParseError']
import re

# token classes
class ident_token(object):
    def __init__(self, value):
        self.value = value
    def nud(self, parser):
        # identifiers take their value from the value mappings passed
        # to the parser
        return parser.value(self.value)

class literal_token(object):
    def __init__(self, value):
        self.value = value
    def nud(self, parser):
        return self.value

class eq_op_token(object):
    "=="
    lbp = 20
    def led(self, parser, left):
        return left == parser.expression(self.lbp)
    
class neq_op_token(object):
    "!="
    lbp = 20
    def led(self, parser, left):
        return left != parser.expression(self.lbp)

class and_op_token(object):
    "&&"
    lbp = 11
    def led(self, parser, left):
        right = parser.expression(self.lbp)
        return left and right
    
class or_op_token(object):
    "||"
    lbp = 10
    def led(self, parser, left):
        right = parser.expression(self.lbp)
        return left or right

class lparen_token(object):
    "("
    lbp = 50
    def nud(self, parser):
        expr = parser.expression()
        parser.advance(rparen_token)
        return expr

class rparen_token(object):
    ")"
    lbp = 0

class end_token(object):
    # lowest left binding power, always ends parsing
    lbp = 0

class bool_token(literal_token):
    def __init__(self, value):
        value = {'true':True, 'false':False}[value]
        literal_token.__init__(self, value)

precedence = [(end_token, rparen_token),
              (or_op_token,),
              (and_op_token,),
              (eq_op_token, neq_op_token),
              (lparen_token,),
              ]
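# The precedence table above orders token classes from lowest to highest
# binding power, matching the relative ordering of the lbp values hard-coded
# on each class; it is not referenced elsewhere in this file. Purely as an
# illustrative sketch, lbp values with the same ordering could be derived
# from the table like so:
#
#   for rank, classes in enumerate(precedence):
#       for token_class in classes:
#           token_class.lbp = rank * 10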

class ParseError(Exception):
    """errror parsing conditional expression"""

class ExpressionParser(object):
    def __init__(self, text, valuemapping):
        """
        Initialize the parser with input |text|, and |valuemapping| as
        a dict mapping identifier names to values.
        """
        self.text = text
        self.valuemapping = valuemapping

    def _tokenize(self):
        """
        Lex the input text into tokens and yield them in sequence.
        """
        # scanner callbacks
        def bool_(scanner, t): return bool_token(t)
        def identifier(scanner, t): return ident_token(t)
        def integer(scanner, t): return literal_token(int(t))
        def eq(scanner, t): return eq_op_token()
        def neq(scanner, t): return neq_op_token()
        def or_(scanner, t): return or_op_token()
        def and_(scanner, t): return and_op_token()
        def lparen(scanner, t): return lparen_token()
        def rparen(scanner, t): return rparen_token()
        def string_(scanner, t): return literal_token(t[1:-1])

        scanner = re.Scanner([
            (r"true|false", bool_),
            (r"[a-zA-Z_]\w*", identifier),
            (r"[0-9]+", integer),
            (r'"[^"]*"', string_),
            (r"==", eq),
            (r"!=", neq),
            (r"\|\|", or_),
            (r"&&", and_),
            (r"\(", lparen),
            (r"\)", rparen),
            (r"\s+", None), # skip whitespace
            ])
        tokens, remainder = scanner.scan(self.text)
        # anything the scanner could not lex is an error in the expression
        if remainder:
            raise ParseError("could not tokenize: %r" % remainder)
        for t in tokens:
            yield t
        yield end_token()
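
    # For example, tokenizing '"linux" == os' yields a literal_token
    # ('linux'), an eq_op_token, an ident_token ('os') and finally an
    # end_token; whitespace is skipped by the scanner.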

    def value(self, ident):
        """
        Look up the value of |ident| in the value mapping passed in the
        constructor.
        """
        return self.valuemapping[ident]

    def advance(self, expected):
        """
        Assert that the next token is an instance of |expected|, and advance
        to the next token.
        """
        if not isinstance(self.token, expected):
            raise ParseError("unexpected token!")
        self.token = self.iter.next()
        
    def expression(self, rbp=0):
        """
        Parse and return the value of an expression, consuming tokens for
        as long as their left binding power is greater than |rbp|.
        """
        t = self.token
        self.token = self.iter.next()
        left = t.nud(self)
        while rbp < self.token.lbp:
            t = self.token
            self.token = self.iter.next()
            left = t.led(self, left)
        return left
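
    # For example, evaluating 'true || false && false' starts at
    # expression(0): 'true' is read, then '||' (lbp 10 > 0) takes over and
    # calls expression(10) for its right-hand side; that call reads 'false'
    # and, because '&&' has lbp 11 > 10, recurses once more, so '&&' binds
    # tighter than '||' and the whole expression evaluates to True.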

    def parse(self):
        """
        Parse and return the value of the expression in the text
        passed to the constructor. Raises a ParseError if the expression
        could not be parsed.
        """
        try:
            self.iter = self._tokenize()
            self.token = self.iter.next()
            return self.expression()
        except Exception:
            raise ParseError("could not parse: %s" % self.text)

    __call__ = parse

def parse(text, **values):
    """
    Parse and evaluate a boolean expression in |text|. Use |values| to look
    up the value of identifiers referenced in the expression. Returns the final
    value of the expression. A ParseError will be raised if parsing fails.
    """
    return ExpressionParser(text, values).parse()
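
# A minimal usage sketch; the expressions and identifier values below are
# illustrative only and not part of the module proper.
if __name__ == '__main__':
    print(parse('true && (1 == 1)'))                        # True
    print(parse('os == "win" || os == "linux"', os='mac'))  # False
    try:
        parse('&& true')
    except ParseError as e:
        print(e)  # could not parse: && true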