Mercurial > hg > expressionparser
view expr.py @ 11:e17a3464a0b9
get precedence from a list position vs magic constants
author | Jeff Hammel <jhammel@mozilla.com> |
---|---|
date | Fri, 03 Jun 2011 10:51:36 -0700 |
parents | 15fb1081784f |
children | 835efd8acb04 |
line wrap: on
line source
#!/usr/bin/env python # Implements a top-down parser/evaluator for simple boolean expressions. # ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm # # Rough grammar: # expr := literal # | '(' expr ')' # | expr '&&' expr # | expr '||' expr # | expr '==' expr # | expr '!=' expr # literal := BOOL # | INT # | STRING # | IDENT # BOOL := true|false # INT := [0-9]+ # STRING := "[^"]*" # IDENT := [A-Za-z_]\w* # Identifiers take their values from a mapping dictionary passed as the second # argument. __all__ = ['parse', 'ParseError', 'ExpressionParser'] import re # token classes class token(object): pass class ident_token(object): def __init__(self, value): self.value = value def nud(self, parser): # identifiers take their value from the value mappings passed # to the parser return parser.value(self.value) class literal_token(object): def __init__(self, value): self.value = value def nud(self, parser): return self.value class eq_op_token(object): "==" def led(self, parser, left): return left == parser.expression(self.lbp) class neq_op_token(object): "!=" def led(self, parser, left): return left != parser.expression(self.lbp) class and_op_token(object): "&&" def led(self, parser, left): right = parser.expression(self.lbp) return left and right class or_op_token(object): "||" def led(self, parser, left): right = parser.expression(self.lbp) return left or right class lparen_token(object): "(" def nud(self, parser): expr = parser.expression() parser.advance(rparen_token) return expr class rparen_token(object): ")" class end_token(object): """always ends parsing""" ### derived literal tokens class bool_token(literal_token): def __init__(self, value): value = {'true':True, 'false':False}[value] literal_token.__init__(self, value) class int_token(literal_token): def __init__(self, value): literal_token.__init__(self, int(value)) class string_token(literal_token): def __init__(self, value): literal_token.__init__(self, value[1:-1]) precedence = [(end_token, rparen_token), (or_op_token,), (and_op_token,), (eq_op_token, neq_op_token), (lparen_token,), ] for index, rank in enumerate(precedence): for token in rank: token.lbp = index # lbp = lowest left binding power class ParseError(Exception): """errror parsing conditional expression""" class ExpressionParser(object): def __init__(self, text, valuemapping): """ Initialize the parser with input |text|, and |valuemapping| as a dict mapping identifier names to values. """ self.text = text self.valuemapping = valuemapping def _tokenize(self): """ Lex the input text into tokens and yield them in sequence. """ # scanner callbacks def bool_(scanner, t): return bool_token(t) def identifier(scanner, t): return ident_token(t) def integer(scanner, t): return int_token(t) def eq(scanner, t): return eq_op_token() def neq(scanner, t): return neq_op_token() def or_(scanner, t): return or_op_token() def and_(scanner, t): return and_op_token() def lparen(scanner, t): return lparen_token() def rparen(scanner, t): return rparen_token() def string_(scanner, t): return string_token(t) scanner = re.Scanner([ (r"true|false", bool_), (r"[a-zA-Z_]\w*", identifier), (r"[0-9]+", integer), (r'"[^"]*"', string_), (r"==", eq), (r"!=", neq), (r"\|\|", or_), (r"&&", and_), (r"\(", lparen), (r"\)", rparen), (r"\s+", None), # skip whitespace ]) tokens, remainder = scanner.scan(self.text) for t in tokens: yield t yield end_token() def value(self, ident): """ Look up the value of |ident| in the value mapping passed in the constructor. """ return self.valuemapping[ident] def advance(self, expected): """ Assert that the next token is an instance of |expected|, and advance to the next token. """ if not isinstance(self.token, expected): raise Exception, "Unexpected token!" self.token = self.iter.next() def expression(self, rbp=0): """ Parse and return the value of an expression until a token with right binding power greater than rbp is encountered. """ t = self.token self.token = self.iter.next() left = t.nud(self) while rbp < self.token.lbp: t = self.token self.token = self.iter.next() left = t.led(self, left) return left def parse(self): """ Parse and return the value of the expression in the text passed to the constructor. Raises a ParseError if the expression could not be parsed. """ try: self.iter = self._tokenize() self.token = self.iter.next() return self.expression() except: raise ParseError("could not parse: %s" % self.text) __call__ = parse def parse(text, **values): """ Parse and evaluate a boolean expression in |text|. Use |values| to look up the value of identifiers referenced in the expression. Returns the final value of the expression. A ParseError will be raised if parsing fails. """ return ExpressionParser(text, values).parse()