Using Python Yacc/Lex as a formula parser
At the moment I'm working with the Python implementation of Yacc/Lex (PLY) to build a formula parser for converting strings of formulae into a set of class-defined operands. So far I've been mostly successful, but I've reached an impasse in defining the parsing rules, due to ambiguity with parentheses and several shift/reduce errors.
The Backus-Naur form for the formulae I've been working on is
phi ::= p ; !p ; phi_0 & phi_1 ; phi_0 | phi_1 ; AX phi ; AF phi ; AG phi ; AU phi_0 U phi_1.
I've also been trying to allow arbitrary matched parentheses, but this is where a lot of the confusion comes from, and I suspect it is also where the shift/reduce errors originate. For the task I'm applying this to, parentheses are fairly necessary to force specific evaluation order on formulas, so I have to work that out.
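For reference, the conventional yacc-style treatment of nesting needs only a single parenthesis production plus a precedence table, with each rule setting p[0] instead of pushing onto a shared stack. The following is a minimal PLY sketch along those lines, purely illustrative; it assumes the token names and the And/Or/Neg classes that appear in the code below:

import ply.yacc as yacc

# lowest-precedence operators come first in PLY's precedence table
precedence = (
    ('left', 'DISJUNCTION'),
    ('left', 'CONJUNCTION'),
    ('right', 'NEGATION'),
)

def p_formula_binop(p):
    '''formula : formula CONJUNCTION formula
               | formula DISJUNCTION formula'''
    # the precedence table resolves the shift/reduce ambiguity here
    p[0] = And(p[1], p[3]) if p[2].strip() == '&' else Or(p[1], p[3])

def p_formula_neg(p):
    'formula : NEGATION formula'
    p[0] = Neg(p[2])

def p_formula_paren(p):
    'formula : LPAREN formula RPAREN'
    p[0] = p[2]  # parentheses only group; nesting falls out of the recursion

def p_formula_prop(p):
    'formula : PROPOSITION'
    p[0] = p[1].strip()  # .strip() because the token regexes keep whitespace

parser = yacc.yacc()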
Currently my parser is defined inside a class which builds its lexical analyser with
tokens = (
    'NEGATION',
    'FUTURE',
    'GLOBAL',
    'NEXT',
    'CONJUNCTION',
    'DISJUNCTION',
    'EQUIVALENCE',
    'IMPLICATION',
    'PROPOSITION',
    'LPAREN',
    'RPAREN',
    'TRUE',
    'FALSE',
)

# regex in order of parsing precedence
t_NEGATION = r'[\s]*\![\s]*'
t_FUTURE = r'[\s]*AF[\s]*'
t_GLOBAL = r'[\s]*AG[\s]*'
t_NEXT = r'[\s]*AX[\s]*'
t_CONJUNCTION = r'[\s]*\&[\s]*'
t_DISJUNCTION = r'[\s]*\|[\s]*'
t_EQUIVALENCE = r'[\s]*\<\-\>[\s]*'
t_IMPLICATION = r'[\s]*[^<]\-\>[\s]*'
t_LPAREN = r'[\s]*\([\s]*'
t_RPAREN = r'[\s]*\)[\s]*'
t_PROPOSITION = r'[\s]*[a-z]+[-\w\._]*[\s]*'
t_TRUE = r'[\s]*TRUE[\s]*'
t_FALSE = r'[\s]*FALSE[\s]*'

precedence = (
    ('left', 'ASSIGNMENT'),
    ('left', 'NEGATION'),
    ('left', 'GLOBAL', 'NEXT', 'FUTURE'),
    ('left', 'CONJUNCTION'),
    ('left', 'DISJUNCTION'),
    ('left', 'EQUIVALENCE'),
    ('left', 'IMPLICATION'),
    ('left', 'AUB', 'AUM'),
    ('left', 'LPAREN', 'RPAREN', 'TRUE', 'FALSE'),
)

lexer = lex.lex()
lexer.input(formula)
And the parsing rules as
def p_double_neg_paren(p):
    '''formula : NEGATION LPAREN NEGATION LPAREN PROPOSITION RPAREN RPAREN
    '''
    stack.append(p[5].strip())

def p_double_neg(p):
    '''formula : NEGATION NEGATION PROPOSITION
    '''
    stack.append(p[3].strip())

def p_double_neg_inner_paren(p):
    '''formula : NEGATION NEGATION LPAREN PROPOSITION RPAREN
    '''
    stack.append(p[4].strip())

def p_double_neg_mid_paren(p):
    '''formula : NEGATION LPAREN NEGATION PROPOSITION RPAREN
    '''
    stack.append(p[4].strip())

def p_groupAssignment(p):
    '''formula : PROPOSITION ASSIGNMENT ASSIGNVAL
    '''
    stack.append(p[1].strip() + p[2].strip() + p[3].strip())

def p_neg_paren_take_outer_token(p):
    '''formula : NEGATION LPAREN PROPOSITION RPAREN
               | NEGATION LPAREN TRUE RPAREN
               | NEGATION LPAREN FALSE RPAREN
    '''
    stack.append(Neg(p[3]))

def p_neg_take_outer_token(p):
    '''formula : NEGATION PROPOSITION
               | NEGATION TRUE
               | NEGATION FALSE
    '''
    stack.append(Neg(p[2].strip()))

def p_neg_take_outer_token_paren(p):
    '''formula : LPAREN NEGATION PROPOSITION RPAREN
               | LPAREN NEGATION TRUE RPAREN
               | LPAREN NEGATION FALSE RPAREN
    '''
    stack.append(Neg(p[3].strip()))

def p_unary_paren_nest_take_outer_token(p):
    '''formula : GLOBAL LPAREN LPAREN NEGATION formula RPAREN RPAREN
               | NEXT LPAREN LPAREN NEGATION formula RPAREN RPAREN
               | FUTURE LPAREN LPAREN NEGATION formula RPAREN RPAREN
    '''
    if len(stack) >= 1:
        if p[1].strip() == 'AG':
            stack.append(['AG', ['!', stack.pop()]])
        elif p[1].strip() == 'AF':
            stack.append(['AF', ['!', stack.pop()]])
        elif p[1].strip() == 'AX':
            stack.append(['AX', ['!', stack.pop()]])

def p_unary_paren_take_outer_token(p):
    '''formula : GLOBAL LPAREN formula RPAREN
               | NEXT LPAREN formula RPAREN
               | FUTURE LPAREN formula RPAREN
    '''
    if len(stack) >= 1:
        if p[1].strip() == "AG":
            stack.append(AG(stack.pop()))
        elif p[1].strip() == "AF":
            stack.append(AF(stack.pop()))
        elif p[1].strip() == "AX":
            stack.append(AX(stack.pop()))

def p_unary_take_outer_token(p):
    '''formula : GLOBAL formula
               | NEXT formula
               | FUTURE formula
    '''
    if len(stack) >= 1:
        if p[1].strip() == "AG":
            stack.append(AG(stack.pop()))
        elif p[1].strip() == "AF":
            stack.append(AF(stack.pop()))
        elif p[1].strip() == "AX":
            stack.append(AX(stack.pop()))

def p_unary_take_outer_token_prop(p):
    '''formula : GLOBAL PROPOSITION
               | NEXT PROPOSITION
               | FUTURE PROPOSITION
    '''
    if len(stack) >= 1:
        if p[1].strip() == "AG":
            stack.append(AG(stack.pop()))
        elif p[1].strip() == "AF":
            stack.append(AF(stack.pop()))
        elif p[1].strip() == "AX":
            stack.append(AX(stack.pop()))

def p_binary_take_outer_token(p):
    '''formula : formula CONJUNCTION formula
               | formula DISJUNCTION formula
               | formula EQUIVALENCE formula
               | formula IMPLICATION formula
    '''
    if len(stack) >= 2:
        a, b = stack.pop(), stack.pop()
        if self.IMPLICATION.search(p[2].strip()) and not self.EQUIVALENCE.search(p[2].strip()):
            stack.append(Or(a, Neg(b)))
        elif self.EQUIVALENCE.search(p[2].strip()):
            stack.append(And(Or(Neg(a), b), Or(Neg(b), a)))
        else:
            if p[2].strip() == "|":
                stack.append(Or(b, a))
            elif p[2].strip() == "&":
                stack.append(And(b, a))

def p_binary_paren_take_outer_token(p):
    '''formula : LPAREN formula RPAREN CONJUNCTION LPAREN formula RPAREN
               | LPAREN formula RPAREN DISJUNCTION LPAREN formula RPAREN
               | LPAREN formula RPAREN EQUIVALENCE LPAREN formula RPAREN
               | LPAREN formula RPAREN IMPLICATION LPAREN formula RPAREN
    '''
    if len(stack) >= 2:
        a, b = stack.pop(), stack.pop()
        if self.IMPLICATION.search(p[4].strip()) and not self.EQUIVALENCE.search(p[4].strip()):
            stack.append(Or(a, Neg(b)))
        elif self.EQUIVALENCE.search(p[4].strip()):
            stack.append(And(Or(Neg(a), b), Or(Neg(b), a)))
        else:
            if p[4].strip() == "|":
                stack.append(Or(b, a))
            elif p[4].strip() == "&":
                stack.append(And(b, a))

def p_binary_lparen_take_outer_token(p):
    '''formula : LPAREN formula RPAREN CONJUNCTION formula
               | LPAREN formula RPAREN DISJUNCTION formula
               | LPAREN formula RPAREN EQUIVALENCE formula
               | LPAREN formula RPAREN IMPLICATION formula
    '''
    if len(stack) >= 2:
        a = stack.pop()
        b = stack.pop()
        if self.IMPLICATION.search(p[4].strip()) and not self.EQUIVALENCE.search(p[4].strip()):
            stack.append(Or(a, Neg(b)))
        elif self.EQUIVALENCE.search(p[4].strip()):
            stack.append(And(Or(Neg(a), b), Or(Neg(b), a)))
        else:
            if p[4].strip() == "|":
                stack.append(Or(b, a))
            elif p[4].strip() == "&":
                stack.append(And(b, a))

def p_binary_rparen_take_outer_token(p):
    '''formula : formula CONJUNCTION LPAREN formula RPAREN
               | formula DISJUNCTION LPAREN formula RPAREN
               | formula EQUIVALENCE LPAREN formula RPAREN
               | formula IMPLICATION LPAREN formula RPAREN
    '''
    if len(stack) >= 2:
        a = stack.pop()
        b = stack.pop()
        if self.IMPLICATION.search(p[4].strip()) and not self.EQUIVALENCE.search(p[4].strip()):
            stack.append(Or(a, Neg(b)))
        elif self.EQUIVALENCE.search(p[4].strip()):
            stack.append(And(Or(Neg(a), b), Or(Neg(b), a)))
        else:
            if p[4].strip() == "|":
                stack.append(Or(b, a))
            elif p[4].strip() == "&":
                stack.append(And(b, a))

def p_proposition_take_token_paren(p):
    '''formula : LPAREN formula RPAREN
    '''
    stack.append(p[2].strip())

def p_proposition_take_token_atom(p):
    '''formula : LPAREN PROPOSITION RPAREN
    '''
    stack.append(p[2].strip())

def p_proposition_take_token(p):
    '''formula : PROPOSITION
    '''
    stack.append(p[1].strip())

def p_true_take_token(p):
    '''formula : TRUE
    '''
    stack.append(p[1].strip())

def p_false_take_token(p):
    '''formula : FALSE
    '''
    stack.append(p[1].strip())

# Error rule for syntax errors
def p_error(p):
    print "Syntax error in input!: " + str(p)
    os.system("pause")
    return 0
I can see the lex/yacc rules are fairly messy; I've removed much of the debugging code from each rule for brevity and tidiness, but can anyone see where I'm going wrong here? Should I move the handling of parentheses into another method, or can it be done with what I have now? Is there some other way to process these formula strings into the predefined class operations without getting all the shift/reduce errors?
Sorry for airing all my dirty code online, but I could really use some help with something that's been bugging me for months. Thanks.
2 Answers
START SMALL!!! Regardless of the parser library you end up using, try doing just a simple binary operation like expr & expr, and get that working. Then add support for '|'. Now you have two different operators, and you have enough to represent precedence of operations, and parentheses actually play a part. This BNF would look something like:
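For instance, a sketch of that kind of grammar (a reconstruction in the spirit of the answer, not the answerer's exact BNF):

expr     ::= and_expr ( '|' and_expr )*
and_expr ::= atom ( '&' atom )*
atom     ::= PROPOSITION | '(' expr ')'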
Do you see how this works? No explicit "take left paren", "pop right paren" stuff. Figure out what the precedence of your other operations is going to be, and then extend this recursive infix-notation parser to reflect them. But DON'T DO ANYTHING ELSE until you get this minimal bit working first. Otherwise, you are just trying to solve too much at once.
Parsers are frustrating, and your notation is a non-trivial one. Creating a parser for infix notation takes a certain mindset. But if this is a core part of your system, you'll have to get it working at some point. I'm not sure what your confusion with lepl was; I believe it is fairly similar in concept to pyparsing. In the spirit of SO, maybe I can post a pyparsing starter for you.
Your BNF didn't really match your lexing code, as your code includes references to the '<->' and '->' operators, an assignment statement, and a proposition, which I assume is basically a lowercase identifier. I looked for an online reference for this language but didn't find one. Also, you didn't post any test cases. So I took my best guess at what your language's BNF is supposed to be.
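The answer's original grammar is not reproduced here, so the following is a stand-in sketch built on pyparsing's infixNotation helper (called operatorPrecedence in older releases); the operator table is a guess at the intended language:

import string
from pyparsing import Keyword, Word, alphanums, infixNotation, opAssoc, oneOf

TRUE = Keyword("TRUE")
FALSE = Keyword("FALSE")
# a proposition: a lowercase word, optionally containing '.', '_' or '-'
proposition = Word(string.ascii_lowercase, alphanums + "._-")
atom = TRUE | FALSE | proposition

# operator table, tightest-binding first; infixNotation also takes
# care of arbitrarily nested parentheses
formula = infixNotation(atom, [
    ("!", 1, opAssoc.RIGHT),                # negation
    (oneOf("AX AF AG"), 1, opAssoc.RIGHT),  # temporal operators
    ("&", 2, opAssoc.LEFT),                 # conjunction
    ("|", 2, opAssoc.LEFT),                 # disjunction
    ("->", 2, opAssoc.RIGHT),               # implication
    ("<->", 2, opAssoc.LEFT),               # equivalence
])

for test in ["!p & q", "AG (p -> AF q)", "(p | q) <-> !r"]:
    print(formula.parseString(test, parseAll=True).asList())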
Prints, for the three test formulas in the sketch above (exact nesting may vary across pyparsing versions):
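[[['!', 'p'], '&', 'q']]
[['AG', ['p', '->', ['AF', 'q']]]]
[[['p', '|', 'q'], '<->', ['!', 'r']]]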
The Group classes help structure the results into a quasi-AST. There are a number of examples on the pyparsing wiki that will help you take it from here. I would recommend looking at the simpleBool.py example to see how to have the parser produce an evaluator.
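To go from nested lists to class-defined operands, the simpleBool.py technique is to attach a parse action at each operator level. A hypothetical adapter, assuming the Neg/And/Or classes from the question and the atom defined in the sketch above:

def fold_binary(cls):
    # left-associative chains like 'a & b & c' arrive as one flat group
    # ['a', '&', 'b', '&', 'c']; fold them into nested cls(...) nodes
    def action(tokens):
        t = tokens[0]
        result = t[0]
        for operand in t[2::2]:
            result = cls(result, operand)
        return result
    return action

formula = infixNotation(atom, [
    ("!", 1, opAssoc.RIGHT, lambda t: Neg(t[0][1])),
    ("&", 2, opAssoc.LEFT, fold_binary(And)),
    ("|", 2, opAssoc.LEFT, fold_binary(Or)),
])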