如何将EBNF语法用于小语言?
我正在做一个根据 EBNF 语法解析文件的作业，我对递归函数的工作方式有点困惑。
-前三个函数:
Prog ::= PROGRAM IDENT; DeclBlock ProgBody
DeclBlock ::= VAR {DeclStmt;}
DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
我知道你应该先调用 Prog,然后调用 DeclBlock,然后调用 DeclStmt。那么当你调用DeclBlock时,是只有看到分号还是看到另一个IDENT时才再次调用它? DeclStmt 也是如此,只有当你看到逗号时才再次调用它吗?
//Prog ::= PROGRAM IDENT; DeclBlock ProgBody
bool Prog(istream& in, int& line){
bool status;
LexItem t;
t = Parser::GetNextToken(in, line);
cout << t << endl;
if(t != PROGRAM){
ParseError(line, "Missing PROGRAM");
return false;
}
LexItem i = Parser::GetNextToken(in,line);
cout << i << endl;
if(i != IDENT){
ParseError(line, "Missing Program Name");
return false;
}
LexItem semi = Parser::GetNextToken(in, line);
cout << semi << endl;
if(semi != SEMICOL){
ParseError(line, "Missing SemiColon");
return false;
}
status = DeclBlock(in, line);
if(!status){
ParseError(line, "Incorrect Declaration Section.");
return false;
}
LexItem b = Parser::GetNextToken(in, line);
cout << "here at b" << b << endl;
if(b != BEGIN){
ParseError(line, "Non-recognizable Program Block");
ParseError(line, "Incorrect Program Section");
return false;
}
status = ProgBody(in, line);
if(!status){
ParseError(line, "Incorrect Program Block");
return false;
}
LexItem e = Parser::GetNextToken(in, line);
cout << e << endl;
if(e != END){
ParseError(line, "Non-recognizable Program Block");
ParseError(line, "Incorrect Program Section");
return false;
}
return true;
}
//DeclBlock ::= VAR {DeclStmt;}
bool DeclBlock(istream& in, int& line){
bool status = false;
LexItem v = Parser::GetNextToken(in, line);
cout << v << endl;
if(v != VAR){
ParseError(line, "Non-recognizable Declaration Block.");
return false;
}
status = DeclStmt(in, line);
if(!status){
ParseError(line, "Syntactic error in Declaration Block.");
return false;
}
return true;
}
// DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
//
// Parses one declaration statement, including the ';' that terminates it
// (the semicolon is consumed here, not by the caller).
//
// Every declared name is recorded in defVar; declaring a name that is already
// present is an error. The `{, Ident}` repetition is handled by recursion:
// after a comma the function calls itself to parse the rest of the statement,
// so the innermost call is the one that reads ':', the type and ';'.
//
// Parameters:
//   in   - stream the tokens are read from
//   line - line counter passed to Parser::GetNextToken and ParseError
// Returns true when the statement matched, false after the first error.
bool DeclStmt(istream& in, int& line){
    LexItem tok = Parser::GetNextToken(in, line);
    cout << "here too " << tok << endl;

    // A declaration must start - and continue after every comma - with an
    // identifier. Without this check the lexeme of any token would be
    // recorded as a declared variable.
    if(tok != IDENT){
        ParseError(line, "Missing Variable Name");
        return false;
    }

    if(defVar.find(tok.GetLexeme()) != defVar.end()){
        cout << "Var Exists!" << endl;
        ParseError(line, "Var cant be redeclared");
        return false;
    }
    defVar.insert({tok.GetLexeme(), true});

    LexItem c = Parser::GetNextToken(in, line);
    cout << c << endl;

    // ", Ident ..." - parse the remainder of the identifier list recursively.
    if(c == COMMA){
        return DeclStmt(in, line);
    }

    // Two identifiers in a row: the separating comma is missing.
    if(c.GetToken() == IDENT){
        ParseError(line, "Unrecognized Input Pattern");
        cout << "( here " << c.GetLexeme() << ")" << endl;
        return false;
    }

    if(c != COLON){
        ParseError(line, "Missing Colon");
        return false;
    }

    // NOTE(review): the type keyword is matched by its upper-case lexeme;
    // this presumably relies on how the lexer stores keyword lexemes - confirm.
    LexItem t = Parser::GetNextToken(in, line);
    const auto typeName = t.GetLexeme();
    cout << "here t " << typeName << endl;
    if(typeName != "REAL" && typeName != "INTEGER" && typeName != "STRING"){
        ParseError(line, "Incorrect Declaration Type.");
        return false;
    }

    LexItem semi = Parser::GetNextToken(in, line);
    cout << semi << endl;
    if(semi != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    return true;
}
I am working on an assignment that parses a file using an EBNF grammar, and I am a little confused about how the recursive functions work.
-first three functions:
Prog ::= PROGRAM IDENT; DeclBlock ProgBody
DeclBlock ::= VAR {DeclStmt;}
DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
I know that you're supposed to call Prog first, then DeclBlock, and then DeclStmt. So when you call DeclBlock, do you call it again only when you see the semicolon, or when you see another IDENT? The same goes for DeclStmt: do you call it again only when you see a comma?
//Prog ::= PROGRAM IDENT; DeclBlock ProgBody
bool Prog(istream& in, int& line){
bool status;
LexItem t;
t = Parser::GetNextToken(in, line);
cout << t << endl;
if(t != PROGRAM){
ParseError(line, "Missing PROGRAM");
return false;
}
LexItem i = Parser::GetNextToken(in,line);
cout << i << endl;
if(i != IDENT){
ParseError(line, "Missing Program Name");
return false;
}
LexItem semi = Parser::GetNextToken(in, line);
cout << semi << endl;
if(semi != SEMICOL){
ParseError(line, "Missing SemiColon");
return false;
}
status = DeclBlock(in, line);
if(!status){
ParseError(line, "Incorrect Declaration Section.");
return false;
}
LexItem b = Parser::GetNextToken(in, line);
cout << "here at b" << b << endl;
if(b != BEGIN){
ParseError(line, "Non-recognizable Program Block");
ParseError(line, "Incorrect Program Section");
return false;
}
status = ProgBody(in, line);
if(!status){
ParseError(line, "Incorrect Program Block");
return false;
}
LexItem e = Parser::GetNextToken(in, line);
cout << e << endl;
if(e != END){
ParseError(line, "Non-recognizable Program Block");
ParseError(line, "Incorrect Program Section");
return false;
}
return true;
}
//DeclBlock ::= VAR {DeclStmt;}
bool DeclBlock(istream& in, int& line){
bool status = false;
LexItem v = Parser::GetNextToken(in, line);
cout << v << endl;
if(v != VAR){
ParseError(line, "Non-recognizable Declaration Block.");
return false;
}
status = DeclStmt(in, line);
if(!status){
ParseError(line, "Syntactic error in Declaration Block.");
return false;
}
return true;
}
// DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
//
// Parses one declaration statement, including the ';' that terminates it
// (the semicolon is consumed here, not by the caller).
//
// Every declared name is recorded in defVar; declaring a name that is already
// present is an error. The `{, Ident}` repetition is handled by recursion:
// after a comma the function calls itself to parse the rest of the statement,
// so the innermost call is the one that reads ':', the type and ';'.
//
// Parameters:
//   in   - stream the tokens are read from
//   line - line counter passed to Parser::GetNextToken and ParseError
// Returns true when the statement matched, false after the first error.
bool DeclStmt(istream& in, int& line){
    LexItem tok = Parser::GetNextToken(in, line);
    cout << "here too " << tok << endl;

    // A declaration must start - and continue after every comma - with an
    // identifier. Without this check the lexeme of any token would be
    // recorded as a declared variable.
    if(tok != IDENT){
        ParseError(line, "Missing Variable Name");
        return false;
    }

    if(defVar.find(tok.GetLexeme()) != defVar.end()){
        cout << "Var Exists!" << endl;
        ParseError(line, "Var cant be redeclared");
        return false;
    }
    defVar.insert({tok.GetLexeme(), true});

    LexItem c = Parser::GetNextToken(in, line);
    cout << c << endl;

    // ", Ident ..." - parse the remainder of the identifier list recursively.
    if(c == COMMA){
        return DeclStmt(in, line);
    }

    // Two identifiers in a row: the separating comma is missing.
    if(c.GetToken() == IDENT){
        ParseError(line, "Unrecognized Input Pattern");
        cout << "( here " << c.GetLexeme() << ")" << endl;
        return false;
    }

    if(c != COLON){
        ParseError(line, "Missing Colon");
        return false;
    }

    // NOTE(review): the type keyword is matched by its upper-case lexeme;
    // this presumably relies on how the lexer stores keyword lexemes - confirm.
    LexItem t = Parser::GetNextToken(in, line);
    const auto typeName = t.GetLexeme();
    cout << "here t " << typeName << endl;
    if(typeName != "REAL" && typeName != "INTEGER" && typeName != "STRING"){
        ParseError(line, "Incorrect Declaration Type.");
        return false;
    }

    LexItem semi = Parser::GetNextToken(in, line);
    cout << semi << endl;
    if(semi != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    return true;
}
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(2)
`DeclBlock` 在此语言中只应调用一次。`DeclStmt` 可以多次调用。`DeclStmt` 被定义为一个或多个 `Ident`，后跟 `:`，再后跟类型，最后以 `;` 结尾。
在读取 `DeclStmt` 末尾的 `;` 之后，您需要读取下一个标记来决定下一步要做什么。如果下一个标记是另一个 `Ident`，您就知道您正处于另一个 `DeclStmt` 的开头，因此可以再次调用它。如果是其他任何标记，您就知道您正处于 `ProgBody` 的开头。（最后这一点是我的假设。通常您会查找启动 `ProgBody` 的标记，但这里没有给出。）
DeclBlock
should only be called once in this language. `DeclStmt` can be called multiple times. A `DeclStmt` is defined to be one or more `Ident` followed by `:`, followed by a type, then ending in a `;`.
After you read the `;` at the end of a `DeclStmt`, you'd then read the next token to decide what to do next. If the next token is another `Ident`, you know you're at the start of another `DeclStmt`, so you'd call that again. If it's anything else, you know you're at the start of `ProgBody`. (I'm assuming the last bit. Normally you'd look for the token that starts a `ProgBody`, but that's not shown.)
你的 DeclBlock 函数应该类似于
关键是你必须有一个解析器先行函数,它可以为你提供下一个标记而不消耗它。
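Your DeclBlock function should be something like this: after consuming VAR, look at the next token without consuming it, and keep calling DeclStmt for as long as that token is an IDENT.
The key thing is that you MUST have a parser lookahead function that gives you the next token WITHOUT consuming it.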