如何将EBNF语法用于小语言？

发布于 2025-01-19 18:49:14 字数 4160 浏览 0 评论 0原文

我正在做一个用 EBNF 语法解析文件的作业，我对递归函数的工作有点困惑。

-前三个函数：

Prog ::= PROGRAM IDENT; DeclBlock ProgBody
DeclBlock ::= VAR {DeclStmt;}
DeclStmt ::= Ident {, Ident} : (Integer | Real | String)

我知道你应该先调用 Prog，然后调用 DeclBlock，然后调用 DeclStmt。那么当你调用DeclBlock时，是只有看到分号还是看到另一个IDENT时才再次调用它？ DeclStmt 也是如此，只有当你看到逗号时才再次调用它吗？

// Prog ::= PROGRAM IDENT; DeclBlock ProgBody
//
// Top-level rule of the parser. Consumes the program header (PROGRAM, a
// program name, a semicolon), delegates to DeclBlock, expects BEGIN,
// delegates to ProgBody, and finally expects END. Every token fetched here
// is echoed to cout.
//
// in   - stream the tokens are read from
// line - current line number, passed through to the tokenizer and ParseError
//
// Returns true when the whole sequence matched; otherwise reports the first
// mismatch through ParseError and returns false.
bool Prog(istream& in, int& line){
    // --- header: PROGRAM <name> ; ---
    LexItem keyword = Parser::GetNextToken(in, line);
    cout << keyword << endl;
    if(keyword != PROGRAM){
        ParseError(line, "Missing PROGRAM");
        return false;
    }

    LexItem name = Parser::GetNextToken(in, line);
    cout << name << endl;
    if(name != IDENT){
        ParseError(line, "Missing Program Name");
        return false;
    }

    LexItem terminator = Parser::GetNextToken(in, line);
    cout << terminator << endl;
    if(terminator != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    // --- declaration section ---
    if(!DeclBlock(in, line)){
        ParseError(line, "Incorrect Declaration Section.");
        return false;
    }

    // --- body: BEGIN ... END (the keywords are consumed here, not in ProgBody) ---
    LexItem opener = Parser::GetNextToken(in, line);
    cout << "here at b" << opener << endl;
    if(opener != BEGIN){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    if(!ProgBody(in, line)){
        ParseError(line, "Incorrect Program Block");
        return false;
    }

    LexItem closer = Parser::GetNextToken(in, line);
    cout << closer << endl;
    if(closer != END){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    return true;
}
// DeclBlock ::= VAR {DeclStmt;}
//
// Parses the declaration section: the VAR keyword followed by ZERO OR MORE
// declaration statements. The braces in the grammar are a repetition, so
// DeclStmt is called in a loop, once per declaration, for as long as the
// upcoming token is an identifier (the only token that can start a DeclStmt).
//
// in   - stream the tokens are read from
// line - current line number, passed through to the tokenizer and ParseError
//
// Returns true when VAR and every following declaration parsed; otherwise
// reports through ParseError and returns false. On success the first token
// that does not start a declaration is left unread for the caller.
bool DeclBlock(istream& in, int& line){
    LexItem v = Parser::GetNextToken(in, line);

    cout << v << endl;

    if(v != VAR){
        ParseError(line, "Non-recognizable Declaration Block.");
        return false;
    }

    for(;;){
        // One-token lookahead: read the next token and immediately hand it
        // back, so that whoever parses next (DeclStmt or the caller) sees it.
        // NOTE(review): assumes the project's Parser provides
        // PushBackToken(LexItem&) with a one-token buffer that GetNextToken
        // drains first - confirm against the Parser wrapper.
        LexItem next = Parser::GetNextToken(in, line);
        Parser::PushBackToken(next);

        // Anything other than an identifier ends the declaration section.
        if(next != IDENT){
            return true;
        }

        // DeclStmt consumes the terminating ';' itself, so nothing is left
        // to read between two consecutive declarations.
        if(!DeclStmt(in, line)){
            ParseError(line, "Syntactic error in Declaration Block.");
            return false;
        }
    }
}

// DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
//
// Parses one declaration statement: a comma-separated list of identifiers,
// a colon, a type name, and the terminating semicolon. Each identifier is
// recorded in defVar; declaring a name that is already present is an error.
// Every token fetched here is echoed to cout.
//
// Note: the grammar attaches the ';' to DeclBlock, but this function consumes
// it; that is kept so existing callers continue to work.
//
// in   - stream the tokens are read from
// line - current line number, passed through to the tokenizer and ParseError
//
// Returns true when the full statement (including ';') matched; otherwise
// reports through ParseError and returns false.
bool DeclStmt(istream& in, int& line){
    LexItem c;

    // Ident {, Ident}: read identifiers for as long as each one is followed
    // by a comma. The token that ends the list is left in c.
    for(;;){
        LexItem tok = Parser::GetNextToken(in, line);
        cout << "here too " <<  tok << endl;

        // Only a real identifier may be declared; without this check any
        // token's lexeme (keyword, punctuation, ...) would be stored in
        // defVar as if it were a variable.
        if(tok != IDENT){
            ParseError(line, "Missing Identifier in Declaration");
            return false;
        }

        if (defVar.find(tok.GetLexeme()) != defVar.end()) {
            cout << "Var Exists!" << endl;
            ParseError(line, "Var cant be redeclared");
            return false;
        }
        defVar.insert({tok.GetLexeme(), true});

        c = Parser::GetNextToken(in, line);
        cout << c << endl;

        if(!(c == COMMA)){
            break;
        }
    }

    // Two identifiers in a row means the separating comma is missing.
    if(c.GetToken() == IDENT){
        ParseError(line, "Unrecognized Input Pattern");
        cout << "( here " << c.GetLexeme() << ")" << endl;
        return false;
    }

    if(c != COLON){
        ParseError(line, "Missing Colon");
        return false;
    }

    LexItem t = Parser::GetNextToken(in, line);

    cout << "here t " << t.GetLexeme() << endl;

    // NOTE(review): the type is matched on the upper-case lexeme text, so this
    // presumably relies on the lexer normalizing keyword case - confirm.
    if(t.GetLexeme() != "REAL" && t.GetLexeme() != "INTEGER" && t.GetLexeme() != "STRING"){
        ParseError(line, "Incorrect Declaration Type.");
        return false;
    }

    LexItem semi = Parser::GetNextToken(in,line);

    cout << semi << endl;

    if(semi != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    return true;
}

原文

I am working on an assignment that parses a file using an EBNF grammar, and I am a little confused about how the recursive functions work.

-first three functions:

Prog ::= PROGRAM IDENT; DeclBlock ProgBody
DeclBlock ::= VAR {DeclStmt;}
DeclStmt ::= Ident {, Ident} : (Integer | Real | String)

I know that you're supposed to call Prog first, then DeclBlock, and then DeclStmt. So when you call DeclBlock, do you call it again only when you see the semicolon, or when you see another IDENT? The same goes for DeclStmt: do you call it again only when you see a comma?

// Prog ::= PROGRAM IDENT; DeclBlock ProgBody
//
// Top-level rule of the parser. Consumes the program header (PROGRAM, a
// program name, a semicolon), delegates to DeclBlock, expects BEGIN,
// delegates to ProgBody, and finally expects END. Every token fetched here
// is echoed to cout.
//
// in   - stream the tokens are read from
// line - current line number, passed through to the tokenizer and ParseError
//
// Returns true when the whole sequence matched; otherwise reports the first
// mismatch through ParseError and returns false.
bool Prog(istream& in, int& line){
    // --- header: PROGRAM <name> ; ---
    LexItem keyword = Parser::GetNextToken(in, line);
    cout << keyword << endl;
    if(keyword != PROGRAM){
        ParseError(line, "Missing PROGRAM");
        return false;
    }

    LexItem name = Parser::GetNextToken(in, line);
    cout << name << endl;
    if(name != IDENT){
        ParseError(line, "Missing Program Name");
        return false;
    }

    LexItem terminator = Parser::GetNextToken(in, line);
    cout << terminator << endl;
    if(terminator != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    // --- declaration section ---
    if(!DeclBlock(in, line)){
        ParseError(line, "Incorrect Declaration Section.");
        return false;
    }

    // --- body: BEGIN ... END (the keywords are consumed here, not in ProgBody) ---
    LexItem opener = Parser::GetNextToken(in, line);
    cout << "here at b" << opener << endl;
    if(opener != BEGIN){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    if(!ProgBody(in, line)){
        ParseError(line, "Incorrect Program Block");
        return false;
    }

    LexItem closer = Parser::GetNextToken(in, line);
    cout << closer << endl;
    if(closer != END){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    return true;
}
// DeclBlock ::= VAR {DeclStmt;}
//
// Parses the declaration section: the VAR keyword followed by ZERO OR MORE
// declaration statements. The braces in the grammar are a repetition, so
// DeclStmt is called in a loop, once per declaration, for as long as the
// upcoming token is an identifier (the only token that can start a DeclStmt).
//
// in   - stream the tokens are read from
// line - current line number, passed through to the tokenizer and ParseError
//
// Returns true when VAR and every following declaration parsed; otherwise
// reports through ParseError and returns false. On success the first token
// that does not start a declaration is left unread for the caller.
bool DeclBlock(istream& in, int& line){
    LexItem v = Parser::GetNextToken(in, line);

    cout << v << endl;

    if(v != VAR){
        ParseError(line, "Non-recognizable Declaration Block.");
        return false;
    }

    for(;;){
        // One-token lookahead: read the next token and immediately hand it
        // back, so that whoever parses next (DeclStmt or the caller) sees it.
        // NOTE(review): assumes the project's Parser provides
        // PushBackToken(LexItem&) with a one-token buffer that GetNextToken
        // drains first - confirm against the Parser wrapper.
        LexItem next = Parser::GetNextToken(in, line);
        Parser::PushBackToken(next);

        // Anything other than an identifier ends the declaration section.
        if(next != IDENT){
            return true;
        }

        // DeclStmt consumes the terminating ';' itself, so nothing is left
        // to read between two consecutive declarations.
        if(!DeclStmt(in, line)){
            ParseError(line, "Syntactic error in Declaration Block.");
            return false;
        }
    }
}

// DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
//
// Parses one declaration statement: a comma-separated list of identifiers,
// a colon, a type name, and the terminating semicolon. Each identifier is
// recorded in defVar; declaring a name that is already present is an error.
// Every token fetched here is echoed to cout.
//
// Note: the grammar attaches the ';' to DeclBlock, but this function consumes
// it; that is kept so existing callers continue to work.
//
// in   - stream the tokens are read from
// line - current line number, passed through to the tokenizer and ParseError
//
// Returns true when the full statement (including ';') matched; otherwise
// reports through ParseError and returns false.
bool DeclStmt(istream& in, int& line){
    LexItem c;

    // Ident {, Ident}: read identifiers for as long as each one is followed
    // by a comma. The token that ends the list is left in c.
    for(;;){
        LexItem tok = Parser::GetNextToken(in, line);
        cout << "here too " <<  tok << endl;

        // Only a real identifier may be declared; without this check any
        // token's lexeme (keyword, punctuation, ...) would be stored in
        // defVar as if it were a variable.
        if(tok != IDENT){
            ParseError(line, "Missing Identifier in Declaration");
            return false;
        }

        if (defVar.find(tok.GetLexeme()) != defVar.end()) {
            cout << "Var Exists!" << endl;
            ParseError(line, "Var cant be redeclared");
            return false;
        }
        defVar.insert({tok.GetLexeme(), true});

        c = Parser::GetNextToken(in, line);
        cout << c << endl;

        if(!(c == COMMA)){
            break;
        }
    }

    // Two identifiers in a row means the separating comma is missing.
    if(c.GetToken() == IDENT){
        ParseError(line, "Unrecognized Input Pattern");
        cout << "( here " << c.GetLexeme() << ")" << endl;
        return false;
    }

    if(c != COLON){
        ParseError(line, "Missing Colon");
        return false;
    }

    LexItem t = Parser::GetNextToken(in, line);

    cout << "here t " << t.GetLexeme() << endl;

    // NOTE(review): the type is matched on the upper-case lexeme text, so this
    // presumably relies on the lexer normalizing keyword case - confirm.
    if(t.GetLexeme() != "REAL" && t.GetLexeme() != "INTEGER" && t.GetLexeme() != "STRING"){
        ParseError(line, "Incorrect Declaration Type.");
        return false;
    }

    LexItem semi = Parser::GetNextToken(in,line);

    cout << semi << endl;

    if(semi != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    return true;
}

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

jJeQQOZ5 2025-01-26 18:49:14

DeclBlock 在此语言中只能调用一次。DeclStmt 可以多次调用。DeclStmt 被定义为一个或多个 Ident，后跟 :，后跟类型，最后以 ; 结尾。

在读取 DeclStmt 末尾的 ; 后，您将读取下一个标记来决定下一步要做什么。如果下一个标记是另一个 Ident，您就知道您正处于另一个 DeclStmt 的开头，因此您可以再次调用它。如果有其他情况，您就知道您正处于 ProgBody 的开头。（我假设最后一位。通常您会查找启动 ProgBody 的标记，但这没有显示。）

回复收藏 0 原文

七七 2025-01-26 18:49:14

你的 DeclBlock 函数应该类似于

// DeclBlock ::= VAR {DeclStmt;}
//
// Sketch of the declaration-section rule using a lookahead: after VAR, keep
// parsing "DeclStmt ;" pairs until the upcoming token is BEGIN.
// Returns false on a missing VAR, a failed DeclStmt, or a missing semicolon;
// true once BEGIN is seen as the next token.
bool DeclBlock(istream& in, int& line) {
    // The block must open with the VAR keyword.
    if (Parser::GetNextToken(in, line) != VAR) {
        return false;
    }

    // Lookahead is expected to report the next token without consuming it,
    // so BEGIN is still available to the caller when the loop ends.
    while (Parser::Lookahead(in, line) != BEGIN) {
        // Short-circuit evaluation: the semicolon is only fetched when the
        // declaration itself parsed. Either failure aborts the block.
        if (!DeclStmt(in, line) || Parser::GetNextToken(in, line) != SEMICOL) {
            return false;
        }
    }

    return true;
}

关键是你必须有一个解析器先行函数，它可以为你提供下一个标记而不消耗它。

Your DeclBlock function should be something like

// DeclBlock ::= VAR {DeclStmt;}
//
// Sketch of the declaration-section rule using a lookahead: after VAR, keep
// parsing "DeclStmt ;" pairs until the upcoming token is BEGIN.
// Returns false on a missing VAR, a failed DeclStmt, or a missing semicolon;
// true once BEGIN is seen as the next token.
bool DeclBlock(istream& in, int& line) {
    // The block must open with the VAR keyword.
    if (Parser::GetNextToken(in, line) != VAR) {
        return false;
    }

    // Lookahead is expected to report the next token without consuming it,
    // so BEGIN is still available to the caller when the loop ends.
    while (Parser::Lookahead(in, line) != BEGIN) {
        // Short-circuit evaluation: the semicolon is only fetched when the
        // declaration itself parsed. Either failure aborts the block.
        if (!DeclStmt(in, line) || Parser::GetNextToken(in, line) != SEMICOL) {
            return false;
        }
    }

    return true;
}

The key thing is that you MUST have a parser lookahead function that gives you the next token WITHOUT consuming it.

回复收藏 0 原文

~没有更多了~