使用 FParsec 进行基本错误恢复

发布于 2025-01-05 03:10:23 字数 1120 浏览 7 评论 0原文

假设我有这个解析器：

let test p str =
    match run p str with
    | Success(result, _, _)   -> printfn "Success: %A" result
    | Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg

let myStatement =
    choice (seq [
                pchar '2' >>. pchar '+' .>> pchar '3' .>> pchar ';';
                pchar '3' >>. pchar '*' .>> pchar '4' .>> pchar ';';
            ])

let myProgram = many myStatement

test myProgram "2+3;3*4;3*4;" // Success: ['+'; '*'; '*']

现在， "2+3;2*4;3*4;3+3;" 将失败，并出现 2*4; 周围的错误。但是，如果我想要 2*4; 和 3+3; 的错误，最佳实践是什么？基本上，我想扫描到最近的“;”但仅当出现致命错误时。如果发生这种情况，我想汇总错误。

亲切的问候，Lasse Espeholt

更新： recoverWith 是一个很好的解决方案，谢谢！但鉴于：

let myProgram = 
    (many1 (myStatement |> recoverWith '�')) <|>% []

test myProgram "monkey"

我希望得到没有错误的 [] 。或者也许更“公平”一点：

let myProgram = 
    (attempt (many1 (myStatement |> recoverWith '�'))) <|>% []

原文

Assume I've this parser:

let test p str =
    match run p str with
    | Success(result, _, _)   -> printfn "Success: %A" result
    | Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg

let myStatement =
    choice (seq [
                pchar '2' >>. pchar '+' .>> pchar '3' .>> pchar ';';
                pchar '3' >>. pchar '*' .>> pchar '4' .>> pchar ';';
            ])

let myProgram = many myStatement

test myProgram "2+3;3*4;3*4;" // Success: ['+'; '*'; '*']

Now, "2+3;2*4;3*4;3+3;" will fail with error around 2*4;. But what is best practice if I want both the error for 2*4; and 3+3;? Basically, I want to scan to the nearest ';' but only if there is a fatal error. And if that happens I want to aggregate the errors.

Kind regards, Lasse Espeholt

Update: recoverWith is a nice solution, thanks! But given:

let myProgram = 
    (many1 (myStatement |> recoverWith '�')) <|>% []

test myProgram "monkey"

I would expect to get [] with no errors. Or maybe a bit more "fair":

let myProgram = 
    (attempt (many1 (myStatement |> recoverWith '�'))) <|>% []

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

疾风者 2025-01-12 03:10:23

FParsec 没有内置支持从致命解析器错误中恢复，这将允许您获取部分解析器结果并从多个位置收集错误。然而，为此目的定义自定义组合器函数非常容易。

例如，要从简单语句解析器中的错误中恢复，您可以定义以下 recoverWith 组合器：

open FParsec

type UserState = {
    Errors: (string * ParserError) list
} with
    static member Create() = {Errors = []}

type Parser<'t> = Parser<'t, UserState>

// recover from error by skipping to the char after the next newline or ';'
let recoverWith errorResult (p: Parser<_>) : Parser<_> =    
  fun stream ->
    let stateTag = stream.StateTag
    let mutable reply = p stream
    if reply.Status <> Ok then // the parser failed
        let error = ParserError(stream.Position, stream.UserState, reply.Error)
        let errorMsg = error.ToString(stream)
        stream.SkipCharsOrNewlinesWhile(fun c -> c <> ';' && c <> '\n') |> ignore                        
        stream.ReadCharOrNewline() |> ignore
        // To prevent infinite recovery attempts in certain situations,
        // the following check makes sure that either the parser p 
        // or our stream.Skip... commands consumed some input.
        if stream.StateTag <> stateTag then
            let oldErrors = stream.UserState.Errors
            stream.UserState <- {Errors = (errorMsg, error)::oldErrors}     
            reply <- Reply(errorResult)
    reply

然后您可以按如下方式使用此组合器：

let myStatement =
    choice [
        pchar '2' >>. pchar '+' .>> pchar '3' .>> pchar ';'
        pchar '3' >>. pchar '*' .>> pchar '4' .>> pchar ';'
    ]

let myProgram = 
    many (myStatement |> recoverWith '�') .>> eof

let test p str =
    let printErrors (errorMsgs: (string * ParserError) list) =        
        for msg, _ in List.rev errorMsgs do
            printfn "%s" msg        

    match runParserOnString p (UserState.Create()) "" str with
    | Success(result, {Errors = []}, _) -> printfn "Success: %A" result
    | Success(result, {Errors = errors}, _) ->
        printfn "Result with errors: %A\n" result
        printErrors errors
    | Failure(errorMsg, error, {Errors = errors}) -> 
        printfn "Failure: %s" errorMsg
        printErrors ((errorMsg, error)::errors)

使用 test myProgram "2+3;2* 进行测试4;3*4;3+3" 将产生输出：

Result with errors: ['+'; '�'; '*'; '�']

Error in Ln: 1 Col: 6
2+3;2*4;3*4;3+3
     ^
Expecting: '+'

Error in Ln: 1 Col: 14
2+3;2*4;3*4;3+3
             ^
Expecting: '*'

更新：

嗯，我以为你想从致命错误中恢复，以便收集多个错误消息并可能产生部分结果。例如，对于语法突出显示或允许用户一次修复多个错误很有用的东西。

您的更新似乎表明您只想在出现解析器错误时忽略部分输入，这要简单得多：

let skip1ToNextStatement =
    notEmpty // requires at least one char to be skipped
        (skipManySatisfy (fun c -> c <> ';' && c <> '\n') 
         >>. optional anyChar) // optional since we might be at the EOF

let myProgram =     
    many (attempt myStatement <|> (skip1ToNextStatement >>% '�'))
    |>> List.filter (fun c -> c <> '�')

更新 2：

以下是 recoverWith 它不会聚合错误，并且仅在参数解析器消耗输入（或以任何其他方式更改解析器状态）时尝试从错误中恢复：

let recoverWith2 errorResult (p: Parser<_>) : Parser<_> =
  fun stream ->
    let stateTag = stream.StateTag
    let mutable reply = p stream
    if reply.Status <> Ok && stream.StateTag <> stateTag then
        stream.SkipCharsOrNewlinesWhile(fun c -> c <> ';' && c <> '\n') |> ignore
        stream.ReadCharOrNewline() |> ignore
        reply <- Reply(errorResult)
    reply

FParsec has no built-in support for recovering from fatal parser errors that would allow you to obtain partial parser results and collect errors from multiple positions. However, it's pretty easy to define a custom combinator function for this purpose.

For example, to recover from errors in your simple statement parser you could define the following recoverWith combinator:

open FParsec

type UserState = {
    Errors: (string * ParserError) list
} with
    static member Create() = {Errors = []}

type Parser<'t> = Parser<'t, UserState>

// recover from error by skipping to the char after the next newline or ';'
let recoverWith errorResult (p: Parser<_>) : Parser<_> =    
  fun stream ->
    let stateTag = stream.StateTag
    let mutable reply = p stream
    if reply.Status <> Ok then // the parser failed
        let error = ParserError(stream.Position, stream.UserState, reply.Error)
        let errorMsg = error.ToString(stream)
        stream.SkipCharsOrNewlinesWhile(fun c -> c <> ';' && c <> '\n') |> ignore                        
        stream.ReadCharOrNewline() |> ignore
        // To prevent infinite recovery attempts in certain situations,
        // the following check makes sure that either the parser p 
        // or our stream.Skip... commands consumed some input.
        if stream.StateTag <> stateTag then
            let oldErrors = stream.UserState.Errors
            stream.UserState <- {Errors = (errorMsg, error)::oldErrors}     
            reply <- Reply(errorResult)
    reply

You could then use this combinator as follows:

let myStatement =
    choice [
        pchar '2' >>. pchar '+' .>> pchar '3' .>> pchar ';'
        pchar '3' >>. pchar '*' .>> pchar '4' .>> pchar ';'
    ]

let myProgram = 
    many (myStatement |> recoverWith '�') .>> eof

let test p str =
    let printErrors (errorMsgs: (string * ParserError) list) =        
        for msg, _ in List.rev errorMsgs do
            printfn "%s" msg        

    match runParserOnString p (UserState.Create()) "" str with
    | Success(result, {Errors = []}, _) -> printfn "Success: %A" result
    | Success(result, {Errors = errors}, _) ->
        printfn "Result with errors: %A\n" result
        printErrors errors
    | Failure(errorMsg, error, {Errors = errors}) -> 
        printfn "Failure: %s" errorMsg
        printErrors ((errorMsg, error)::errors)

Testing with test myProgram "2+3;2*4;3*4;3+3" would yield the output:

Result with errors: ['+'; '�'; '*'; '�']

Error in Ln: 1 Col: 6
2+3;2*4;3*4;3+3
     ^
Expecting: '+'

Error in Ln: 1 Col: 14
2+3;2*4;3*4;3+3
             ^
Expecting: '*'

Update:

Hmm, I thought you wanted to recover from a fatal error in order to collect multiple error messages and maybe produce a partial result. Something that would for example be useful for syntax highlighting or allowing your users to fix more than one error at a time.

Your update seems to suggest that you just want to ignore parts of the input in case of a parser error, which is much simpler:

let skip1ToNextStatement =
    notEmpty // requires at least one char to be skipped
        (skipManySatisfy (fun c -> c <> ';' && c <> '\n') 
         >>. optional anyChar) // optional since we might be at the EOF

let myProgram =     
    many (attempt myStatement <|> (skip1ToNextStatement >>% '�'))
    |>> List.filter (fun c -> c <> '�')

Update 2:

The following is a version of recoverWith that doesn't aggregate errors and only tries to recover from an error if the argument parser consumed input (or changed the parser state in any other way):

let recoverWith2 errorResult (p: Parser<_>) : Parser<_> =
  fun stream ->
    let stateTag = stream.StateTag
    let mutable reply = p stream
    if reply.Status <> Ok && stream.StateTag <> stateTag then
        stream.SkipCharsOrNewlinesWhile(fun c -> c <> ';' && c <> '\n') |> ignore
        stream.ReadCharOrNewline() |> ignore
        reply <- Reply(errorResult)
    reply

回复收藏 0 原文

~没有更多了~