Flex/Bison 有时会漏掉令牌

发布于 2025-01-29 10:16:46 字数 4556 浏览 2 评论 0原文

我使用 Flex/Bison 构建 CLI,但发现 flex 有时无法识别出令牌。

我的.l看起来像这样:

%{

#include <stdio.h>
#include <string.h>
#include "hmd.tab.h"
#include "cmd.h"
%}

%option debug
%option verbose
%option backup

%option noyywrap nounput noinput
%option reentrant bison-bridge



digit [0-9]
integer [+-]?{digit}+
uinteger {digit}+
real [+-]?({digit}+[.]{digit}*)|({digit}*[.]{digit}+)
exp [+-]?({integer}|{real})[eE]-?{integer}
alpha [:alpha:]+
any [^[:space:]](.|\n)+
printing [^[:space:]]+

%x ID ACTION ZONE_FIELD VALUE

%%

    /*subsystems*/
<INITIAL>zone {
    BEGIN(ID);
    printf("ZONE '%s'\n", yytext);
    return (cmd_sys_zone);
}

<INITIAL>device {
    return (cmd_sys_device);
}

<INITIAL>system {
    return (cmd_sys_system);
}

<INITIAL>help {
    return (cmd_sys_help);
}

<INITIAL>ver|version {
    return (cmd_sys_ver);
}


<ID>{uinteger} {
    printf("ID '%s'\n", yytext);
    yylval->number = strtoll(yytext, NULL, 0);
    BEGIN (ACTION);
    return (cmd_id);
}

    /*actions*/
<ACTION>set {
    BEGIN (ZONE_FIELD);
    printf("SET '%s'\n", yytext);
    return (cmd_action_set);
}
<ACTION>get {
    BEGIN (ZONE_FIELD);
    return (cmd_action_get);
}
<ACTION>start {
    BEGIN (ZONE_FIELD);
    return (cmd_action_start);
}
<ACTION>stop {
    BEGIN (ZONE_FIELD);
    return (cmd_action_stop);
}

<ZONE_FIELD>{alpha} {
    printf("ZONE_FIELD '%s'\n", yytext);
    yylval->name = strdup(yytext);
    BEGIN (VALUE);
    return (cmd_field);
}

<VALUE>{any} {
        yylval->name = strdup(yytext);
        printf("VALUE '%s'\n", yytext);
        return(cmd_value);
    }

%%


/*
 * Parse the command text held in command->buffer.
 *
 * A fresh reentrant scanner is created for every call, pointed at the
 * caller's buffer, and handed to yyparse() together with `command` as the
 * parser's user data.
 *
 * Returns 0 when the scanner and its input buffer were set up and the parser
 * was run; non-zero when set-up failed (the yylex_init() error code, or 1 if
 * the buffer was rejected).  The result of yyparse() itself is deliberately
 * not propagated: syntax errors are reported through yyerror().
 */
int cmd_parse(cmd_t *command) {
    yyscan_t scanner;
    YY_BUFFER_STATE buffer;
    int ret_val;

    if ((ret_val = yylex_init(&scanner)) != 0) {
        goto exit_point;
    }

    printf("INPUT '%s'\n", command->buffer);

    /*
     * yy_scan_buffer() scans the caller's memory in place and returns NULL
     * unless the last two bytes of the buffer are NUL.  Without this check a
     * rejected buffer would leave the scanner with no input buffer at all.
     */
    buffer = yy_scan_buffer(command->buffer, command->len, scanner);
    if (buffer == NULL) {
        ret_val = 1;
        goto destroy_scanner;
    }

    (void) yyparse(command, scanner);
    yy_delete_buffer(buffer, scanner);
destroy_scanner:
    yylex_destroy(scanner);
exit_point:
    return ret_val;
}

我的.y看起来像这样:

%{
#define YYDEBUG 1
#include <stdio.h>
#include <stdint.h>
#include "cmd.h"
#include "hmd.tab.h"

int yylex();
int yyerror(void *userdata, void *scanner, const char *s);

%}

%debug
%define api.pure

%define parse.error verbose

/*System tokens*/
%token cmd_sys_zone cmd_sys_device cmd_sys_system cmd_sys_ver cmd_sys_help

%token cmd_num cmd_unum cmd_real cmd_other

/*ID token*/
%token cmd_id

/*Fields*/
%token cmd_field

/*Action tokens*/
%token cmd_action_set cmd_action_get cmd_action_start cmd_action_stop

/*Value*/
%token cmd_value

%type <number> cmd_num
%type <unumber> cmd_unum
%type <real> cmd_real
%type <unumber> cmd_id
%type <name> cmd_field
%type <name> cmd_value
%type <name> cmd_other

/* Release a <name> semantic value that Bison discards without running a
   rule action (for example during error recovery).  The original guard was
   inverted and only ever called free() on NULL, leaking every discarded
   string; free(NULL) is a no-op, so no guard is needed at all. */
%destructor {
    free($$);
} <name>

%union {
    char *name;
    int64_t number;
    uint64_t unumber;
    double real;
}


%parse-param {void *user_data}
%param {void *scanner}

%%

prog:
  stmts
;

stmts:
        | stmt stmts

stmt:
        cmd_sys_zone cmd_id cmd_action_set cmd_field cmd_value {
            cmd_zone_set(user_data, $2, $4, $5);
            cmd_free($4);
            cmd_free($5);
        } |
        cmd_sys_zone cmd_id cmd_action_get cmd_field {
            cmd_zone_get($2, $4);
            cmd_free($4);
        } |
        cmd_sys_ver {
            cmd_ver(user_data);
        } |
        cmd_sys_help {
            cmd_help();
        } |
        cmd_other {
            yyerror(user_data, NULL, $1);
            cmd_free($1);
        }

%%

/*
 * Error callback: formats the message `s` as "ERR: <s>\r\n" into the
 * response buffer of the command passed as user data and records the
 * formatted length.  The scanner handle is not used.  Always returns 0.
 */
int yyerror(void *userdata, void *scanner, const char *s)
{
    cmd_t *cmd = (cmd_t *) userdata;

    (void) scanner;

    cmd->response_len = sprintf(cmd->response, "ERR: %s\r\n", s);

    return 0;
}

两个类似的测试用例:

INPUT 'zone 2 set haha some good result
'
ZONE 'zone'
 ID '2'
 SET 'set'
 ZONE_FIELD 'haha'
 VALUE 'some good result
'
2022-05-17T04:31:43 I CMD_SET_ZONE '2' 'haha' 'some good result /*Output of the handler*/
'
INPUT 'zone 2 set blab some bad result
'
ZONE 'zone'
 ID '2'
 SET 'set'
 bZONE_FIELD 'la' /*b is missed by Flex*/
VALUE 'b some bad result /*That b should be part of ZONE_FIELD*/
'
2022-05-17T04:31:59 I CMD_SET_ZONE '2' 'la' 'b some bad result /*Output of the handler*/
'

如您所见,我给解析器的数据几乎相同,结果却不同。第二次有一些字节没有被匹配,整个语法解析随之崩溃。

I build a CLI using flex/bison, and I experience that the flex sometimes doesn't get the tokens.

My .l looks like this:

%{

#include <stdio.h>
#include <string.h>
#include "hmd.tab.h"
#include "cmd.h"
%}

%option debug
%option verbose
%option backup

%option noyywrap nounput noinput
%option reentrant bison-bridge



digit [0-9]
integer [+-]?{digit}+
uinteger {digit}+
real [+-]?({digit}+[.]{digit}*)|({digit}*[.]{digit}+)
exp [+-]?({integer}|{real})[eE]-?{integer}
alpha [:alpha:]+
any [^[:space:]](.|\n)+
printing [^[:space:]]+

%x ID ACTION ZONE_FIELD VALUE

%%

    /*subsystems*/
<INITIAL>zone {
    BEGIN(ID);
    printf("ZONE '%s'\n", yytext);
    return (cmd_sys_zone);
}

<INITIAL>device {
    return (cmd_sys_device);
}

<INITIAL>system {
    return (cmd_sys_system);
}

<INITIAL>help {
    return (cmd_sys_help);
}

<INITIAL>ver|version {
    return (cmd_sys_ver);
}


<ID>{uinteger} {
    printf("ID '%s'\n", yytext);
    yylval->number = strtoll(yytext, NULL, 0);
    BEGIN (ACTION);
    return (cmd_id);
}

    /*actions*/
<ACTION>set {
    BEGIN (ZONE_FIELD);
    printf("SET '%s'\n", yytext);
    return (cmd_action_set);
}
<ACTION>get {
    BEGIN (ZONE_FIELD);
    return (cmd_action_get);
}
<ACTION>start {
    BEGIN (ZONE_FIELD);
    return (cmd_action_start);
}
<ACTION>stop {
    BEGIN (ZONE_FIELD);
    return (cmd_action_stop);
}

<ZONE_FIELD>{alpha} {
    printf("ZONE_FIELD '%s'\n", yytext);
    yylval->name = strdup(yytext);
    BEGIN (VALUE);
    return (cmd_field);
}

<VALUE>{any} {
        yylval->name = strdup(yytext);
        printf("VALUE '%s'\n", yytext);
        return(cmd_value);
    }

%%


/*
 * Parse the command text held in command->buffer.
 *
 * A fresh reentrant scanner is created for every call, pointed at the
 * caller's buffer, and handed to yyparse() together with `command` as the
 * parser's user data.
 *
 * Returns 0 when the scanner and its input buffer were set up and the parser
 * was run; non-zero when set-up failed (the yylex_init() error code, or 1 if
 * the buffer was rejected).  The result of yyparse() itself is deliberately
 * not propagated: syntax errors are reported through yyerror().
 */
int cmd_parse(cmd_t *command) {
    yyscan_t scanner;
    YY_BUFFER_STATE buffer;
    int ret_val;

    if ((ret_val = yylex_init(&scanner)) != 0) {
        goto exit_point;
    }

    printf("INPUT '%s'\n", command->buffer);

    /*
     * yy_scan_buffer() scans the caller's memory in place and returns NULL
     * unless the last two bytes of the buffer are NUL.  Without this check a
     * rejected buffer would leave the scanner with no input buffer at all.
     */
    buffer = yy_scan_buffer(command->buffer, command->len, scanner);
    if (buffer == NULL) {
        ret_val = 1;
        goto destroy_scanner;
    }

    (void) yyparse(command, scanner);
    yy_delete_buffer(buffer, scanner);
destroy_scanner:
    yylex_destroy(scanner);
exit_point:
    return ret_val;
}

and my .y looks like this:

%{
#define YYDEBUG 1
#include <stdio.h>
#include <stdint.h>
#include "cmd.h"
#include "hmd.tab.h"

int yylex();
int yyerror(void *userdata, void *scanner, const char *s);

%}

%debug
%define api.pure

%define parse.error verbose

/*System tokens*/
%token cmd_sys_zone cmd_sys_device cmd_sys_system cmd_sys_ver cmd_sys_help

%token cmd_num cmd_unum cmd_real cmd_other

/*ID token*/
%token cmd_id

/*Fields*/
%token cmd_field

/*Action tokens*/
%token cmd_action_set cmd_action_get cmd_action_start cmd_action_stop

/*Value*/
%token cmd_value

%type <number> cmd_num
%type <unumber> cmd_unum
%type <real> cmd_real
%type <unumber> cmd_id
%type <name> cmd_field
%type <name> cmd_value
%type <name> cmd_other

/* Release a <name> semantic value that Bison discards without running a
   rule action (for example during error recovery).  The value is referred
   to as $$ inside a destructor.  The original guard was inverted and only
   ever called free() on NULL, leaking every discarded string; free(NULL)
   is a no-op, so no guard is needed at all. */
%destructor {
    free($$);
} <name>

%union {
    char *name;
    int64_t number;
    uint64_t unumber;
    double real;
}


%parse-param {void *user_data}
%param {void *scanner}

%%

prog:
  stmts
;

stmts:
        | stmt stmts

stmt:
        cmd_sys_zone cmd_id cmd_action_set cmd_field cmd_value {
            cmd_zone_set(user_data, $2, $4, $5);
            cmd_free($4);
            cmd_free($5);
        } |
        cmd_sys_zone cmd_id cmd_action_get cmd_field {
            cmd_zone_get($2, $4);
            cmd_free($4);
        } |
        cmd_sys_ver {
            cmd_ver(user_data);
        } |
        cmd_sys_help {
            cmd_help();
        } |
        cmd_other {
            yyerror(user_data, NULL, $1);
            cmd_free($1);
        }

%%

/*
 * Error callback: formats the message `s` as "ERR: <s>\r\n" into the
 * response buffer of the command passed as user data and records the
 * formatted length.  The scanner handle is not used.  Always returns 0.
 */
int yyerror(void *userdata, void *scanner, const char *s)
{
    cmd_t *cmd = (cmd_t *) userdata;

    (void) scanner;

    cmd->response_len = sprintf(cmd->response, "ERR: %s\r\n", s);

    return 0;
}

Two similar test cases:

INPUT 'zone 2 set haha some good result
'
ZONE 'zone'
 ID '2'
 SET 'set'
 ZONE_FIELD 'haha'
 VALUE 'some good result
'
2022-05-17T04:31:43 I CMD_SET_ZONE '2' 'haha' 'some good result /*Output of the handler*/
'
INPUT 'zone 2 set blab some bad result
'
ZONE 'zone'
 ID '2'
 SET 'set'
 bZONE_FIELD 'la' /*b is missed by Flex*/
VALUE 'b some bad result /*That b should be part of ZONE_FIELD*/
'
2022-05-17T04:31:59 I CMD_SET_ZONE '2' 'la' 'b some bad result /*Output of the handler*/
'

As you can see I give almost the same amount of data to the parser, and the outcome differs. The second time, there are a bunch of bytes not matched, and the whole grammar collapses.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

放低过去 2025-02-05 10:16:46

如果在生成扫描器时使用 --debug(或 -d)命令行标志,flex 会插入代码,记录所有规则匹配(以及某些其他重要事件)。在像您这样的可重入扫描器中,还需要插入对 yyset_debug(1, scanner); 的调用来启用日志;在非可重入扫描器中,日志默认是启用的。通常,这比在扫描器动作中插入自己的 printf 调用能提供更好的调试信息,而且省事得多。(尤其是在需要把它关掉的时候。)

我猜它本可以提供足够的信息,让您发现代码中的笔误,即把定义写成了

alpha [:alpha:]+

而不是正确的:

alpha [[:alpha:]]+

按现在的写法,{alpha} 可以匹配 haha、papa 和 lala。但它无法匹配 blabla,因为 b 既不是字母 a、h、l、p 之一,也不是冒号。如果像上面那样启用调试,您会在输出中看到类似这样的内容:

--accepting rule at line 85 ("set")
 SET 'set'
--accepting default rule (" ")
--accepting default rule ("b")
--accepting rule at line 103 ("la")
 bZONE_FIELD 'la'

除了表明 b 没有被 {alpha} 匹配之外,它还表明您没有正确处理空白字符;您可能应该添加一条匹配并忽略水平空白(或者所有空白)的模式:

<*>[ \t]+    ;

我还建议不要依赖自动的默认回退规则。编写能覆盖所有可能输入的模式集(并使用 %option nodefault 确保每种输入都由某条规则匹配)也有助于发现简单的模式错误。

If you use the --debug (or -d) command-line flag when generating your scanner, flex will insert code which logs all rule matches (and certain other significant events). In reentrant scanners, such as yours, you also need to insert a call to yyset_debug(1, scanner); to enable the logs; in non-reentrant scanners, the logs are enabled by default. This generally gives you better debugging information than inserting your own printf calls in your scanner actions, and is far less work. (Particularly when it comes time to turn it off.)

I suspect it would have given you enough information to see the typo in your code, which was to define

alpha [:alpha:]+

instead of the correct:

alpha [[:alpha:]]+

As written, {alpha} will match haha, papa and lala. But it won't match blabla because b isn't one of the letters ahlp and nor is it a colon. With debugging enabled (as above), you would have seen something like this in your output:

--accepting rule at line 85 ("set")
 SET 'set'
--accepting default rule (" ")
--accepting default rule ("b")
--accepting rule at line 103 ("la")
 bZONE_FIELD 'la'

Aside from showing that b is not matched by {alpha}, it shows that you're not correctly handling whitespace; probably, you should add a pattern which matches and ignores horizontal whitespace (or maybe all whitespace):

<*>[ \t]+    ;

I also recommend not relying on the automatic fallback rule. Writing pattern sets which match all possibilities (and using %option nodefault to ensure that all possibilities are matched by some rule) also helps you catch simple pattern errors.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文