我正在编写一个 lex 代码，其中正则表达式部分的给出与详细部分中的完全相同。我遇到的主要问题是文本的 RE

发布于 2025-01-17 02:36:26 字数 9559 浏览 2 评论 0原文

    %{
    #define  FUNCT      300
    #define  IDENTIFIER 301
    #define  ASSGN      302
    #define  INTEGER    303
    #define  PRINT      304
    #define  TEXT       305
    #define  INPUT      306
    #define  CONTINUE   307
    #define  RETURN     308
    #define  IF         309
    #define  THEN       310
    #define  ENDIF      311
    #define  ELSE       312
    #define  WHILE      313
    #define  DO         314
    #define  ENDDO      315
    #define  END        316
    
    #include<stdio.h>
    #include<string.h>
    #include<stdlib.h>
    
    #define MAX_SYM 200
    int found;
    void initialize();   
    void create(char *lexeme, int scope, char type, char usage);
    int readsymtab(char *lexeme, int scope, char usage); 
    %}
    
    %%
    [\t ]+                {}
    =                     {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ASSGN)                            ;}
    print                 {int found = readsymtab(yytext,0,'L');   //line 39
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(PRINT)                            ;}
    input                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(INPUT)                            ;}
    continue              {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(CONTINUE)                         ;}
    return                {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(RETURN)                           ;}
    if                    {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(IF)                               ;}
    then                  {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(THEN)                             ;}
    endif                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ENDIF)                            ;}
    else                  {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ELSE)                             ;}
    while                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(WHILE)                            ;}
    do                    {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(DO)                               ;}
    enddo                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ENDDO)                            ;}
    end                   {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(END);
                           exit(0);                                 ;}
    funct                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(FUNCT)                            ;}
    [0-9]+                {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'I','L');
                           };
                           return(FUNCT)                            ;}
    [a-zA-Z]+             {int found = readsymtab(yytext,0,'I');
                           if(found == -1)
                           {
                            create(yytext,0,'S','I');
                           };
                           return(IDENTIFIER)                       ;}
    \"[^\"\n]+|[\\n]+\"   {int found = readsymtab(yytext,0,'L');  //line130
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(TEXT)                             ;}
    .                     {return(yytext[0])                        ;}
    %%
    
    
    
    //new variable declaration
    
    int num;
    int scope;
    struct symbtab                    
    {
        char Lexeme [18];
        int Scope;
        char Type;
        char Usage;
        int Reference;
    };
    struct symbtab arr_symtab[200];                                //data structure in which the symbol table entries are stored
    
    void print_fn()                                                //function which actually prints the symbol tabel in columnar form             
    {
        int rows;
        
        printf("Row No Lexeme           Scope Type Usage Reference\n");
    
        for (rows=0; rows<=num; rows++){
            printf("%6d %-16s %-7d %-7c %-7c %-7d \n",rows, arr_symtab[rows].Lexeme,arr_symtab[rows].Scope,arr_symtab[rows].Type,arr_symtab[rows].Usage,arr_symtab[rows].Reference);
        }
    }
    
    void initialize()                                              //function which enteres the initial value into the symbol table              
    {
        num = -1;
        int scope = 0;
        char lexeme[18]= "FRED";
        char type = 'I';
        char usage = 'L';
        create(lexeme,scope,type,usage);   
    }
    
    void create(char *lexeme, int scope, char type, char usage)    //function which creates a new entry in the symbol table                                                                     
    {
        
        int reference;
        if(type=='I' && usage =='L')
             reference = atoi(lexeme);
        else
             reference = -1;
    
        num = num+1;
        strcpy(arr_symtab[num].Lexeme, lexeme); 
        arr_symtab[num].Scope = scope;
        arr_symtab[num].Type = type;
        arr_symtab[num].Usage = usage;
        arr_symtab[num].Reference = reference;
        
    }
    
    int readsymtab(char *lexeme, int scope, char usage)                 //function which checks if the entry is already in the table or not and the takes the required action                                                              
    {
        for(int i=num; i>=0; i--){
            int comp = strcmp(arr_symtab[i].Lexeme, lexeme);
           if(comp==0 && arr_symtab[i].Scope==scope && arr_symtab[i].Usage==usage)
           {
               return i;
           }
           else
           {
               return -1;
           }
        }
    }
    
    int main()
    {
        //other lines
        printf("\n COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 \n");
        initialize();
        yylex();
        print_fn();
        printf("End of test.\n");
        return 0;
    }
    
    int yywrap ()
    {
        return 1;
    }

以下是打印“aryan banyal”的输出，

     COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 
    --(end of buffer or a NUL)
    --accepting rule at line 39 ("print")
    Row No Lexeme           Scope Type Usage Reference
         0 FRED             0       I       L       0       
         1 print            0       S       L       -1      
    End of test.

正如您所看到的，它甚至没有进入“aryan banyal”部分，只是执行打印操作并退出...... 以下是“aryan banyal”的输出，

     COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 
    --(end of buffer or a NUL)
    --accepting rule at line 130 (""aryan banyal")
    Row No Lexeme           Scope Type Usage Reference
         0 FRED             0       I       L       0       
         1 "aryan banyal    0       S       L       -1      
    End of test.

第一行应该是 aryan banyal，但由于某种原因前面有一个“。

原文

    %{
    #define  FUNCT      300
    #define  IDENTIFIER 301
    #define  ASSGN      302
    #define  INTEGER    303
    #define  PRINT      304
    #define  TEXT       305
    #define  INPUT      306
    #define  CONTINUE   307
    #define  RETURN     308
    #define  IF         309
    #define  THEN       310
    #define  ENDIF      311
    #define  ELSE       312
    #define  WHILE      313
    #define  DO         314
    #define  ENDDO      315
    #define  END        316
    
    #include<stdio.h>
    #include<string.h>
    #include<stdlib.h>
    
    #define MAX_SYM 200
    int found;
    void initialize();   
    void create(char *lexeme, int scope, char type, char usage);
    int readsymtab(char *lexeme, int scope, char usage); 
    %}
    
    %%
    [\t ]+                {}
    =                     {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ASSGN)                            ;}
    print                 {int found = readsymtab(yytext,0,'L');   //line 39
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(PRINT)                            ;}
    input                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(INPUT)                            ;}
    continue              {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(CONTINUE)                         ;}
    return                {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(RETURN)                           ;}
    if                    {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(IF)                               ;}
    then                  {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(THEN)                             ;}
    endif                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ENDIF)                            ;}
    else                  {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ELSE)                             ;}
    while                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(WHILE)                            ;}
    do                    {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(DO)                               ;}
    enddo                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ENDDO)                            ;}
    end                   {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(END);
                           exit(0);                                 ;}
    funct                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(FUNCT)                            ;}
    [0-9]+                {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'I','L');
                           };
                           return(FUNCT)                            ;}
    [a-zA-Z]+             {int found = readsymtab(yytext,0,'I');
                           if(found == -1)
                           {
                            create(yytext,0,'S','I');
                           };
                           return(IDENTIFIER)                       ;}
    \"[^\"\n]+|[\\n]+\"   {int found = readsymtab(yytext,0,'L');  //line130
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(TEXT)                             ;}
    .                     {return(yytext[0])                        ;}
    %%
    
    
    
    //new variable declaration
    
    int num;
    int scope;
    struct symbtab                    
    {
        char Lexeme [18];
        int Scope;
        char Type;
        char Usage;
        int Reference;
    };
    struct symbtab arr_symtab[200];                                //data structure in which the symbol table entries are stored
    
    void print_fn()                                                //function which actually prints the symbol tabel in columnar form             
    {
        int rows;
        
        printf("Row No Lexeme           Scope Type Usage Reference\n");
    
        for (rows=0; rows<=num; rows++){
            printf("%6d %-16s %-7d %-7c %-7c %-7d \n",rows, arr_symtab[rows].Lexeme,arr_symtab[rows].Scope,arr_symtab[rows].Type,arr_symtab[rows].Usage,arr_symtab[rows].Reference);
        }
    }
    
    void initialize()                                              //function which enteres the initial value into the symbol table              
    {
        num = -1;
        int scope = 0;
        char lexeme[18]= "FRED";
        char type = 'I';
        char usage = 'L';
        create(lexeme,scope,type,usage);   
    }
    
    void create(char *lexeme, int scope, char type, char usage)    //function which creates a new entry in the symbol table                                                                     
    {
        
        int reference;
        if(type=='I' && usage =='L')
             reference = atoi(lexeme);
        else
             reference = -1;
    
        num = num+1;
        strcpy(arr_symtab[num].Lexeme, lexeme); 
        arr_symtab[num].Scope = scope;
        arr_symtab[num].Type = type;
        arr_symtab[num].Usage = usage;
        arr_symtab[num].Reference = reference;
        
    }
    
    int readsymtab(char *lexeme, int scope, char usage)                 //function which checks if the entry is already in the table or not and the takes the required action                                                              
    {
        for(int i=num; i>=0; i--){
            int comp = strcmp(arr_symtab[i].Lexeme, lexeme);
           if(comp==0 && arr_symtab[i].Scope==scope && arr_symtab[i].Usage==usage)
           {
               return i;
           }
           else
           {
               return -1;
           }
        }
    }
    
    int main()
    {
        //other lines
        printf("\n COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 \n");
        initialize();
        yylex();
        print_fn();
        printf("End of test.\n");
        return 0;
    }
    
    int yywrap ()
    {
        return 1;
    }

Following is the output for print"aryan banyal"

     COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 
    --(end of buffer or a NUL)
    --accepting rule at line 39 ("print")
    Row No Lexeme           Scope Type Usage Reference
         0 FRED             0       I       L       0       
         1 print            0       S       L       -1      
    End of test.

As you can see it's not even going to the "aryan banyal" part just does the print thing and exits...
Following is the output for "aryan banyal"

     COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 
    --(end of buffer or a NUL)
    --accepting rule at line 130 (""aryan banyal")
    Row No Lexeme           Scope Type Usage Reference
         0 FRED             0       I       L       0       
         1 "aryan banyal    0       S       L       -1      
    End of test.

It shoud be aryan banyal there on Row No 1 but there a " before for some reason.

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

梦亿 2025-01-24 02:36:26

你有（至少）三个（有些）不相关的问题。

使用词法扫描器

您的代码在读取单个标记后停止，因为您只调用 yylex() 一次（并忽略它返回的内容）。 yylex() 每次调用它时都会返回一个标记；如果要扫描整个文件，则需要循环调用。当遇到输入结束时会返回0。

理解模式

模式 \"[^\"\n]+|[\\n]+\" 中间有一个 |；该运算符匹配以下任意一个因此，您正在匹配 \"[^\"\n]+ 或 [\\n]+\"。第一个匹配一个双引号，后跟任意数量的字符（但至少一个），不能是引号或换行符。因此，匹配 "aryan banyal ，不带结束引号，但包含左引号。替代方案的后半部分将匹配任意数量的字符（同样，至少一个），所有这些字符要么是反斜杠，要么是字母n，然后是一个双引号

（我不明白这种模式背后的想法，而且几乎可以肯定这不是您想要的。如果您调用了yylex 在匹配 "aryan 后再次banyal，结束引号不会被匹配，因为它将是紧邻的下一个字符，并且该模式坚持它前面至少有一个反斜杠或n< （也许您希望将其作为换行符，但也没有其中之一。）

我认为您可能想匹配整个带引号的字符串，然后仅保留引号之间的部分（如果有）。正确地编写了模式，这就是它应该有的匹配，然后您需要删除双引号。我将把写出正确的模式作为练习。您可能想阅读 Flex 手册中Flex 模式的简短描述；您的课堂笔记中可能还包含一些信息。

仅选择匹配的一部分

删除标记开头的引号很容易。只需要向 yytext 添加 1 即可。要去掉末尾的那个，您需要用 \0 覆盖它，从而提前一个字符终止字符串。这很容易做到，因为 Flex 在变量 yyleng 中为您提供了匹配的长度。因此，您可以设置 yytext[yyleng - 1] = '\0'，然后使用 yytext + 1 调用符号表函数。

如果上面的段落没有意义，您应该查看有关 C 中字符串处理的任何介绍性文本。请记住，在 C 中，字符串只不过是一个以 0 结尾的单个字符（小整数）数组。这使得某些事情变得非常简单。做起来很容易，而其他事情则有点痛苦（但绝不神秘）。

You have (at least) three (somewhat) unrelated problems.

Using the lexical scanner

Your code stops after reading a single token because you only call yylex() once (and ignore what it returns). yylex() returns a single token every time you call it; if you want to scan the entire file, you need to call it in a loop. It will return 0 when it encounters the end of input.

Understanding patterns

The pattern \"[^\"\n]+|[\\n]+\" has an | in the middle; that operator matches either of the patterns which surround it. So you are matching \"[^\"\n]+ or [\\n]+\". The first one matches a single double quote, followed by any number of characters (but at least one), which cannot be a quote or a new line. So that matches "aryan banyal without the closing quote but including the open quote. The second half of the alternative would match any number of characters (again, at least one) all of which are either a backslash or the letter n, and then a single double quote.

(I don't understand the thinking behind this pattern, and it is almost certainly not what you intended. Had you called yylex again after the match of "aryan banyal, the closing quote would not have been matched, because it would be the immediate next character, and the pattern insists that it be preceded by at least one backslash or n. (Maybe you intended that to be a newline, but there is not one of those either.)

I think you probably wanted to match the entire quoted string, and then to keep only the part between the quotes. If you had written the pattern correctly, that's what it would have matched, and then you would need to remove the double quotes. I'll leave writing the correct pattern as an exercise. You might want to read the short description of Flex patterns in the Flex manual; you probably also have some information in your class notes.

Selecting just a part of the match

It's easy to remove the quote at the beginning of the token. All that requires is adding one to yytext. To get rid of the one at the end, you need to overwrite it with a \0, thereby terminating the string one character earlier. That's easy to do because Flex provides you with the length of the match in the variable yyleng. So you could set yytext[yyleng - 1] = '\0' and then call your symbol table function with yytext + 1.

If the above paragraph did not make sense, you should review any introductory text on string processing in C. Remember that in C, a string is nothing but an array of single characters (small integers) terminated with a 0. That's makes some things very easy to do, and other things a bit painful (but never mysterious).

回复收藏 0 原文

~没有更多了~