C编译器的语义阶段

发布于 2024-11-02 02:48:32 字数 18146 浏览 2 评论 0原文

如果在示例 c 程序中写入 1=a,它不会将其检测为错误。我该如何解决这个问题?另外我如何处理变量的全局和局部范围。谢谢,如果有人能解决它

clexer.lex 源代码

D           [0-9]
L           [a-zA-Z_]
H           [a-fA-F0-9]
E           [Ee][+-]?{D}+
FS          (f|F|l|L)
IS          (u|U|l|L)*

%{
#include <stdio.h>
#include "y.tab.h"
int cnt=1;
int line=1;
char tempid[100];
%}

%%
"/*"            {comment();}

"auto"          { cnt+=yyleng;ECHO; return(AUTO); }
"break"         { cnt+=yyleng;ECHO; return(BREAK); }
"case"          { cnt+=yyleng;ECHO; return(CASE); }
"char"          { cnt+=yyleng;ECHO; return(CHAR); }
"const"         { cnt+=yyleng;ECHO; return(CONST); }
"continue"      { cnt+=yyleng;ECHO; return(CONTINUE); }
"default"       { cnt+=yyleng;ECHO; return(DEFAULT); }
"do"            { cnt+=yyleng;ECHO; return(DO); }
"double"        { cnt+=yyleng;ECHO; return(DOUBLE); }
"else"          { cnt+=yyleng;ECHO; return(ELSE); }
"enum"          { cnt+=yyleng;ECHO; return(ENUM); }
"extern"        { cnt+=yyleng;ECHO; return(EXTERN); }
"float"         { cnt+=yyleng;ECHO; return(FLOAT); }
"for"           { cnt+=yyleng;ECHO; return(FOR); }
"goto"          { cnt+=yyleng;ECHO; return(GOTO); }
"if"            { cnt+=yyleng;ECHO; return(IF); }
"int"           { cnt+=yyleng;ECHO; return(INT); }
"long"          { cnt+=yyleng;ECHO; return(LONG); }
"register"      { cnt+=yyleng;ECHO; return(REGISTER); }
"return"        { cnt+=yyleng;ECHO; return(RETURN); }
"short"         { cnt+=yyleng;ECHO; return(SHORT); }
"signed"        { cnt+=yyleng;ECHO; return(SIGNED); }
"sizeof"        { cnt+=yyleng;ECHO; return(SIZEOF); }
"static"        { cnt+=yyleng;ECHO; return(STATIC); }
"struct"        { cnt+=yyleng;ECHO; return(STRUCT); }
"switch"        { cnt+=yyleng;ECHO; return(SWITCH); }
"typedef"       { cnt+=yyleng;ECHO; return(TYPEDEF); }
"union"         { cnt+=yyleng;ECHO; return(UNION); }
"unsigned"      { cnt+=yyleng;ECHO; return(UNSIGNED); }
"void"          { cnt+=yyleng;ECHO; return(VOID); }
"volatile"      { cnt+=yyleng;ECHO; return(VOLATILE); }
"while"         { cnt+=yyleng;ECHO; return(WHILE); }
(['])+({L}|{D})+(['])           { cnt+=yyleng;ECHO; return(SINGLE); }
{L}({L}|{D})*       { cnt+=yyleng;ECHO; strcpy(tempid,yytext);return(IDENTIFIER); }

0[xX]{H}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
0{D}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+{IS}?       { cnt+=yyleng;ECHO; return(CONSTANT); }
L?'(\\.|[^\\'])+'   { cnt+=yyleng;ECHO; return(CONSTANT); }

{D}+{E}{FS}?        { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }

L?\"(\\.|[^\\"])*\" { cnt+=yyleng;ECHO; return(STRING_LITERAL); }

"..."           { cnt+=yyleng;ECHO; return(ELLIPSIS); }
">>="           { cnt+=yyleng;ECHO; return(RIGHT_ASSIGN); }
"<<="           { cnt+=yyleng;ECHO; return(LEFT_ASSIGN); }
"+="            { cnt+=yyleng;ECHO; return(ADD_ASSIGN); }
"-="            { cnt+=yyleng;ECHO; return(SUB_ASSIGN); }
"*="            { cnt+=yyleng;ECHO; return(MUL_ASSIGN); }
"/="            { cnt+=yyleng;ECHO; return(DIV_ASSIGN); }
"%="            { cnt+=yyleng;ECHO; return(MOD_ASSIGN); }
"&="            { cnt+=yyleng;ECHO; return(AND_ASSIGN); }
"^="            { cnt+=yyleng;ECHO; return(XOR_ASSIGN); }
"|="            { cnt+=yyleng;ECHO; return(OR_ASSIGN); }
">>"            { cnt+=yyleng;ECHO; return(RIGHT_OP); }
"<<"            { cnt+=yyleng;ECHO; return(LEFT_OP); }
"++"            { cnt+=yyleng;ECHO; return(INC_OP); }
"--"            { cnt+=yyleng;ECHO; return(DEC_OP); }
"->"            { cnt+=yyleng;ECHO; return(PTR_OP); }
"&&"            { cnt+=yyleng;ECHO; return(AND_OP); }
"||"            { cnt+=yyleng;ECHO; return(OR_OP); }
"<="            { cnt+=yyleng;ECHO; return(LE_OP); }
">="            { cnt+=yyleng;ECHO; return(GE_OP); }
"=="            { cnt+=yyleng;ECHO; return(EQ_OP); }
"!="            { cnt+=yyleng;ECHO; return(NE_OP); }
";"         { cnt+=yyleng;ECHO; return(';'); }
("{"|"<%")      { cnt+=yyleng;ECHO; return('{'); }
("}"|"%>")      { cnt+=yyleng;ECHO; return('}'); }
","         { cnt+=yyleng;ECHO; return(','); }
":"         { cnt+=yyleng;ECHO; return(':'); }
"="         { cnt+=yyleng;ECHO; return('='); }
"("         { cnt+=yyleng;ECHO; return('('); }
")"         { cnt+=yyleng;ECHO; return(')'); }
("["|"<:")      { cnt+=yyleng;ECHO; return('['); }
("]"|":>")      { cnt+=yyleng;ECHO; return(']'); }
"."         { cnt+=yyleng;ECHO; return('.'); }
"&"         { cnt+=yyleng;ECHO; return('&'); }
"!"         { cnt+=yyleng;ECHO; return('!'); }
"~"         { cnt+=yyleng;ECHO; return('~'); }
"-"         { cnt+=yyleng;ECHO; return('-'); }
"+"         { cnt+=yyleng;ECHO; return('+'); }
"*"         { cnt+=yyleng;ECHO; return('*'); }
"/"         { cnt+=yyleng;ECHO; return('/'); }
"%"         { cnt+=yyleng;ECHO; return('%'); }
"<"         { cnt+=yyleng;ECHO; return('<'); }
">"         { cnt+=yyleng;ECHO; return('>'); }
"^"         { cnt+=yyleng;ECHO; return('^'); }
"|"         { cnt+=yyleng;ECHO; return('|'); }
"?"         { cnt+=yyleng;ECHO; return('?'); }

[ ]         {cnt+=yyleng;ECHO;}
[\t\v\f]        { cnt+=yyleng; }
[\n]            {line++;cnt=1;}
.           { /* ignore bad characters */ }

%%
yywrap()
{
    return(1);
}
comment()
{
    char c, c1;
loop:
    while ((c = input()) != '*' && c != 0)
    {
        if(c=='\n') {line++;cnt=1;} 
        else    {cnt++;}
    }
        //putchar(c); PUTCHAR only if comments need to be shown! 
    if ((c1 = input()) != '/' && c1 != 0)
    {
        unput(c1);
        goto loop;
    }
}

cparser.yacc 源代码

%{
#include <stdio.h>
#include <string.h>
#include "symbol_table.h"
extern FILE *yyin;
extern FILE *yyout;
extern int column;
extern int line;
extern int cnt;
extern char *yytext,tempid[100];
int temp,err,err1=0;

install()
{ 
    symrec *s;
    s = getsym (tempid);
    if (s == 0)
    s = putsym (tempid,temp);
    else 
    {
        printf(" VOID=1 ");
     printf(" CHAR=2 ");
     printf(" INT=3 ");
     printf(" FLOAT=4 ");
     printf(" DOUBLE=4 ");
        printf( "\n\nThere is a Semantic error at Pos : %d : %d : %s is already defined as %d\n\n",line,cnt,s->name,s->type );
        exit(0);    
    }
    err1=1;
}
int context_check()
{ 
    symrec *s;
    s = getsym(tempid); 
    if (s == 0 )
    {printf( "\n\nThere is a Semantic error at Pos : %d : %d : %s is an undeclared identifier\n\n",line,cnt,tempid);exit(0);return 0;}
    else
    return(s->type);
    err1=1;

}
type_err(int t1,int t2)
{
    if(t1&&t2)
    {
     printf(" VOID=1 ");
     printf(" CHAR=2 ");
     printf(" INT=3 ");
     printf(" FLOAT=4 ");
     printf(" DOUBLE=4 ");  
    printf( "\n\nThere is a Semantic error at Pos : %d : %d : Type mismatch for %s between %d and %d \n\n",line,cnt,tempid,t1,t2);
    err1=1;
    exit(0);    
    }   
}

%}



%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME SINGLE

%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS

%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE

%start translation_unit
%%

primary_expression
    : IDENTIFIER    {$$=context_check();}
    | CONSTANT
    | STRING_LITERAL
    | '(' expression ')' {$$= $2;}
    ;

postfix_expression
    : primary_expression    {$$=$1;}
    | postfix_expression '[' expression ']'
    | postfix_expression '(' ')'
    | postfix_expression '(' argument_expression_list ')'
    | postfix_expression '.' IDENTIFIER 
    | postfix_expression PTR_OP IDENTIFIER
    | postfix_expression INC_OP
    | postfix_expression DEC_OP
    ;

argument_expression_list
    : assignment_expression
    | argument_expression_list ',' assignment_expression
    ;

unary_expression
    : postfix_expression    {$$=$1;}
    | INC_OP unary_expression
    | DEC_OP unary_expression
    | unary_operator cast_expression
    | SIZEOF unary_expression
    | SIZEOF '(' type_name ')'
    ;

unary_operator
    : '&'
    | '*'
    | '+'
    | '-'
    | '~'
    | '!'
    ;

cast_expression
    : unary_expression  {$$=$1;}
    | '(' type_name ')' cast_expression
    ;

multiplicative_expression
    : cast_expression   {$$=$1;}
    | multiplicative_expression '*' cast_expression
    | multiplicative_expression '/' cast_expression
    | multiplicative_expression '%' cast_expression
    ;

additive_expression
    : multiplicative_expression {$$=$1;}
    | additive_expression '+' multiplicative_expression
    | additive_expression '-' multiplicative_expression
    ;

shift_expression
    : additive_expression   {$$=$1;}
    | shift_expression LEFT_OP additive_expression
    | shift_expression RIGHT_OP additive_expression
    ;

relational_expression
    : shift_expression  {$$=$1;}
    | relational_expression '<' shift_expression
    | relational_expression '>' shift_expression
    | relational_expression LE_OP shift_expression
    | relational_expression GE_OP shift_expression
    ;

equality_expression
    : relational_expression {$$=$1;}
    | equality_expression EQ_OP relational_expression
    | equality_expression NE_OP relational_expression
    ;

and_expression
    : equality_expression   {$$=$1;}
    | and_expression '&' equality_expression
    ;

exclusive_or_expression
    : and_expression    {$$=$1;}
    | exclusive_or_expression '^' and_expression
    ;

inclusive_or_expression 
    : exclusive_or_expression   {$$=$1;}
    | inclusive_or_expression '|' exclusive_or_expression
    ;

logical_and_expression
    : inclusive_or_expression   {$$=$1;}
    | logical_and_expression AND_OP inclusive_or_expression
    ;

logical_or_expression
    : logical_and_expression    {$$=$1;}
    | logical_or_expression OR_OP logical_and_expression
    ;

conditional_expression
    : logical_or_expression {$$=$1;}
    | logical_or_expression '?' expression ':' conditional_expression
    ;

assignment_expression
    : conditional_expression    {$$=$1;}
    | unary_expression assignment_operator assignment_expression    {if($1!=$3){type_err($1,$3);}}
    ;

assignment_operator
    : '='
    | MUL_ASSIGN
    | DIV_ASSIGN
    | MOD_ASSIGN
    | ADD_ASSIGN
    | SUB_ASSIGN
    | LEFT_ASSIGN
    | RIGHT_ASSIGN
    | AND_ASSIGN
    | XOR_ASSIGN
    | OR_ASSIGN
    ;

expression
    : assignment_expression {$$=$1;}
    | expression ',' assignment_expression
    ;

constant_expression
    : conditional_expression
    ;

declaration
    : declaration_specifiers ';'
    | declaration_specifiers init_declarator_list ';'
    ;

declaration_specifiers
    : storage_class_specifier
    | storage_class_specifier declaration_specifiers
    | type_specifier
    | type_specifier declaration_specifiers
    | type_qualifier
    | type_qualifier declaration_specifiers
    ;

init_declarator_list
    : init_declarator
    | init_declarator_list ',' init_declarator
    ;

init_declarator
    : declarator
    | declarator '=' initializer
    ;

storage_class_specifier
    : TYPEDEF
    | EXTERN
    | STATIC
    | AUTO
    | REGISTER
    ;

type_specifier
    : VOID  {temp=1;}
    | CHAR  {temp=2;}
    | SHORT {temp=3;}
    | INT   {temp=3;}
    | LONG  {temp=3;}
    | FLOAT {temp=4;}
    | DOUBLE    {temp=4;}
    | SIGNED
    | UNSIGNED
    | struct_or_union_specifier
    | enum_specifier
    | TYPE_NAME
    ;

struct_or_union_specifier
    : struct_or_union IDENTIFIER '{' struct_declaration_list '}'    {install();}
    | struct_or_union '{' struct_declaration_list '}'
    | struct_or_union IDENTIFIER    {install();}
    ;

struct_or_union
    : STRUCT
    | UNION
    ;

struct_declaration_list
    : struct_declaration
    | struct_declaration_list struct_declaration
    ;

struct_declaration
    : specifier_qualifier_list struct_declarator_list ';'
    ;

specifier_qualifier_list
    : type_specifier specifier_qualifier_list
    | type_specifier
    | type_qualifier specifier_qualifier_list
    | type_qualifier
    ;

struct_declarator_list
    : struct_declarator
    | struct_declarator_list ',' struct_declarator
    ;

struct_declarator
    : declarator
    | ':' constant_expression
    | declarator ':' constant_expression
    ;

enum_specifier
    : ENUM '{' enumerator_list '}'
    | ENUM IDENTIFIER '{' enumerator_list '}'
    | ENUM IDENTIFIER
    ;

enumerator_list
    : enumerator
    | enumerator_list ',' enumerator
    ;

enumerator
    : IDENTIFIER    {context_check();}
    | IDENTIFIER '=' constant_expression    //{context_check();}
    ;

type_qualifier
    : CONST
    | VOLATILE
    ;

declarator
    : pointer direct_declarator
    | direct_declarator
    ;

direct_declarator
    : IDENTIFIER    {install();}
    | '(' declarator ')'
    | direct_declarator '[' constant_expression ']'
    | direct_declarator '[' ']'
    | direct_declarator '(' parameter_type_list ')'
    | direct_declarator '(' identifier_list ')'
    | direct_declarator '(' ')'
    ;

pointer
    : '*'
    | '*' type_qualifier_list
    | '*' pointer
    | '*' type_qualifier_list pointer
    ;

type_qualifier_list
    : type_qualifier
    | type_qualifier_list type_qualifier
    ;


parameter_type_list
    : parameter_list
    | parameter_list ',' ELLIPSIS
    ;

parameter_list
    : parameter_declaration
    | parameter_list ',' parameter_declaration
    ;

parameter_declaration
    : declaration_specifiers declarator
    | declaration_specifiers abstract_declarator
    | declaration_specifiers
    ;

identifier_list
    : IDENTIFIER    {install();}
    | identifier_list ',' IDENTIFIER    {install();}
    ;

type_name
    : specifier_qualifier_list
    | specifier_qualifier_list abstract_declarator
    ;

abstract_declarator
    : pointer
    | direct_abstract_declarator
    | pointer direct_abstract_declarator
    ;

direct_abstract_declarator
    : '(' abstract_declarator ')'
    | '[' ']'
    | '[' constant_expression ']'
    | direct_abstract_declarator '[' ']'
    | direct_abstract_declarator '[' constant_expression ']'
    | '(' ')'
    | '(' parameter_type_list ')'
    | direct_abstract_declarator '(' ')'
    | direct_abstract_declarator '(' parameter_type_list ')'
    ;

initializer
    : assignment_expression {$$=$1;}
    | '{' initializer_list '}'
    | '{' initializer_list ',' '}'
    ;

initializer_list
    : initializer
    | initializer_list ',' initializer
    ;

statement
    : labeled_statement
    | compound_statement
    | expression_statement
    | selection_statement
    | iteration_statement
    | jump_statement
    ;

labeled_statement
    : IDENTIFIER ':' statement  //{context_check();}
    | CASE constant_expression ':' statement
    | DEFAULT ':' statement
    ;

compound_statement
    : '{' '}'
    | '{' statement_list '}'
    | '{' declaration_list '}'
    | '{' declaration_list statement_list '}'
    ;

declaration_list
    : declaration
    | declaration_list declaration
    ;

statement_list
    : statement
    | statement_list statement
    ;

expression_statement
    : ';'
    | expression ';'
    ;

selection_statement
    : IF '(' expression ')' statement  %prec LOWER_THAN_ELSE ;

    | IF '(' expression ')' statement ELSE statement
    | SWITCH '(' expression ')' statement
    ;

iteration_statement
    : WHILE '(' expression ')' statement
    | DO statement WHILE '(' expression ')' ';'
    | FOR '(' expression_statement expression_statement ')' statement
    | FOR '(' expression_statement expression_statement expression ')' statement
    ;

jump_statement
    : GOTO IDENTIFIER ';'   //{context_check();}
    | CONTINUE ';'
    | BREAK ';'
    | RETURN ';'
    | RETURN expression ';'
    ;

translation_unit
    : external_declaration
    | translation_unit external_declaration
    ;

external_declaration
    : function_definition
    | declaration
    ;

function_definition
    : declaration_specifiers declarator declaration_list compound_statement
    | declaration_specifiers declarator compound_statement
    | declarator declaration_list compound_statement
    | declarator compound_statement
    ;
%%
yyerror(s)
char *s;
{
    fflush(stdout);err=1;
    printf("Syntax error at Pos : %d : %d\n",line,cnt);
    exit(0);
    //printf("\n%*s\n%*s\n", column, "^", column, s);
}
main(argc,argv)
int argc;
char **argv;
{

    char *fname;    
    ++argv,--argc;/*skip program name*/
    if(argc>0)
    {
        yyin=fopen(argv[0],"r");
        fname=argv[0];
        strcat(fname,"_output");
        yyout=fopen(fname,"w");
    }
    else
    {
        printf("Please give the c filename as an argument.\n");
    }
    yyparse();
    if(err==0)
    printf("No Syntax errors found!\n");
    fname=argv[0];strcat(fname,"_symbol-table");
    FILE *sym_tab=fopen(fname,"w");
    fprintf(sym_tab,"Type\tSymbol\n");
    symrec *ptr;    
    for(ptr=sym_table;ptr!=(symrec *)0;ptr=(symrec *)ptr->next)
    {
        fprintf(sym_tab,"%d\t%s\n",ptr->type,ptr->name);
    }
    fclose(sym_tab);    

}   

符号表.h 源代码

#define t_void  1
#define t_char  2
#define t_int   3
#define t_float 4
struct symrec
{
    char *name;
    int type;
    struct symrec *next;
};
typedef struct symrec symrec;
symrec *sym_table = (symrec *)0;
symrec *putsym();
symrec *getsym();
symrec *putsym(char *sym_name,int sym_type)
{
    symrec *ptr;
    ptr=(symrec *)malloc(sizeof(symrec));
    ptr->name=(char *)malloc(strlen(sym_name)+1);
    strcpy(ptr->name,sym_name);
    ptr->type=sym_type;
    ptr->next=(struct symrec *)sym_table;
    sym_table=ptr;
    return ptr;
}
symrec *getsym(char *sym_name)
{
    symrec *ptr;
    for(ptr=sym_table;ptr!=(symrec *)0;ptr=(symrec *)ptr->next)
    if(strcmp(ptr->name,sym_name)==0)
    return ptr;
    return 0;
}

if write 1=a in the sample c program, it doesnt detect it as an error. How do i solve this problem? Also how do i do global and local scope of variables. Thanks if anyone can solve it

clexer.lex source code

D           [0-9]
L           [a-zA-Z_]
H           [a-fA-F0-9]
E           [Ee][+-]?{D}+
FS          (f|F|l|L)
IS          (u|U|l|L)*

%{
#include <stdio.h>
#include "y.tab.h"
int cnt=1;
int line=1;
char tempid[100];
%}

%%
"/*"            {comment();}

"auto"          { cnt+=yyleng;ECHO; return(AUTO); }
"break"         { cnt+=yyleng;ECHO; return(BREAK); }
"case"          { cnt+=yyleng;ECHO; return(CASE); }
"char"          { cnt+=yyleng;ECHO; return(CHAR); }
"const"         { cnt+=yyleng;ECHO; return(CONST); }
"continue"      { cnt+=yyleng;ECHO; return(CONTINUE); }
"default"       { cnt+=yyleng;ECHO; return(DEFAULT); }
"do"            { cnt+=yyleng;ECHO; return(DO); }
"double"        { cnt+=yyleng;ECHO; return(DOUBLE); }
"else"          { cnt+=yyleng;ECHO; return(ELSE); }
"enum"          { cnt+=yyleng;ECHO; return(ENUM); }
"extern"        { cnt+=yyleng;ECHO; return(EXTERN); }
"float"         { cnt+=yyleng;ECHO; return(FLOAT); }
"for"           { cnt+=yyleng;ECHO; return(FOR); }
"goto"          { cnt+=yyleng;ECHO; return(GOTO); }
"if"            { cnt+=yyleng;ECHO; return(IF); }
"int"           { cnt+=yyleng;ECHO; return(INT); }
"long"          { cnt+=yyleng;ECHO; return(LONG); }
"register"      { cnt+=yyleng;ECHO; return(REGISTER); }
"return"        { cnt+=yyleng;ECHO; return(RETURN); }
"short"         { cnt+=yyleng;ECHO; return(SHORT); }
"signed"        { cnt+=yyleng;ECHO; return(SIGNED); }
"sizeof"        { cnt+=yyleng;ECHO; return(SIZEOF); }
"static"        { cnt+=yyleng;ECHO; return(STATIC); }
"struct"        { cnt+=yyleng;ECHO; return(STRUCT); }
"switch"        { cnt+=yyleng;ECHO; return(SWITCH); }
"typedef"       { cnt+=yyleng;ECHO; return(TYPEDEF); }
"union"         { cnt+=yyleng;ECHO; return(UNION); }
"unsigned"      { cnt+=yyleng;ECHO; return(UNSIGNED); }
"void"          { cnt+=yyleng;ECHO; return(VOID); }
"volatile"      { cnt+=yyleng;ECHO; return(VOLATILE); }
"while"         { cnt+=yyleng;ECHO; return(WHILE); }
(['])+({L}|{D})+(['])           { cnt+=yyleng;ECHO; return(SINGLE); }
{L}({L}|{D})*       { cnt+=yyleng;ECHO; strcpy(tempid,yytext);return(IDENTIFIER); }

0[xX]{H}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
0{D}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+{IS}?       { cnt+=yyleng;ECHO; return(CONSTANT); }
L?'(\\.|[^\\'])+'   { cnt+=yyleng;ECHO; return(CONSTANT); }

{D}+{E}{FS}?        { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }

L?\"(\\.|[^\\"])*\" { cnt+=yyleng;ECHO; return(STRING_LITERAL); }

"..."           { cnt+=yyleng;ECHO; return(ELLIPSIS); }
">>="           { cnt+=yyleng;ECHO; return(RIGHT_ASSIGN); }
"<<="           { cnt+=yyleng;ECHO; return(LEFT_ASSIGN); }
"+="            { cnt+=yyleng;ECHO; return(ADD_ASSIGN); }
"-="            { cnt+=yyleng;ECHO; return(SUB_ASSIGN); }
"*="            { cnt+=yyleng;ECHO; return(MUL_ASSIGN); }
"/="            { cnt+=yyleng;ECHO; return(DIV_ASSIGN); }
"%="            { cnt+=yyleng;ECHO; return(MOD_ASSIGN); }
"&="            { cnt+=yyleng;ECHO; return(AND_ASSIGN); }
"^="            { cnt+=yyleng;ECHO; return(XOR_ASSIGN); }
"|="            { cnt+=yyleng;ECHO; return(OR_ASSIGN); }
">>"            { cnt+=yyleng;ECHO; return(RIGHT_OP); }
"<<"            { cnt+=yyleng;ECHO; return(LEFT_OP); }
"++"            { cnt+=yyleng;ECHO; return(INC_OP); }
"--"            { cnt+=yyleng;ECHO; return(DEC_OP); }
"->"            { cnt+=yyleng;ECHO; return(PTR_OP); }
"&&"            { cnt+=yyleng;ECHO; return(AND_OP); }
"||"            { cnt+=yyleng;ECHO; return(OR_OP); }
"<="            { cnt+=yyleng;ECHO; return(LE_OP); }
">="            { cnt+=yyleng;ECHO; return(GE_OP); }
"=="            { cnt+=yyleng;ECHO; return(EQ_OP); }
"!="            { cnt+=yyleng;ECHO; return(NE_OP); }
";"         { cnt+=yyleng;ECHO; return(';'); }
("{"|"<%")      { cnt+=yyleng;ECHO; return('{'); }
("}"|"%>")      { cnt+=yyleng;ECHO; return('}'); }
","         { cnt+=yyleng;ECHO; return(','); }
":"         { cnt+=yyleng;ECHO; return(':'); }
"="         { cnt+=yyleng;ECHO; return('='); }
"("         { cnt+=yyleng;ECHO; return('('); }
")"         { cnt+=yyleng;ECHO; return(')'); }
("["|"<:")      { cnt+=yyleng;ECHO; return('['); }
("]"|":>")      { cnt+=yyleng;ECHO; return(']'); }
"."         { cnt+=yyleng;ECHO; return('.'); }
"&"         { cnt+=yyleng;ECHO; return('&'); }
"!"         { cnt+=yyleng;ECHO; return('!'); }
"~"         { cnt+=yyleng;ECHO; return('~'); }
"-"         { cnt+=yyleng;ECHO; return('-'); }
"+"         { cnt+=yyleng;ECHO; return('+'); }
"*"         { cnt+=yyleng;ECHO; return('*'); }
"/"         { cnt+=yyleng;ECHO; return('/'); }
"%"         { cnt+=yyleng;ECHO; return('%'); }
"<"         { cnt+=yyleng;ECHO; return('<'); }
">"         { cnt+=yyleng;ECHO; return('>'); }
"^"         { cnt+=yyleng;ECHO; return('^'); }
"|"         { cnt+=yyleng;ECHO; return('|'); }
"?"         { cnt+=yyleng;ECHO; return('?'); }

[ ]         {cnt+=yyleng;ECHO;}
[\t\v\f]        { cnt+=yyleng; }
[\n]            {line++;cnt=1;}
.           { /* ignore bad characters */ }

%%
yywrap()
{
    return(1);
}
comment()
{
    char c, c1;
loop:
    while ((c = input()) != '*' && c != 0)
    {
        if(c=='\n') {line++;cnt=1;} 
        else    {cnt++;}
    }
        //putchar(c); PUTCHAR only if comments need to be shown! 
    if ((c1 = input()) != '/' && c1 != 0)
    {
        unput(c1);
        goto loop;
    }
}

cparser.yacc source code

%{
#include <stdio.h>
#include <string.h>
#include "symbol_table.h"
extern FILE *yyin;
extern FILE *yyout;
extern int column;
extern int line;
extern int cnt;
extern char *yytext,tempid[100];
int temp,err,err1=0;

install()
{ 
    symrec *s;
    s = getsym (tempid);
    if (s == 0)
    s = putsym (tempid,temp);
    else 
    {
        printf(" VOID=1 ");
     printf(" CHAR=2 ");
     printf(" INT=3 ");
     printf(" FLOAT=4 ");
     printf(" DOUBLE=4 ");
        printf( "\n\nThere is a Semantic error at Pos : %d : %d : %s is already defined as %d\n\n",line,cnt,s->name,s->type );
        exit(0);    
    }
    err1=1;
}
int context_check()
{ 
    symrec *s;
    s = getsym(tempid); 
    if (s == 0 )
    {printf( "\n\nThere is a Semantic error at Pos : %d : %d : %s is an undeclared identifier\n\n",line,cnt,tempid);exit(0);return 0;}
    else
    return(s->type);
    err1=1;

}
type_err(int t1,int t2)
{
    if(t1&&t2)
    {
     printf(" VOID=1 ");
     printf(" CHAR=2 ");
     printf(" INT=3 ");
     printf(" FLOAT=4 ");
     printf(" DOUBLE=4 ");  
    printf( "\n\nThere is a Semantic error at Pos : %d : %d : Type mismatch for %s between %d and %d \n\n",line,cnt,tempid,t1,t2);
    err1=1;
    exit(0);    
    }   
}

%}



%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME SINGLE

%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS

%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE

%start translation_unit
%%

primary_expression
    : IDENTIFIER    {$=context_check();}
    | CONSTANT
    | STRING_LITERAL
    | '(' expression ')' {$= $2;}
    ;

postfix_expression
    : primary_expression    {$=$1;}
    | postfix_expression '[' expression ']'
    | postfix_expression '(' ')'
    | postfix_expression '(' argument_expression_list ')'
    | postfix_expression '.' IDENTIFIER 
    | postfix_expression PTR_OP IDENTIFIER
    | postfix_expression INC_OP
    | postfix_expression DEC_OP
    ;

argument_expression_list
    : assignment_expression
    | argument_expression_list ',' assignment_expression
    ;

unary_expression
    : postfix_expression    {$=$1;}
    | INC_OP unary_expression
    | DEC_OP unary_expression
    | unary_operator cast_expression
    | SIZEOF unary_expression
    | SIZEOF '(' type_name ')'
    ;

unary_operator
    : '&'
    | '*'
    | '+'
    | '-'
    | '~'
    | '!'
    ;

cast_expression
    : unary_expression  {$=$1;}
    | '(' type_name ')' cast_expression
    ;

multiplicative_expression
    : cast_expression   {$=$1;}
    | multiplicative_expression '*' cast_expression
    | multiplicative_expression '/' cast_expression
    | multiplicative_expression '%' cast_expression
    ;

additive_expression
    : multiplicative_expression {$=$1;}
    | additive_expression '+' multiplicative_expression
    | additive_expression '-' multiplicative_expression
    ;

shift_expression
    : additive_expression   {$=$1;}
    | shift_expression LEFT_OP additive_expression
    | shift_expression RIGHT_OP additive_expression
    ;

relational_expression
    : shift_expression  {$=$1;}
    | relational_expression '<' shift_expression
    | relational_expression '>' shift_expression
    | relational_expression LE_OP shift_expression
    | relational_expression GE_OP shift_expression
    ;

equality_expression
    : relational_expression {$=$1;}
    | equality_expression EQ_OP relational_expression
    | equality_expression NE_OP relational_expression
    ;

and_expression
    : equality_expression   {$=$1;}
    | and_expression '&' equality_expression
    ;

exclusive_or_expression
    : and_expression    {$=$1;}
    | exclusive_or_expression '^' and_expression
    ;

inclusive_or_expression 
    : exclusive_or_expression   {$=$1;}
    | inclusive_or_expression '|' exclusive_or_expression
    ;

logical_and_expression
    : inclusive_or_expression   {$=$1;}
    | logical_and_expression AND_OP inclusive_or_expression
    ;

logical_or_expression
    : logical_and_expression    {$=$1;}
    | logical_or_expression OR_OP logical_and_expression
    ;

conditional_expression
    : logical_or_expression {$=$1;}
    | logical_or_expression '?' expression ':' conditional_expression
    ;

assignment_expression
    : conditional_expression    {$=$1;}
    | unary_expression assignment_operator assignment_expression    {if($1!=$3){type_err($1,$3);}}
    ;

assignment_operator
    : '='
    | MUL_ASSIGN
    | DIV_ASSIGN
    | MOD_ASSIGN
    | ADD_ASSIGN
    | SUB_ASSIGN
    | LEFT_ASSIGN
    | RIGHT_ASSIGN
    | AND_ASSIGN
    | XOR_ASSIGN
    | OR_ASSIGN
    ;

expression
    : assignment_expression {$=$1;}
    | expression ',' assignment_expression
    ;

constant_expression
    : conditional_expression
    ;

declaration
    : declaration_specifiers ';'
    | declaration_specifiers init_declarator_list ';'
    ;

declaration_specifiers
    : storage_class_specifier
    | storage_class_specifier declaration_specifiers
    | type_specifier
    | type_specifier declaration_specifiers
    | type_qualifier
    | type_qualifier declaration_specifiers
    ;

init_declarator_list
    : init_declarator
    | init_declarator_list ',' init_declarator
    ;

init_declarator
    : declarator
    | declarator '=' initializer
    ;

storage_class_specifier
    : TYPEDEF
    | EXTERN
    | STATIC
    | AUTO
    | REGISTER
    ;

type_specifier
    : VOID  {temp=1;}
    | CHAR  {temp=2;}
    | SHORT {temp=3;}
    | INT   {temp=3;}
    | LONG  {temp=3;}
    | FLOAT {temp=4;}
    | DOUBLE    {temp=4;}
    | SIGNED
    | UNSIGNED
    | struct_or_union_specifier
    | enum_specifier
    | TYPE_NAME
    ;

struct_or_union_specifier
    : struct_or_union IDENTIFIER '{' struct_declaration_list '}'    {install();}
    | struct_or_union '{' struct_declaration_list '}'
    | struct_or_union IDENTIFIER    {install();}
    ;

struct_or_union
    : STRUCT
    | UNION
    ;

struct_declaration_list
    : struct_declaration
    | struct_declaration_list struct_declaration
    ;

struct_declaration
    : specifier_qualifier_list struct_declarator_list ';'
    ;

specifier_qualifier_list
    : type_specifier specifier_qualifier_list
    | type_specifier
    | type_qualifier specifier_qualifier_list
    | type_qualifier
    ;

struct_declarator_list
    : struct_declarator
    | struct_declarator_list ',' struct_declarator
    ;

struct_declarator
    : declarator
    | ':' constant_expression
    | declarator ':' constant_expression
    ;

enum_specifier
    : ENUM '{' enumerator_list '}'
    | ENUM IDENTIFIER '{' enumerator_list '}'
    | ENUM IDENTIFIER
    ;

enumerator_list
    : enumerator
    | enumerator_list ',' enumerator
    ;

enumerator
    : IDENTIFIER    {context_check();}
    | IDENTIFIER '=' constant_expression    //{context_check();}
    ;

type_qualifier
    : CONST
    | VOLATILE
    ;

declarator
    : pointer direct_declarator
    | direct_declarator
    ;

direct_declarator
    : IDENTIFIER    {install();}
    | '(' declarator ')'
    | direct_declarator '[' constant_expression ']'
    | direct_declarator '[' ']'
    | direct_declarator '(' parameter_type_list ')'
    | direct_declarator '(' identifier_list ')'
    | direct_declarator '(' ')'
    ;

pointer
    : '*'
    | '*' type_qualifier_list
    | '*' pointer
    | '*' type_qualifier_list pointer
    ;

type_qualifier_list
    : type_qualifier
    | type_qualifier_list type_qualifier
    ;


parameter_type_list
    : parameter_list
    | parameter_list ',' ELLIPSIS
    ;

parameter_list
    : parameter_declaration
    | parameter_list ',' parameter_declaration
    ;

parameter_declaration
    : declaration_specifiers declarator
    | declaration_specifiers abstract_declarator
    | declaration_specifiers
    ;

identifier_list
    : IDENTIFIER    {install();}
    | identifier_list ',' IDENTIFIER    {install();}
    ;

type_name
    : specifier_qualifier_list
    | specifier_qualifier_list abstract_declarator
    ;

abstract_declarator
    : pointer
    | direct_abstract_declarator
    | pointer direct_abstract_declarator
    ;

direct_abstract_declarator
    : '(' abstract_declarator ')'
    | '[' ']'
    | '[' constant_expression ']'
    | direct_abstract_declarator '[' ']'
    | direct_abstract_declarator '[' constant_expression ']'
    | '(' ')'
    | '(' parameter_type_list ')'
    | direct_abstract_declarator '(' ')'
    | direct_abstract_declarator '(' parameter_type_list ')'
    ;

initializer
    : assignment_expression {$=$1;}
    | '{' initializer_list '}'
    | '{' initializer_list ',' '}'
    ;

initializer_list
    : initializer
    | initializer_list ',' initializer
    ;

statement
    : labeled_statement
    | compound_statement
    | expression_statement
    | selection_statement
    | iteration_statement
    | jump_statement
    ;

labeled_statement
    : IDENTIFIER ':' statement  //{context_check();}
    | CASE constant_expression ':' statement
    | DEFAULT ':' statement
    ;

compound_statement
    : '{' '}'
    | '{' statement_list '}'
    | '{' declaration_list '}'
    | '{' declaration_list statement_list '}'
    ;

declaration_list
    : declaration
    | declaration_list declaration
    ;

statement_list
    : statement
    | statement_list statement
    ;

expression_statement
    : ';'
    | expression ';'
    ;

selection_statement
    : IF '(' expression ')' statement  %prec LOWER_THAN_ELSE ;

    | IF '(' expression ')' statement ELSE statement
    | SWITCH '(' expression ')' statement
    ;

iteration_statement
    : WHILE '(' expression ')' statement
    | DO statement WHILE '(' expression ')' ';'
    | FOR '(' expression_statement expression_statement ')' statement
    | FOR '(' expression_statement expression_statement expression ')' statement
    ;

jump_statement
    : GOTO IDENTIFIER ';'   //{context_check();}
    | CONTINUE ';'
    | BREAK ';'
    | RETURN ';'
    | RETURN expression ';'
    ;

translation_unit
    : external_declaration
    | translation_unit external_declaration
    ;

external_declaration
    : function_definition
    | declaration
    ;

function_definition
    : declaration_specifiers declarator declaration_list compound_statement
    | declaration_specifiers declarator compound_statement
    | declarator declaration_list compound_statement
    | declarator compound_statement
    ;
%%
yyerror(s)
char *s;
{
    fflush(stdout);err=1;
    printf("Syntax error at Pos : %d : %d\n",line,cnt);
    exit(0);
    //printf("\n%*s\n%*s\n", column, "^", column, s);
}
main(argc,argv)
int argc;
char **argv;
{

    char *fname;    
    ++argv,--argc;/*skip program name*/
    if(argc>0)
    {
        yyin=fopen(argv[0],"r");
        fname=argv[0];
        strcat(fname,"_output");
        yyout=fopen(fname,"w");
    }
    else
    {
        printf("Please give the c filename as an argument.\n");
    }
    yyparse();
    if(err==0)
    printf("No Syntax errors found!\n");
    fname=argv[0];strcat(fname,"_symbol-table");
    FILE *sym_tab=fopen(fname,"w");
    fprintf(sym_tab,"Type\tSymbol\n");
    symrec *ptr;    
    for(ptr=sym_table;ptr!=(symrec *)0;ptr=(symrec *)ptr->next)
    {
        fprintf(sym_tab,"%d\t%s\n",ptr->type,ptr->name);
    }
    fclose(sym_tab);    

}   

Symbol table.h source code

#define t_void  1
#define t_char  2
#define t_int   3
#define t_float 4
struct symrec
{
    char *name;
    int type;
    struct symrec *next;
};
typedef struct symrec symrec;
symrec *sym_table = (symrec *)0;
symrec *putsym();
symrec *getsym();
symrec *putsym(char *sym_name,int sym_type)
{
    symrec *ptr;
    ptr=(symrec *)malloc(sizeof(symrec));
    ptr->name=(char *)malloc(strlen(sym_name)+1);
    strcpy(ptr->name,sym_name);
    ptr->type=sym_type;
    ptr->next=(struct symrec *)sym_table;
    sym_table=ptr;
    return ptr;
}
symrec *getsym(char *sym_name)
{
    symrec *ptr;
    for(ptr=sym_table;ptr!=(symrec *)0;ptr=(symrec *)ptr->next)
    if(strcmp(ptr->name,sym_name)==0)
    return ptr;
    return 0;
}

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

咿呀咿呀哟 2024-11-09 02:48:32

一般来说,当您进行赋值操作时,您需要检查左操作数以确保它是左值,如果不是,则发出错误。这通常是作为类型检查的一部分完成的——您将值的属性(例如,它是否是左值)与类型一起保存,并检查这些属性对于值的每次使用是否正确。

因此,您可能要做的就是使用 %union 定义一个可以保存此信息的解析器值对象:

%union {
    struct {
        Type  *type;
        int   is_lvalue;
    } valinfo;
}
%type<valinfo> assignment_expression unary_expression

然后,您的分配规则将检查此信息以及类型:

assignment_expression:
    unary_expression assignment_operator assignment_expression {
        if (!$1.is_lvalue)
            error("assigning to non-lvalue");
        if ($1.type != $3.type && !type_is_implicitly_convertable($3.type, $1.type))
            error("type mismatch in assignment");
        $.type = $1. type;
        $.is_lvalue = 0; }

请注意,您需要确保在每个规则操作中正确设置$$,其值可能被其他规则操作使用;您的代码无法做到这一点,因此很可能不会做任何有用的事情。

In general terms, when you have an assignment operation, you need to check the left operand to make sure its an lvalue and issue an error if its not. This is most commonly done as part of typechecking -- you keep attributes about values (eg, is it an lvalue or not) along with the type, and check that those attributes are correct for each use of a value.

So what you might do is use %union to define a parser value object that can hold this info:

%union {
    struct {
        Type  *type;
        int   is_lvalue;
    } valinfo;
}
%type<valinfo> assignment_expression unary_expression

Then, your rule for assignments would check this along with the type:

assignment_expression:
    unary_expression assignment_operator assignment_expression {
        if (!$1.is_lvalue)
            error("assigning to non-lvalue");
        if ($1.type != $3.type && !type_is_implicitly_convertable($3.type, $1.type))
            error("type mismatch in assignment");
        $.type = $1. type;
        $.is_lvalue = 0; }

Note that you need to make sure to set $$ properly in EVERY rule action that might have its value used by some other rule action; your code fails to do this, so likely won't do anything useful as is.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文