在c中编码词法分析器时出错
#include<stdio.h>
#include<ctype.h>
#include<string.h>
/* this is a lexer which recognizes constants , variables ,symbols, identifiers , functions , comments and also header files . It stores the lexemes in 3 different files . One file contains all the headers and the comments . Another file will contain all the variables , another will contain all the symbols. */
int main()
{
int i=0,j,k,count=0;
char a,b[100],c[10000],d[100];
memset ( d, 0, 100 );
j=30;
FILE *fp1,*fp2;
fp1=fopen("source.txt","r"); //the source file is opened in read only mode which will passed through the lexer
fp2=fopen("lext.txt","w");
//now lets remove all the white spaces and store the rest of the words in a file
if(fp1==NULL)
{
perror("failed to open source.txt");
//return EXIT_FAILURE;
}
i=0;
k=0;
while(!feof(fp1))
{
a=fgetc(fp1);
if(a!=' '&&a!='\n')
{
if (!isalpha(a))
{
switch(a)
{
case '+':{fprintf(fp2,"+ ----> PLUS \n");
i=0;break;}
case '-':{fprintf(fp2,"- ---> MINUS \n");
i=0;break;}
case '*':{fprintf(fp2, "* --->MULT \n");
i=0;break;}
case '/':{fprintf(fp2, "/ --->DIV \n");
i=0;break;}
//case '+=':fprintf(fp2, "%.20s\n", "ADD_ASSIGN");
//case '-=':fprintf(fp2, "%.20s\n", "SUB_ASSIGN");
case '=':{fprintf(fp2, "= ---> ASSIGN \n");
i=0;break;}
case '%':{fprintf(fp2, "% ---> MOD \n");
i=0;break;}
case '<':{fprintf(fp2, "< ---> LESSER_THAN \n");
i=0;break;}
case '>':{fprintf(fp2, "> --> GREATER_THAN \n");
i=0;break;}
//case '++':fprintf(fp2, "%.20s\n", "INCREMENT");
//case '--':fprintf(fp2, "%.20s\n", "DECREMENT");
//case '==':fprintf(fp2, "%.20s\n", "ASSIGNMENT");
case ';':{fprintf(fp2, "; --->SEMI_COLUMN \n");
i=0;break;}
case ':':{fprintf(fp2, ": --->COLUMN \n");
i=0;break;}
case '(':{fprintf(fp2, "( --->LPAR \n");
i=0;break;}
case ')':{fprintf(fp2, ") --->RPAR \n");
i=0;break;}
case '{':{fprintf(fp2, "{ --->LBRACE \n");
i=0;break;}
case '}':{fprintf(fp2, "} ---> RBRACE \n");
i=0;break;}
}
}
else
{
d[i]=a;
//printf("%c\n",d[i]);
i=i+1;
}
//}
/* we can make the lexer more complex by including even more depths of checks for the symbols*/
}
else
{
d[i+1]='\0';
printf("\n");
if((strcmp(d,"if ")==0)){fprintf(fp2,"if ----> IDENTIFIER \n");
//printf("%s \n",d);
memset ( d, 0, 100 );
//printf("%s \n",d);
count=count+1;}
else if(strcmp(d,"then")==0){fprintf(fp2,"then ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"else")==0){fprintf(fp2,"else ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"switch")==0){fprintf(fp2,"switch ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"printf")==0){fprintf(fp2,"prtintf ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"scanf")==0){fprintf(fp2,"scanf ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"NULL")==0){fprintf(fp2,"NULL ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"int")==0){fprintf(fp2,"INT ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"char")==0){fprintf(fp2,"char ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"float")==0){fprintf(fp2,"float ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"long")==0){fprintf(fp2,"long ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"double")==0){fprintf(fp2,"double ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"const")==0){fprintf(fp2,"const ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"continue")==0)fprintf(fp2,"continue ----> IDENTIFIER \n");
else if(strcmp(d,"size of")==0){fprintf(fp2,"size of ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"register")==0){fprintf(fp2,"register ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"short")==0){fprintf(fp2,"short ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"auto")==0){fprintf(fp2,"auto ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"while")==0){fprintf(fp2,"while ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"do")==0){fprintf(fp2,"do ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"case")==0){fprintf(fp2,"case ----> IDENTIFIER \n");
count=count+1;}
else if (isdigit(d[i]))
{
fprintf(fp2,"%s ---->NUMBER",d);
}
else if (isalpha(a))
{
fprintf(fp2,"%s ----> Variable",d);
//printf("%s",d);
// memset ( d, 0, 100 );}
//fprintf(fp2, "s\n", b);
i=0;
k=k+1;
continue;
}
i=i+1;
k=k+1;
}
fclose(fp1);
fclose(fp2);
printf("%d",count);
return 0;
}
在此代码中,我的 source.txt 存储了 if (a+b) 。但只有 ( 、 + 和 ) 被写入 lext.txt,而不是标识符 if 或变量 a 和 b 。有什么特别的原因吗?
#include<stdio.h>
#include<ctype.h>
#include<string.h>
/* this is a lexer which recognizes constants , variables ,symbols, identifiers , functions , comments and also header files . It stores the lexemes in 3 different files . One file contains all the headers and the comments . Another file will contain all the variables , another will contain all the symbols. */
int main()
{
int i=0,j,k,count=0;
char a,b[100],c[10000],d[100];
memset ( d, 0, 100 );
j=30;
FILE *fp1,*fp2;
fp1=fopen("source.txt","r"); //the source file is opened in read only mode which will passed through the lexer
fp2=fopen("lext.txt","w");
//now lets remove all the white spaces and store the rest of the words in a file
if(fp1==NULL)
{
perror("failed to open source.txt");
//return EXIT_FAILURE;
}
i=0;
k=0;
while(!feof(fp1))
{
a=fgetc(fp1);
if(a!=' '&&a!='\n')
{
if (!isalpha(a))
{
switch(a)
{
case '+':{fprintf(fp2,"+ ----> PLUS \n");
i=0;break;}
case '-':{fprintf(fp2,"- ---> MINUS \n");
i=0;break;}
case '*':{fprintf(fp2, "* --->MULT \n");
i=0;break;}
case '/':{fprintf(fp2, "/ --->DIV \n");
i=0;break;}
//case '+=':fprintf(fp2, "%.20s\n", "ADD_ASSIGN");
//case '-=':fprintf(fp2, "%.20s\n", "SUB_ASSIGN");
case '=':{fprintf(fp2, "= ---> ASSIGN \n");
i=0;break;}
case '%':{fprintf(fp2, "% ---> MOD \n");
i=0;break;}
case '<':{fprintf(fp2, "< ---> LESSER_THAN \n");
i=0;break;}
case '>':{fprintf(fp2, "> --> GREATER_THAN \n");
i=0;break;}
//case '++':fprintf(fp2, "%.20s\n", "INCREMENT");
//case '--':fprintf(fp2, "%.20s\n", "DECREMENT");
//case '==':fprintf(fp2, "%.20s\n", "ASSIGNMENT");
case ';':{fprintf(fp2, "; --->SEMI_COLUMN \n");
i=0;break;}
case ':':{fprintf(fp2, ": --->COLUMN \n");
i=0;break;}
case '(':{fprintf(fp2, "( --->LPAR \n");
i=0;break;}
case ')':{fprintf(fp2, ") --->RPAR \n");
i=0;break;}
case '{':{fprintf(fp2, "{ --->LBRACE \n");
i=0;break;}
case '}':{fprintf(fp2, "} ---> RBRACE \n");
i=0;break;}
}
}
else
{
d[i]=a;
//printf("%c\n",d[i]);
i=i+1;
}
//}
/* we can make the lexer more complex by including even more depths of checks for the symbols*/
}
else
{
d[i+1]='\0';
printf("\n");
if((strcmp(d,"if ")==0)){fprintf(fp2,"if ----> IDENTIFIER \n");
//printf("%s \n",d);
memset ( d, 0, 100 );
//printf("%s \n",d);
count=count+1;}
else if(strcmp(d,"then")==0){fprintf(fp2,"then ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"else")==0){fprintf(fp2,"else ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"switch")==0){fprintf(fp2,"switch ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"printf")==0){fprintf(fp2,"prtintf ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"scanf")==0){fprintf(fp2,"scanf ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"NULL")==0){fprintf(fp2,"NULL ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"int")==0){fprintf(fp2,"INT ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"char")==0){fprintf(fp2,"char ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"float")==0){fprintf(fp2,"float ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"long")==0){fprintf(fp2,"long ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"double")==0){fprintf(fp2,"double ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"const")==0){fprintf(fp2,"const ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"continue")==0)fprintf(fp2,"continue ----> IDENTIFIER \n");
else if(strcmp(d,"size of")==0){fprintf(fp2,"size of ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"register")==0){fprintf(fp2,"register ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"short")==0){fprintf(fp2,"short ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"auto")==0){fprintf(fp2,"auto ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"while")==0){fprintf(fp2,"while ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"do")==0){fprintf(fp2,"do ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"case")==0){fprintf(fp2,"case ----> IDENTIFIER \n");
count=count+1;}
else if (isdigit(d[i]))
{
fprintf(fp2,"%s ---->NUMBER",d);
}
else if (isalpha(a))
{
fprintf(fp2,"%s ----> Variable",d);
//printf("%s",d);
// memset ( d, 0, 100 );}
//fprintf(fp2, "s\n", b);
i=0;
k=k+1;
continue;
}
i=i+1;
k=k+1;
}
fclose(fp1);
fclose(fp2);
printf("%d",count);
return 0;
}
In this code , my source.txt has if (a+b) stored . But only ( , + and ) is getting written into lext.txt and not the identifier if or the variable a and b . Any particular reason why?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(3)
有很多问题:
一旦找到空格或换行符,您就尝试使用一系列
strcmp
来比较字符串d
中的内容。但是您正在重置 else if (isalpha(a)) 主体中的 i 值,该值永远不会执行,因为a
是空格或换行符。 之后无条件将i
的值设置为0
您应该在字符串中的空格
,所以应该是
There are quite a few problems:
Once you find a space or a newline you try and compare what is in string
d
using a series ofstrcmp
. But you are resetting the value of i in the body ofelse if (isalpha(a))
which will never execute becausea
is space or newline. You should unconditionally set the value ofi
to0
right afterSpaces in the string matter, so
should be
“if”关键字的比较是与
“if”
(带有空格)进行比较,但代码不会将空格复制到缓冲区d
中。最好的办法是使用调试器单步调试它,看看发生了什么。The comparison for the "if" keyword is comparing against
"if "
(with a space), but the code does not copy the space into the bufferd
. Your best bet is to step through it with a debugger and see what is happening.希望这对你有帮助......
Hope this helps you........