Pascal语言的词法和语法

发布于 2022-09-29 20:58:25 字数 19497 浏览 14 评论 0

响应号召,贴一个pascal语言的lex和yacc描述

lex描述

%{
/*
* scan.l
*
* lex input file for pascal scanner
*
* extensions: two ways to spell "external", and "->" is accepted for "^".
*/

#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
/*
 * Current input line number, starting at 1. The scanner rules increment
 * it for every newline they consume (inside comments as well) and the
 * diagnostics print it.
 */
int line_no = 1;

%}

/*
 * One definition per letter, each matching that letter in either case.
 * The keyword rules are spelled with these so that reserved words are
 * recognised regardless of case. NQUOTE matches any single character
 * other than an apostrophe (this includes newline).
 */
A [aA]
B [bB]
C [cC]
D [dD]
E [eE]
F [fF]
G [gG]
H [hH]
I [iI]
J [jJ]
K [kK]
L [lL]
M [mM]
N [nN]
O [oO]
P [pP]
Q [qQ]
R [rR]
S [sS]
T [tT]
U [uU]
V [vV]
W [wW]
X [xX]
Y [yY]
Z [zZ]
NQUOTE [^']

%%

{A}{N}{D}   {
     /*
      * Reserved words. Each is spelled with the per-letter
      * classes, so any mix of upper and lower case matches.
      * "begin" and "file" are reported as PBEGIN and PFILE.
      */
     return AND;
    }
{A}{R}{R}{A}{Y}                 { return ARRAY; }
{C}{A}{S}{E}                    { return CASE; }
{C}{O}{N}{S}{T}                 { return CONST; }
{D}{I}{V}                       { return DIV; }
{D}{O}                          { return DO; }
{D}{O}{W}{N}{T}{O}              { return DOWNTO; }
{E}{L}{S}{E}                    { return ELSE; }
{E}{N}{D}                       { return END; }
{E}{X}{T}{E}{R}{N}              |
{E}{X}{T}{E}{R}{N}{A}{L}        { return EXTERNAL; }
{F}{O}{R}                       { return FOR; }
{F}{O}{R}{W}{A}{R}{D}           { return FORWARD; }
{F}{U}{N}{C}{T}{I}{O}{N}        { return FUNCTION; }
{G}{O}{T}{O}                    { return GOTO; }
{I}{F}                          { return IF; }
{I}{N}                          { return IN; }
{L}{A}{B}{E}{L}                 { return LABEL; }
{M}{O}{D}                       { return MOD; }
{N}{I}{L}                       { return NIL; }
{N}{O}{T}                       { return NOT; }
{O}{F}                          { return OF; }
{O}{R}                          { return OR; }
{O}{T}{H}{E}{R}{W}{I}{S}{E}     { return OTHERWISE; }
{P}{A}{C}{K}{E}{D}              { return PACKED; }
{B}{E}{G}{I}{N}                 { return PBEGIN; }
{F}{I}{L}{E}                    { return PFILE; }
{P}{R}{O}{C}{E}{D}{U}{R}{E}     { return PROCEDURE; }
{P}{R}{O}{G}{R}{A}{M}           { return PROGRAM; }
{R}{E}{C}{O}{R}{D}              { return RECORD; }
{R}{E}{P}{E}{A}{T}              { return REPEAT; }
{S}{E}{T}                       { return SET; }
{T}{H}{E}{N}                    { return THEN; }
{T}{O}                          { return TO; }
{T}{Y}{P}{E}                    { return TYPE; }
{U}{N}{T}{I}{L}                 { return UNTIL; }
{V}{A}{R}                       { return VAR; }
{W}{H}{I}{L}{E}                 { return WHILE; }
{W}{I}{T}{H}                    { return WITH; }
[a-zA-Z][a-zA-Z0-9]*  {
     /*
      * Identifier: one letter followed by zero or more
      * letters or digits. The repetition has to be '*';
      * with '+' a one-letter name such as "i" or "x" would
      * match only the catch-all rule and be reported as an
      * illegal character. Reserved words still take
      * priority because their rules come first and lex
      * picks the earlier rule when match lengths are equal.
      */
     return(IDENTIFIER);
    }

":="    return(ASSIGNMENT);
'({NQUOTE}|'')+'  {
     /*
      * Character string: one or more characters between
      * apostrophes; a doubled apostrophe ('') is accepted
      * inside the string.
      */
     return(CHARACTER_STRING);
    }
":"    return(COLON);
","    return(COMMA);
[0-9]+    return(DIGSEQ);
"."    return(DOT);
".."    return(DOTDOT);
"="    return(EQUAL);
">="    return(GE);
">"    return(GT);
"["    return(LBRAC);
"<="    return(LE);
"("    return(LPAREN);
"<"    return(LT);
"-"    return(MINUS);
"<>"    return(NOTEQUAL);
"+"    return(PLUS);
"]"    return(RBRAC);
[0-9]+"."[0-9]+({E}[-+]?[0-9]+)? |
[0-9]+{E}[-+]?[0-9]+  {
     /*
      * Real number: digits "." digits with an optional
      * exponent, or digits followed directly by an
      * exponent (e.g. 1.5, 1.5e-3, 2E10). A fraction part
      * needs at least one digit, so "1..10" is still read
      * as DIGSEQ DOTDOT DIGSEQ.
      */
     return(REALNUMBER);
    }
")"    return(RPAREN);
";"    return(SEMICOLON);
"/"    return(SLASH);
"*"    return(STAR);
"**"    return(STARSTAR);
"->"   |
"^"    {
     /* "->" is accepted as an alternative spelling of "^". */
     return(UPARROW);
    }

"(*"   |
"{"    {
     /*
      * Skip a comment. Either opener may be closed by
      * either "}" or "*)". Newlines inside the comment are
      * counted so that line_no stays accurate.
      *
      * End of input is tested on every character read.
      * Depending on the lex implementation input() reports
      * it as 0 or as EOF, so both are checked; hitting it
      * here means the comment was never closed.
      */
     int commenteof(void);
     int c;
     int prev = 0;   /* previously read character */

     for (;;)
     {
      c = input();
      if (c == 0 || c == EOF)
      {
       commenteof();   /* reports and exits */
       break;          /* safeguard if it ever returns */
      }
      if (c == '}' || (prev == '*' && c == ')'))
       break;
      if (c == '\n')
       line_no++;
      prev = c;
     }
    }

[ \t\f\r]    ;

\n    line_no++;

.    {
     /*
      * Anything no other rule matched: report it and keep
      * scanning. The character is converted through
      * unsigned char so that bytes above 0x7f are printed
      * as their real octal value rather than a sign-
      * extended one. Carriage return is skipped as white
      * space above so CRLF input does not end up here.
      */
     fprintf (stderr,
      "'%c' (0%o): illegal character at line %d\n",
      yytext[0], (unsigned int) (unsigned char) yytext[0],
      line_no);
    }

%%

/*
 * commenteof - report that the input ended inside a comment.
 *
 * Writes a diagnostic containing the current line_no to stderr and
 * terminates the process with exit status 1, so it never returns to
 * its caller. The return type is spelled out as int (it used to be
 * implicit) to stay compatible with the existing call in the scanner.
 */
int
commenteof(void)
{
    fprintf(stderr, "unexpected EOF inside comment at line %d\n",
            line_no);
    exit(1);
}

/*
 * yywrap - end-of-input hook used by the generated scanner.
 *
 * Always returns 1, which tells lex that there is no further input
 * file to continue with.
 */
int
yywrap(void)
{
    return 1;
}

yacc描述

%{
/*
* grammar.y
*
* Pascal grammar in Yacc format, based originally on BNF given
* in "Standard Pascal -- User Reference Manual", by Doug Cooper.
* This in turn is the BNF given by the ANSI and ISO Pascal standards,
* and so, is PUBLIC DOMAIN. The grammar is for ISO Level 0 Pascal.
* The grammar has been massaged somewhat to make it LALR, and the
* following extensions have been added:
*
* constant expressions
* otherwise statement in a case
* productions to correctly match else's with if's
* beginnings of a separate compilation facility
*/

%}

/*
 * Terminal symbols produced by the scanner, listed alphabetically.
 * The reserved words "begin" and "file" are delivered as PBEGIN and
 * PFILE (presumably to avoid clashing with lex's BEGIN macro and
 * stdio's FILE -- confirm).
 */
%token AND ARRAY ASSIGNMENT CASE CHARACTER_STRING COLON COMMA CONST DIGSEQ
%token DIV DO DOT DOTDOT DOWNTO ELSE END EQUAL EXTERNAL FOR FORWARD FUNCTION
%token GE GOTO GT IDENTIFIER IF IN LABEL LBRAC LE LPAREN LT MINUS MOD NIL NOT
%token NOTEQUAL OF OR OTHERWISE PACKED PBEGIN PFILE PLUS PROCEDURE PROGRAM RBRAC
%token REALNUMBER RECORD REPEAT RPAREN SEMICOLON SET SLASH STAR STARSTAR THEN
%token TO TYPE UNTIL UPARROW VAR WHILE WITH

%%
/*
 * Compilation unit. A file is either a complete program or a "module",
 * i.e. a block without labels and without a statement part (the
 * separate-compilation extension).
 */
file : program
    | module
    ;

program : program_heading semicolon block DOT
    ;

program_heading : PROGRAM identifier
    | PROGRAM identifier LPAREN identifier_list RPAREN
    ;

identifier_list : identifier_list comma identifier
    | identifier
    ;

block : label_declaration_part
        constant_definition_part
        type_definition_part
        variable_declaration_part
        procedure_and_function_declaration_part
        statement_part
    ;

module : constant_definition_part
        type_definition_part
        variable_declaration_part
        procedure_and_function_declaration_part
    ;

label_declaration_part : LABEL label_list semicolon
    | /* empty */
    ;

label_list : label_list comma label
    | label
    ;

label : DIGSEQ
    ;

/*
 * Constant definitions. The right-hand side is a constant expression
 * (the c* rules), which mirrors the ordinary expression rules but
 * only over identifiers, unsigned constants and parentheses.
 */
constant_definition_part : CONST constant_list
    | /* empty */
    ;

constant_list : constant_list constant_definition
    | constant_definition
    ;

constant_definition : identifier EQUAL cexpression semicolon
    ;

/* Disabled in the original grammar:  constant : cexpression ;  ("good stuff!") */

cexpression : csimple_expression
    | csimple_expression relop csimple_expression
    ;

csimple_expression : cterm
    | csimple_expression addop cterm
    ;

cterm : cfactor
    | cterm mulop cfactor
    ;

cfactor : sign cfactor
    | cexponentiation
    ;

cexponentiation : cprimary
    | cprimary STARSTAR cexponentiation
    ;

cprimary : identifier
    | LPAREN cexpression RPAREN
    | unsigned_constant
    | NOT cprimary
    ;

/* Plain (non-expression) constants, used by subranges and case labels. */
constant : non_string
    | sign non_string
    | CHARACTER_STRING
    ;

sign : PLUS
    | MINUS
    ;

non_string : DIGSEQ
    | identifier
    | REALNUMBER
    ;

/*
 * Type definitions: ordinal types (enumerations, subranges), structured
 * types (array, record with optional variant part, set, file) and
 * pointer types.
 */
type_definition_part : TYPE type_definition_list
    | /* empty */
    ;

type_definition_list : type_definition_list type_definition
    | type_definition
    ;

type_definition : identifier EQUAL type_denoter semicolon
    ;

type_denoter : identifier
    | new_type
    ;

new_type : new_ordinal_type
    | new_structured_type
    | new_pointer_type
    ;

new_ordinal_type : enumerated_type
    | subrange_type
    ;

enumerated_type : LPAREN identifier_list RPAREN
    ;

subrange_type : constant DOTDOT constant
    ;

new_structured_type : structured_type
    | PACKED structured_type
    ;

structured_type : array_type
    | record_type
    | set_type
    | file_type
    ;

array_type : ARRAY LBRAC index_list RBRAC OF component_type
    ;

index_list : index_list comma index_type
    | index_type
    ;

index_type : ordinal_type
    ;

ordinal_type : new_ordinal_type
    | identifier
    ;

component_type : type_denoter
    ;

record_type : RECORD record_section_list END
    | RECORD record_section_list semicolon variant_part END
    | RECORD variant_part END
    ;

record_section_list : record_section_list semicolon record_section
    | record_section
    ;

record_section : identifier_list COLON type_denoter
    ;

/* The variant part may be absent, and may end with a semicolon. */
variant_part : CASE variant_selector OF variant_list semicolon
    | CASE variant_selector OF variant_list
    | /* empty */
    ;

variant_selector : tag_field COLON tag_type
    | tag_type
    ;

variant_list : variant_list semicolon variant
    | variant
    ;

variant : case_constant_list COLON LPAREN record_section_list RPAREN
    | case_constant_list COLON LPAREN record_section_list semicolon
          variant_part RPAREN
    | case_constant_list COLON LPAREN variant_part RPAREN
    ;

case_constant_list : case_constant_list comma case_constant
    | case_constant
    ;

case_constant : constant
    | constant DOTDOT constant
    ;

tag_field : identifier
    ;

tag_type : identifier
    ;

set_type : SET OF base_type
    ;

base_type : ordinal_type
    ;

file_type : PFILE OF component_type
    ;

new_pointer_type : UPARROW domain_type
    ;

domain_type : identifier
    ;

/*
 * Variable declarations, followed by procedure and function
 * declarations. A procedure or function body is either a directive
 * (FORWARD / EXTERNAL) or a full block.
 */
variable_declaration_part : VAR variable_declaration_list semicolon
    | /* empty */
    ;

variable_declaration_list : variable_declaration_list semicolon variable_declaration
    | variable_declaration
    ;

variable_declaration : identifier_list COLON type_denoter
    ;

procedure_and_function_declaration_part : proc_or_func_declaration_list semicolon
    | /* empty */
    ;

proc_or_func_declaration_list : proc_or_func_declaration_list semicolon proc_or_func_declaration
    | proc_or_func_declaration
    ;

proc_or_func_declaration : procedure_declaration
    | function_declaration
    ;

procedure_declaration : procedure_heading semicolon directive
    | procedure_heading semicolon procedure_block
    ;

procedure_heading : procedure_identification
    | procedure_identification formal_parameter_list
    ;

directive : FORWARD
    | EXTERNAL
    ;

formal_parameter_list : LPAREN formal_parameter_section_list RPAREN
    ;

formal_parameter_section_list : formal_parameter_section_list semicolon formal_parameter_section
    | formal_parameter_section
    ;

formal_parameter_section : value_parameter_specification
    | variable_parameter_specification
    | procedural_parameter_specification
    | functional_parameter_specification
    ;

value_parameter_specification : identifier_list COLON identifier
    ;

variable_parameter_specification : VAR identifier_list COLON identifier
    ;

procedural_parameter_specification : procedure_heading
    ;

functional_parameter_specification : function_heading
    ;

procedure_identification : PROCEDURE identifier
    ;

procedure_block : block
    ;

/*
 * The function_identification form (no parameter list, no result type)
 * is accepted only in front of a block, not in front of a directive.
 */
function_declaration : function_heading semicolon directive
    | function_identification semicolon function_block
    | function_heading semicolon function_block
    ;

function_heading : FUNCTION identifier COLON result_type
    | FUNCTION identifier formal_parameter_list COLON result_type
    ;

result_type : identifier
    ;

function_identification : FUNCTION identifier
    ;

function_block : block
    ;

/*
 * Statements. Every statement is classified as "open" (it ends in an
 * IF that has no ELSE) or "closed" (it does not). Only a closed
 * statement may stand between THEN and ELSE; these are the productions
 * that match each else with the right if. The empty statement is a
 * closed statement.
 */
statement_part : compound_statement
    ;

compound_statement : PBEGIN statement_sequence END
    ;

statement_sequence : statement_sequence semicolon statement
    | statement
    ;

statement : open_statement
    | closed_statement
    ;

open_statement : label COLON non_labeled_open_statement
    | non_labeled_open_statement
    ;

closed_statement : label COLON non_labeled_closed_statement
    | non_labeled_closed_statement
    ;

non_labeled_closed_statement : assignment_statement
    | procedure_statement
    | goto_statement
    | compound_statement
    | case_statement
    | repeat_statement
    | closed_with_statement
    | closed_if_statement
    | closed_while_statement
    | closed_for_statement
    | /* empty */
    ;

non_labeled_open_statement : open_with_statement
    | open_if_statement
    | open_while_statement
    | open_for_statement
    ;

repeat_statement : REPEAT statement_sequence UNTIL boolean_expression
    ;

open_while_statement : WHILE boolean_expression DO open_statement
    ;

closed_while_statement : WHILE boolean_expression DO closed_statement
    ;

open_for_statement : FOR control_variable ASSIGNMENT initial_value direction
          final_value DO open_statement
    ;

closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction
          final_value DO closed_statement
    ;

open_with_statement : WITH record_variable_list DO open_statement
    ;

closed_with_statement : WITH record_variable_list DO closed_statement
    ;

open_if_statement : IF boolean_expression THEN statement
    | IF boolean_expression THEN closed_statement ELSE open_statement
    ;

closed_if_statement : IF boolean_expression THEN closed_statement
          ELSE closed_statement
    ;
/*
 * Simple statements and their helpers: assignment, variable access,
 * procedure call, goto, case (with the "otherwise" extension), and the
 * small wrapper rules used by for/with/while.
 */
assignment_statement : variable_access ASSIGNMENT expression
    ;

variable_access : identifier
    | indexed_variable
    | field_designator
    | variable_access UPARROW
    ;

indexed_variable : variable_access LBRAC index_expression_list RBRAC
    ;

index_expression_list : index_expression_list comma index_expression
    | index_expression
    ;

index_expression : expression
    ;

field_designator : variable_access DOT identifier
    ;

procedure_statement : identifier params
    | identifier
    ;

params : LPAREN actual_parameter_list RPAREN
    ;

actual_parameter_list : actual_parameter_list comma actual_parameter
    | actual_parameter
    ;

/*
 * The second and third forms (expressions separated by colons) are only
 * meant for write and writeln. Those names are not reserved, so the
 * grammar cannot enforce the restriction; it has to be checked
 * separately.
 */
actual_parameter : expression
    | expression COLON expression
    | expression COLON expression COLON expression
    ;

goto_statement : GOTO label
    ;

/* A semicolon is tolerated before END, with or without an otherwise part. */
case_statement : CASE case_index OF case_list_element_list END
    | CASE case_index OF case_list_element_list SEMICOLON END
    | CASE case_index OF case_list_element_list semicolon
          otherwisepart statement END
    | CASE case_index OF case_list_element_list semicolon
          otherwisepart statement SEMICOLON END
    ;

case_index : expression
    ;

case_list_element_list : case_list_element_list semicolon case_list_element
    | case_list_element
    ;

case_list_element : case_constant_list COLON statement
    ;

otherwisepart : OTHERWISE
    | OTHERWISE COLON
    ;

control_variable : identifier
    ;

initial_value : expression
    ;

direction : TO
    | DOWNTO
    ;

final_value : expression
    ;

record_variable_list : record_variable_list comma variable_access
    | variable_access
    ;

boolean_expression : expression
    ;

/*
 * Expressions, layered from lowest to highest binding: relational,
 * additive, multiplicative, sign, exponentiation (right-recursive),
 * primary.
 */
expression : simple_expression
    | simple_expression relop simple_expression
    ;

simple_expression : term
    | simple_expression addop term
    ;

term : factor
    | term mulop factor
    ;

factor : sign factor
    | exponentiation
    ;

exponentiation : primary
    | primary STARSTAR exponentiation
    ;

primary : variable_access
    | unsigned_constant
    | function_designator
    | set_constructor
    | LPAREN expression RPAREN
    | NOT primary
    ;

unsigned_constant : unsigned_number
    | CHARACTER_STRING
    | NIL
    ;

unsigned_number : unsigned_integer
    | unsigned_real
    ;

unsigned_integer : DIGSEQ
    ;

unsigned_real : REALNUMBER
    ;

/* A call without parameters is parsed as a plain identifier instead. */
function_designator : identifier params
    ;

/*
 * Set constructor: "[]" or a comma-separated list of members between
 * brackets. Each member is a single expression or one range
 * "low .. high". The range form is deliberately not recursive, so a
 * chained range such as [1..2..3] is rejected by the parser.
 */
set_constructor : LBRAC member_designator_list RBRAC
    | LBRAC RBRAC
    ;

member_designator_list : member_designator_list comma member_designator
    | member_designator
    ;

member_designator : expression DOTDOT expression
    | expression
    ;

/*
 * Operator classes used by the expression rules, and one-token wrapper
 * rules for IDENTIFIER, SEMICOLON and COMMA.
 */
addop : PLUS
    | MINUS
    | OR
    ;

mulop : STAR
    | SLASH
    | DIV
    | MOD
    | AND
    ;

relop : EQUAL
    | NOTEQUAL
    | LT
    | GT
    | LE
    | GE
    | IN
    ;

identifier : IDENTIFIER
    ;

semicolon : SEMICOLON
    ;

comma : COMMA
    ;

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(2)

旧伤还要旧人安 2022-10-06 20:58:25

自己顶一下!!!!!!

酒解孤独 2022-10-06 20:58:25

不错,怀念一下...

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文