Applying Lex and YACC (IV): Using a syntax tree
Papaya 20070515
I. Preface
Whatever the language, its syntactic structure always follows the same pattern: any program can be interpreted as a syntax tree.
The essence of a syntax tree is recursion, and recursion is clearly also the core idea of YACC grammars. This article uses a concrete example
to build a recursive syntax tree with YACC and solve a practical problem.
Unfortunately, this topic is not easy to explain clearly in a summary. I can explain perhaps seventy percent of it;
the rest depends on the reader's own thinking.
II. Recursive ideas
Let's first look at a simplified C language example section:
i = 0;
while (i <= 10) {
print(i);
i = i + 1;
}
print(i + i);
First, any combination of ( ) + / * print while and the like is called an expr (expression); it denotes only a basic expression.
Any combination of operators can be built up through recursion. For example, each line below can be called an expr:
i = 0
while (i <= 10)
print(i)
i = i + 1
print(i + i)
An expr followed by ";" is called a stmt (statement); the ";" marks the end of the statement. Several stmts enclosed in { }
are called a stmt_list. The original sample can then be expressed as:
stmt
expr stmt_list
stmt
This clearly does not follow a recursive rule. If a stmt may itself be formed from "expr stmt_list", the program can be reduced recursively all the way to the top level:
stmt
stmt
stmt
This in turn requires that the YACC grammar definition be recursive all the way to the top level, as shown above.
III. Memory structure
During compilation there must be a regular, recursive tree structure in memory. Put simply, for each stmt
a syntax tree has to be built.
The following are examples of the syntax trees we are going to use:
Graph 0:
        [=]
         |
    |---------|
    |         |
  idx(i)     c(0)
Graph 1:
                  while
                    |
          |--------------------|
          |                    |
        [<=]                  [;]
          |                    |
     |---------|         |-----------|
     |         |         |           |
   idx(i)    c(10)     print        [=]
                         |           |
                         |      |---------|
                         |      |         |
                       idx(i) idx(i)     [+]
                                          |
                                     |---------|
                                     |         |
                                   idx(i)     c(1)
Graph 2:
       print
         |
        [+]
         |
    |---------|
    |         |
  idx(i)    idx(i)
Looking carefully at the three graphs above, we find that each stmt corresponds to one tree. Tree nodes come in three types: operators
(such as + = ;), variable indexes (such as idx(i)), and values (such as c(10) and c(1)). Every operator node must itself obey
the recursive rule.
IV. A concrete example
A. node.h <tree node definition header file>
#ifndef NODE_H
#define NODE_H

/* Kind of a syntax-tree node. */
typedef enum { type_content, type_index, type_op } nodeenum;

/* Payload of an operator node. */
typedef struct {
    int name;                 /* operator name (a token code or a character such as '+') */
    int num;                  /* number of operands */
    struct nodetag *node[1];  /* operand addresses; allocated with extra room when num > 1 */
} opnode;

/* One node of the syntax tree: a constant, a variable index, or an operator. */
typedef struct nodetag {
    nodeenum type;            /* tree node type; selects the active union member */
    /* The union must be the last member, because the operand array inside
     * opnode is over-allocated past the nominal end of the structure. */
    union {
        int content;          /* constant value */
        int index;            /* index into var[] */
        opnode op;            /* operator object */
    };
} node;

/* Storage for the 26 single-letter variables; defined in the YACC file. */
extern int var[26];

#endif /* NODE_H */
[Note] node is the tree node type defined above. A node is one of three types (content, index, op).
If a node is an operator object (opnode), it can in turn contain further nodes recursively. An operator node holds a name, the number
of operands, and the child nodes; there may be several child nodes.
B. lexya_e.l <Lex File>
%{
#include <stdlib.h>
#include "node.h"
#include "lexya_e.tab.h"
void yyerror(char *);
%}
%%
[a-z] {
    /* single-letter variable: hand the parser its 0-based index */
    yylval.sindex = *yytext - 'a';
    return VARIABLE;
}
[0-9]+ {
    /* integer constant */
    yylval.ivalue = atoi(yytext);
    return INTEGER;
}
[-()<>=+*/;{}.] {
    /* single-character operators and punctuation are their own token code */
    return *yytext;
}
">=" return GE;
"<=" return LE;
"==" return EQ;
"!=" return NE;
"&&" return AND;
"||" return OR;
"while" return WHILE;
"if" return IF;
"else" return ELSE;
"print" return PRINT;
[ \t\n]+ ; /* skip spaces, tabs and newlines */
. printf("unknow Symbol: [%s]\n", yytext);
%%
int yywrap(void) {
    return 1;
}
[Note] the Lex file here is relatively simple.
C. lexya_e.y <YACC File>
001 %{
002
003 #include <stdio.h>
004 #include <stdlib.h>
005 #include <stdarg.h>
006 #include "node.h"
007
008 /* build an operator node `name` with `num` operand nodes */
009 node *OPR(int name, int num, ...);
010 /* build leaf nodes: a variable index and a constant value */
011 node *set_index(int value);
012 node *set_content(int value);
013
014 void freenode(node *p);
015 int exenode(node *p);
016
017 int yylex(void);
018 void yyerror(char *s);
019
020 int var[26]; /* variable array */
021
022 %}
023
024 %union {
025     int ivalue;  /* variable value */
026     char sindex; /* variable array index */
027     node *nptr;  /* node address */
028 };
029
030 %token <ivalue> INTEGER
031 %token <sindex> VARIABLE
032 %token WHILE IF PRINT
033 %nonassoc IFX
034 %nonassoc ELSE
035 %left AND OR GE LE EQ NE '>' '<'
036 %left '+' '-'
037 %left '*' '/'
038 %nonassoc UMINUS
039 %type <nptr> stmt expr stmt_list
040 %%
041 program:
042     function { exit(0); }
043     ;
044 function:
045     function stmt { exenode($2); freenode($2); }
046     | /* null */
047     ;
048 stmt:
049     ';' { $$ = OPR(';', 2, NULL, NULL); }
050     | expr ';' { $$ = $1; }
051     | PRINT expr ';' { $$ = OPR(PRINT, 1, $2); }
052     | VARIABLE '=' expr ';' { $$ = OPR('=', 2, set_index($1), $3); }
053     | WHILE '(' expr ')' stmt { $$ = OPR(WHILE, 2, $3, $5); }
054     | IF '(' expr ')' stmt %prec IFX { $$ = OPR(IF, 2, $3, $5); }
055     | IF '(' expr ')' stmt ELSE stmt %prec ELSE { $$ = OPR(IF, 3, $3, $5, $7); }
056     | '{' stmt_list '}' { $$ = $2; }
057     ;
058 stmt_list:
059     stmt { $$ = $1; }
060     | stmt_list stmt { $$ = OPR(';', 2, $1, $2); }
061     ;
062 expr:
063     INTEGER { $$ = set_content($1); }
064     | VARIABLE { $$ = set_index($1); }
065     | '-' expr %prec UMINUS { $$ = OPR(UMINUS, 1, $2); }
066     | expr '+' expr { $$ = OPR('+', 2, $1, $3); }
067     | expr '-' expr { $$ = OPR('-', 2, $1, $3); }
068     | expr '*' expr { $$ = OPR('*', 2, $1, $3); }
069     | expr '/' expr { $$ = OPR('/', 2, $1, $3); }
070     | expr '<' expr { $$ = OPR('<', 2, $1, $3); }
071     | expr '>' expr { $$ = OPR('>', 2, $1, $3); }
072     | expr GE expr { $$ = OPR(GE, 2, $1, $3); }
073     | expr LE expr { $$ = OPR(LE, 2, $1, $3); }
074     | expr NE expr { $$ = OPR(NE, 2, $1, $3); }
075     | expr EQ expr { $$ = OPR(EQ, 2, $1, $3); }
076     | expr AND expr { $$ = OPR(AND, 2, $1, $3); }
077     | expr OR expr { $$ = OPR(OR, 2, $1, $3); }
078     | '(' expr ')' { $$ = $2; }
079     ;
080 %%
081 #define size_of_node ((char *)&p->content - (char *)p)
082 /* size_of_node: bytes in a node before the union; needs a local `node *p` in scope */
083 node *set_content(int value) {
084     /* Build a leaf node that holds the integer constant `value`. */
085     node *p;
086
087     size_t sizenode;
088     /* allocate node space: node header plus one int of payload */
089     sizenode = size_of_node + sizeof(int);
090
091     if ((p = malloc(sizenode)) == NULL) {
092         yyerror("out of memory"); exit(EXIT_FAILURE); } /* do not go on to write through NULL */
093
094     /* copy content */
095     p->type = type_content;
096     p->content = value;
097
098     return p;
099
100 }
101
102 node *set_index(int value) {
103     /* Build a leaf node that holds the variable index `value`. */
104     node *p;
105     size_t sizenode;
106     /* allocate node space: node header plus one int of payload */
107     sizenode = size_of_node + sizeof(int);
108
109     if ((p = malloc(sizenode)) == NULL) {
110         yyerror("out of memory"); exit(EXIT_FAILURE); } /* do not go on to write through NULL */
111
112     /* copy content */
113     p->type = type_index;
114     p->index = value;
115
116     return p;
117 }
118
119 node *OPR(int name, int num, ...) {
120     /* Build an operator node `name` whose `num` operands are the trailing node * arguments. */
121     va_list valist;
122     node *p;
123     size_t sizenode;
124     int i;
125     /* allocate node space: opnode already holds one operand slot, so add num - 1 more */
126     sizenode = size_of_node + sizeof(opnode) + (num - 1) * sizeof(node *);
127
128     if ((p = malloc(sizenode)) == NULL) {
129         yyerror("out of memory"); exit(EXIT_FAILURE); } /* do not go on to write through NULL */
130
131     /* copy content */
132
133     p->type = type_op;
134     p->op.name = name;
135     p->op.num = num;
136
137     va_start(valist, num);
138     /* store the operand pointers in argument order */
139     for (i = 0; i < num; i++)
140         p->op.node[i] = va_arg(valist, node *);
141
142     va_end(valist);
143     return p;
144 }
145 void freenode(node *p) { /* free the tree rooted at p, children first; NULL is ignored */
146     int i;
147     if (!p) return;
148     if (p->type == type_op) { /* only operator nodes own child nodes */
149         for (i = 0; i < p->op.num; i++)
150             freenode(p->op.node[i]);
151     }
152     free(p);
153 }
154 void yyerror(char *s) { /* print the message s followed by a newline on stdout */
155     fprintf(stdout, "%s\n", s);
156 }
157 int main(void) { /* run the parser; each stmt is executed by the grammar action at 045 */
158     yyparse();
159     return 0;
160 }
[Note] This file is the core of the example. It can be divided into three parts: the YACC declarations, the recursive BNF grammar, and the supporting
functions.
YACC declarations:
(1). (024-031) (039)
%union {
int ivalue; /* variable value */
char sindex; /* variable array index */
node *nptr; /* node address */
};
%token <ivalue> INTEGER
%token <sindex> VARIABLE
(024-031) This section extends the definition of YYSTYPE. By default, YYSTYPE is simply int.
The declaration generates the following code:
#ifndef YYSTYPE
typedef union {
int ivalue; /* variable value */
char sindex; /* variable array index */
node *nptr; /* node address */
} yystype;
#define YYSTYPE yystype
#define YYSTYPE_IS_TRIVIAL 1
#endif
In addition, <ivalue> is bound to INTEGER and <sindex> is bound to VARIABLE, so that the value returned by lex is automatically
given the corresponding type.
(039) binds <nptr>, the pointer member of the union, to stmt, expr and stmt_list.
That is, on the parser's value stack, constants, variables and nodes can all be represented by yylval. yylval can be an int,
a char, or a node *. For a deeper understanding, see the generated file lexya_e.tab.c
(the switch (yyn) statement).
(2). (032-039)
This part mainly defines operator precedence. %nonassoc means non-associative. It is often
used together with %prec to specify the precedence of a rule. The following rules have the if-else ambiguity,
so %nonassoc must be used to rank them: 054 corresponds to IFX, 055 corresponds to ELSE, and 055 has higher precedence than 054.
054 | IF '(' expr ')' stmt %prec IFX { $$ = OPR(IF, 2, $3, $5); }
055 | IF '(' expr ')' stmt ELSE stmt %prec ELSE { $$ = OPR(IF, 3, $3, $5, $7); }
(039) The %type declaration states that the values of these nonterminals are of type <nptr>.
Recursive BNF grammar (040-080):
This recursive grammar looks easy but is hard to design. For the recursive ideas behind it, see the section
"Recursive ideas" at the beginning of this article. Pay particular attention to the grammar definitions at (056)-(060).
Supporting functions:
This example adds the functions set_index and set_content. They allocate, in memory, the nodes for an index
and for a value. The OPR function is especially important: it takes a variable number of arguments, because
the number of operands varies. This also has to agree with the header-file definition "struct nodetag *node[1];".
OPR allocates the tree nodes for operators in memory.
set_index, set_content and OPR are conceptually identical: their purpose is to build a recursive
syntax tree in memory.
D. parser. c
# Include <stdio. h>
# Include "node. H"
# Include "lexya_e.tab.h"
Int exenode (node * P ){
If (! P) return 0;
Switch (p-> type ){
Case type_content: Return p-> content;
Case type_index: Return var [p-> Index];
Case type_op:
Switch (p-> op. Name ){
Case while: While (exenode (p-> op. node [0]) exenode (p-> op. node [1]);
Return 0;
Case if: If (exenode (p-> op. node [0])
Exenode (p-> op. node [1]);
Else
If (p-> op. Num> 2)
Exenode (p-> op. node [2]);
Return 0;
Case print: printf ("% d/N", exenode (p-> op. node [0]);
Return 0;
Case ';': exenode (p-> op. node [0]);
Return exenode (p-> op. node [1]);
Case '=': Return var [p-> op. node [0]-> Index] = exenode (p-> op. node [1]);
Case uminus: Return exenode (p-> op. node [0]);
Case '+': Return exenode (p-> op. node [0]) + exenode (p-> op. node [1]);
Case '-': Return exenode (p-> op. node [0])-exenode (p-> op. node [1]);
Case '*': Return exenode (p-> op. node [0]) * exenode (p-> op. node [1]);
Case '/': Return exenode (p-> op. node [0])/exenode (p-> op. node [1]);
Case '<': Return exenode (p-> op. node [0]) <exenode (p-> op. node [1]);
Case '>': Return exenode (p-> op. node [0])> exenode (p-> op. node [1]);
Case Ge: Return exenode (p-> op. node [0])> = exenode (p-> op. node [1]);
Case le: Return exenode (p-> op. node [0]) <= exenode (p-> op. node [1]);
Case ne: Return exenode (p-> op. node [0])! = Exenode (p-> op. node [1]);
Case EQ: Return exenode (p-> op. node [0]) = exenode (p-> op. node [1]);
Case and: Return exenode (p-> op. node [0]) & exenode (p-> op. node [1]);
Case or: Return exenode (p-> op. node [0]) | exenode (p-> op. node [1]);
}
}
Return 0;
}
This file interprets the syntax tree. It contains a single recursive function, exenode, which first examines the tree node type
and then, for operator nodes, performs the action that corresponds to each operator.
V. Conclusion
bison -d lexya_e.y
lex lexya_e.l
gcc -g -o parser lex.yy.c lexya_e.tab.c parser.c
Compile with the commands above, then run and test the program.
Understanding the ideas for yourself is, I feel, the most important thing. This example covers only a small part of
C syntax; later articles will gradually extend it to explore the power of YACC and Lex.
<Note: the examples in this article are based on "A Compact Guide to Lex & Yacc" by Tom Niemann,
http://epaperpress.com/lexandyacc/index.html>