1. Preface
This is a lab exercise for a Compiler Principles course. It is the most difficult lab I have done since I started college.
Background knowledge used in the experiment: the C language, data structures, and assembly language (a basic understanding is enough).
Development tool: VC
2. Problem Description
Compile integer expressions built from the four arithmetic operations (+, -, *, /) by translating them into assembly language code.
Grammar after eliminating left recursion (only + and * are shown; the code handles - and / in the same way):
E  → TE'
E' → +TE' | ε
T  → FT'
T' → *FT' | ε
F  → (E) | i
Translation scheme after eliminating left recursion:
E  ::= T    {E'.i := T.nptr}
       E'   {E.nptr := E'.s}
E' ::= + T  {E'1.i := mknode('+', E'.i, T.nptr)}
       E'1  {E'.s := E'1.s}
E' ::= - T  {E'1.i := mknode('-', E'.i, T.nptr)}
       E'1  {E'.s := E'1.s}
E' ::= ε    {E'.s := E'.i}
T  ::= F    {T'.i := F.nptr}
       T'   {T.nptr := T'.s}
T' ::= * F  {T'1.i := mknode('*', T'.i, F.nptr)}
       T'1  {T'.s := T'1.s}
T' ::= / F  {T'1.i := mknode('/', T'.i, F.nptr)}
       T'1  {T'.s := T'1.s}
T' ::= ε    {T'.s := T'.i}
F  ::= (E)  {F.nptr := E.nptr}
F  ::= num  {F.nptr := mkleaf(num, num.val)}
3. Global Definition
The test.c file
Copy codeThe Code is as follows: # ifndef TEST_C
# Define TEST_C
/**
* Global variables and global function files
**/
# Include <stdio. h>
# Include <ctype. h>
# Include <string. h>
# Include <stdlib. h>
/************************** Global variables (and functions) begin *******************/
// Maximum length of the input expression; also the size of the input buffer.
#define MAX_EXPRESSION_LENGTH 50
// The input expression, stored as a NUL-terminated string.
char expression[MAX_EXPRESSION_LENGTH];
// Index of the next unread character in expression.
int expression_index = 0;
// Text of the token that was scanned most recently.
char strToken[MAX_EXPRESSION_LENGTH / 2];
// Determine whether strToken is a number, i.e. a non-empty string made up
// only of decimal digits.
//   strToken: NUL-terminated string to test (may be NULL).
//   returns:  1 if it is a number, 0 otherwise (including NULL and "").
int isNum(const char *strToken)
{
    int i;

    // An empty token is not a number; without this check the loop below
    // would accept it because it never meets a non-digit.
    if (strToken == NULL || strToken[0] == '\0')
        return 0;
    for (i = 0; strToken[i] != '\0'; i++) {
        // <ctype.h> functions require a value representable as unsigned char.
        if (!isdigit((unsigned char)strToken[i]))
            return 0;
    }
    return 1;
}
// Error handler: print the message and terminate the program.
//   errerMessage: description of the error.
// Does not return. The process exits with EXIT_FAILURE so that a calling
// script can tell a failed compilation from a successful one.
void error(char *errerMessage)
{
    printf("\nERROR: %s\n", errerMessage);
    exit(EXIT_FAILURE);
}
/************************* The above is a global variable (function) ******************/
# Endif
4. Lexical Analysis
The lexical analyzer must accept an expression and output every token in the expression together with its kind.
There are two ways to implement lexical analysis: with a state transition diagram or with a state transition table. The state transition diagram is used here.
First, define the kinds of tokens.
typedef enum Symbol { ERR = -1, END, NUM, PLUS, MINUS, TIMES, SLASH, LPAREN, RPAREN } Symbol;
The state transition diagram is as follows:
It is implemented by the following code in the test1.c file:
Copy codeThe Code is as follows: # ifndef TEST1_C
# Define TEST1_C
/**
* Use a status chart for lexical analysis and test lexical analysis
*
**/
# Include "test. c"
// Token kinds. ERR is returned for a lexical error, END when the input is exhausted.
typedef enum Symbol { ERR = -1, END, NUM, PLUS, MINUS, TIMES,
                      SLASH, LPAREN, RPAREN } Symbol;
// Scan the next token into strToken and return its kind.
Symbol getToken(void);
// Print the name of token kind i (and the token text where there is one).
void printToken(Symbol i);
// Test driver for lexical analysis.
void testLexAnalyse(void);
// Obtain a word symbol, which is stored in strToken. Returns the enumerated type of the word symbol.
Symbol getToken ()
{
Char ch;
Int state = 0; // each time it starts from status 0
Int j = 0;
// Expression traversal is complete. The word symbol is '#'
If (expression [expression_index] = '\ 0 '){
StrToken [0] = '#';
StrToken [1] = '\ 0 ';
Return END;
}
While (1 ){
Switch (state ){
Case 0:
// Read one character
Ch = strToken [j ++] = expression [expression_index ++];
If (isspace (ch )){
J --; // pay attention to the backspace
State = 0;
}
Else if (isdigit (ch ))
State = 1;
Else if (ch = '+ ')
State = 2;
Else if (ch = '-')
State = 3;
Else if (ch = '*')
State = 4;
Else if (ch = '/')
State = 5;
Else if (ch = '(')
State = 6;
Else if (ch = ')')
State = 7;
Else
Return ERR;
Break;
Case 1:
Ch = strToken [j ++] = expression [expression_index ++];
If (isdigit (ch ))
State = 1;
Else {
Expression_index --;
StrToken [-- j] = 0;
Return NUM;
}
Break;
Case 2:
StrToken [j] = 0;
Return PLUS;
Case 3:
StrToken [j] = 0;
Return MINUS;
Case 4:
StrToken [j] = 0;
Return TIMES;
Case 5:
StrToken [j] = 0;
Return SLASH;
Case 6:
StrToken [j] = 0;
Return LPAREN;
Case 7:
StrToken [j] = 0;
Return RPAREN;
}
}
}
// Print the name of token kind i; for every kind except ERR and END the
// token text held in strToken is printed after the name.
void printToken(Symbol i)
{
    switch (i) {
    case ERR:    printf("ERR\n"); break;
    case END:    printf("END\n"); break;
    case NUM:    printf("NUM %s\n", strToken); break;
    case PLUS:   printf("PLUS %s\n", strToken); break;
    case MINUS:  printf("MINUS %s\n", strToken); break;
    case TIMES:  printf("TIMES %s\n", strToken); break;
    case SLASH:  printf("SLASH %s\n", strToken); break;
    case LPAREN: printf("LPAREN %s\n", strToken); break;
    case RPAREN: printf("RPAREN %s\n", strToken); break;
    default:     break;   // unknown kinds print nothing
    }
}
// Test lexical analysis
Void testLexAnalyse ()
{
Symbol tokenStyle; // The type of the word Symbol.
Expression_index = 0;
Puts ("\ n lexical analysis results :");
While (1 ){
TokenStyle = getToken ();
PrintToken (tokenStyle );
If (tokenStyle = ERR ){
Error ("lexical analysis error! ");
}
If (tokenStyle = END ){
Break;
}
}
}
// Main function: read one expression from standard input and run the
// lexical analysis test on it.
int main(void)
{
    // gets() cannot limit how much it writes; fgets() never overruns expression.
    if (fgets(expression, sizeof expression, stdin) == NULL)
        expression[0] = '\0';
    // fgets() keeps the line terminator; cut it off so that only the
    // expression itself reaches the lexer.
    expression[strcspn(expression, "\r\n")] = '\0';
    testLexAnalyse();
    return 0;
}
# Endif
Running result
5. Syntax Analysis
Requirement: accept an expression, analyze it, and report whether the input is syntactically correct.
The work consists mainly of writing one subroutine for each nonterminal of the grammar from which left recursion has been eliminated.
Test2.cCopy codeThe Code is as follows: # ifndef TEST_2
# Define TEST_2
/**
* Syntax analysis and test syntax analysis
**/
# Include "test1.c"
/*
Remove the left recursive Syntax:
E → te'
E' → + te' | ε
T → FT'
T' → * FT '| ε
F → (E) | I
*/
// Declarations of the subroutines, one per nonterminal
// (E1 stands for E' and T1 for T').
void E(void);
void E1(void);
void T(void);
void T1(void);
void F(void);
// Test driver for syntax analysis.
void testSyntaxAnalyse(void);
// The subroutines, one per nonterminal.
// E → TE'
void E(void)
{
    T();
    E1();
}
Void E1 ()
{
If (strcmp (strToken, "+") = 0 | strcmp (strToken, "-") = 0 ){
GetToken ();
T ();
E1 ();
}
// Follow (E1) = {#,)}
Else if (strcmp (strToken ,"#")! = 0 & strcmp (strToken ,")")! = 0 ){
Error ("syntax analysis error! ");
}
}
// T → FT'
void T(void)
{
    F();
    T1();
}
Void T1 ()
{
If (strcmp (strToken, "*") = 0 | strcmp (strToken, "/") = 0 ){
GetToken ();
F ();
T1 ();
}
// Follow (T1) = {+, #,)}. If you want to add the-number, add the-number.
Else if (strcmp (strToken ,"-")! = 0 & strcmp (strToken, "+ ")! = 0 & strcmp (strToken ,"#")! = 0 & strcmp (strToken ,")")! = 0 ){
Error ("syntax analysis error! ");
}
}
Void F ()
{
If (isNum (strToken )){
GetToken ();
}
Else {
If (strcmp (strToken, "(") = 0 ){
GetToken ();
E ();
If (strcmp (strToken, ")") = 0)
GetToken ();
Else
Error ("syntax analysis error! ");
}
Else
Error ("syntax analysis error! ");
}
}
// Test syntax analysis
Void testSyntaxAnalyse ()
{
Expression_index = 0;
GetToken ();
E ();
Puts ("\ n syntax analysis result :");
If (strcmp (strToken ,"#")! = 0)
Error ("syntax analysis error! ");
Else {
Puts ("Correct syntax analysis! ");
}
}
// Main function: read one expression from standard input, then run the
// lexical analysis test and the syntax analysis test on it.
int main(void)
{
    // gets() cannot limit how much it writes; fgets() never overruns expression.
    if (fgets(expression, sizeof expression, stdin) == NULL)
        expression[0] = '\0';
    // fgets() keeps the line terminator; cut it off so that only the
    // expression itself reaches the lexer.
    expression[strcspn(expression, "\r\n")] = '\0';
    testLexAnalyse();
    testSyntaxAnalyse();
    return 0;
}
# Endif
When running this program, delete the main function from test1.c (test2.c includes test1.c, so main would otherwise be defined twice).
6. Semantic Analysis
Requirement: the semantic analysis program accepts an expression, analyzes it, and builds the abstract syntax tree (AST) of the expression during the analysis. Because the AST of an arithmetic expression is essentially a binary tree, its in-order traversal should reproduce the input expression, except that the parentheses are gone; this can be used to check that the program works correctly. If each interior node is labeled with a temporary variable, a post-order traversal of the AST yields the sequence of quadruples corresponding to the input expression.
The test3.c file
Copy codeThe Code is as follows: # ifndef TEST3_C
# Define TEST3_C
/**
* Semantic Analysis and Test Semantic Analysis
* In fact, this experiment was modified on the code of test2.
**/
# Include "test1.c"
/*
Remove the left recursive translation mode:
E: = T {E '. I: = T. nptr}
E' {E. nptr: = E'. s}
E ': = + T {E '1. I: = mknode (' + ', E'. I, T. nptr )}
E '1 {E '. s: = E1.s}
E ': =-T {E '1. I: = mknode ('-', E'. I, T. nptr )}
E '1 {E '. s: = E1.s}
E': = ε {E '. s: = E'. I}
T: = F {t'. I: = F. nptr}
T' {T. nptr: = t'. s}
T': = * F {t' 1. I: = mknode ('*', T'. I, F. nptr )}
T' 1 {t'. s: = T1.s}
T': =/F {t' 1. I: = mknode ('/', T'. I, F. nptr )}
T' 1 {t'. s: = T1.s}
T': = ε {t'. s: = t'. I}
F ::= (E) {F. nptr: = E. nptr}
F ::= num {F. nptr: = mkleaf (num, num. val )}
*/
#define MAX_LENGTH 20   // maximum length of an operand string in a quadruple
// Type of the value carried by a leaf of the abstract syntax tree.
typedef int ValType;
// Node of the abstract syntax tree.
typedef struct ASTNode {
    Symbol sym;                      // node kind: NUM for a leaf, otherwise the operator
    ValType val;                     // value of a NUM leaf; mknode stores 0 for operators
    struct ASTNode *left, *right;    // left and right children; NULL in a leaf
} ASTNode, *AST;
// A quadruple (op, arg1, arg2, result) of the intermediate code.
typedef struct Quaternion {
    char op;                    // operator character: '+', '-', '*' or '/'
    char arg1[MAX_LENGTH];      // left operand: a number or the name of a temporary
    char arg2[MAX_LENGTH];      // right operand: a number or the name of a temporary
    char result[MAX_LENGTH];    // name of the temporary that receives the result
} Quaternion;
// Array that stores the generated quadruples.
Quaternion quaternion[MAX_LENGTH * 2];
// Number of quadruples generated so far.
int count = 0;
// Operand stack used during the post-order traversal of the abstract syntax
// tree; it holds operands and the names of temporaries.
char stack[MAX_LENGTH * 2][MAX_LENGTH];
// Index of the next free slot of stack.
// NOTE(review): some C libraries declare a function named index() in
// <string.h>; if the compiler reports a conflict, rename this variable
// together with every use of it.
int index = 0;
// Offset of the most recently allocated temporary inside the storage area t.
// It is advanced by 4 before each use, so the first temporary is t+0.
int t = -4;
// Function declarations.
// One subroutine per nonterminal (E1 stands for E' and T1 for T'). Each
// returns the synthesized attribute of its nonterminal; E1 and T1 take the
// inherited attribute as their argument.
ASTNode *E(void);
ASTNode *E1(ASTNode *E1_i);
ASTNode *T(void);
ASTNode *T1(ASTNode *T1_i);
ASTNode *F(void);
void error(char *errerMessage);
// Test driver for semantic analysis.
void testYuyiAnalyse(void);
// Create an operator node.
ASTNode *mknode(Symbol op, ASTNode *left, ASTNode *right);
// Create an operand (leaf) node.
ASTNode *mkleaf(Symbol sym, ValType val);
// Print one node.
void printNode(ASTNode *root);
// Traverse the tree in in-order.
void middle_list(ASTNode *root);
// Traverse the tree in post-order.
void last_list(ASTNode *root);
/*
E  ::= T    {E'.i := T.nptr}
       E'   {E.nptr := E'.s}
*/
// Parse E and return its synthesized attribute E.nptr.
// The node built for T is handed to E' as its inherited attribute.
ASTNode *E(void)
{
    ASTNode *E_nptr;
    ASTNode *E1_i;

    E1_i = T();
    E_nptr = E1(E1_i);
    return E_nptr;
}
/*
E' ::= + T  {E'1.i := mknode('+', E'.i, T.nptr)}
       E'1  {E'.s := E'1.s}
E' ::= - T  {E'1.i := mknode('-', E'.i, T.nptr)}
       E'1  {E'.s := E'1.s}
E' ::= ε    {E'.s := E'.i}
*/
// Parse E'.
//   E1_i:    inherited attribute E'.i, the tree built so far
//   returns: synthesized attribute E'.s
// Reports a syntax error through error() when the current token can neither
// start E' nor follow it.
ASTNode *E1(ASTNode *E1_i)
{
    ASTNode *E11_i;
    ASTNode *E1_s = NULL;
    ASTNode *T_nptr;
    char oper;

    if (strcmp(strToken, "+") == 0 || strcmp(strToken, "-") == 0) {
        oper = strToken[0];   // save the operator: getToken() overwrites strToken
        getToken();
        T_nptr = T();
        if (oper == '+')
            E11_i = mknode(PLUS, E1_i, T_nptr);
        else
            E11_i = mknode(MINUS, E1_i, T_nptr);
        E1_s = E1(E11_i);
    }
    // Follow(E1) = {#, )}: the empty production applies.
    else if (strcmp(strToken, "#") == 0 || strcmp(strToken, ")") == 0) {
        E1_s = E1_i;
    } else {
        error("syntax analysis error!");
    }
    return E1_s;
}
/*
T  ::= F    {T'.i := F.nptr}
       T'   {T.nptr := T'.s}
*/
// Parse T and return its synthesized attribute T.nptr.
// The node built for F is handed to T' as its inherited attribute.
ASTNode *T(void)
{
    ASTNode *T_nptr;
    ASTNode *T1_i;

    T1_i = F();
    T_nptr = T1(T1_i);
    return T_nptr;
}
/*
T' ::= * F  {T'1.i := mknode('*', T'.i, F.nptr)}
       T'1  {T'.s := T'1.s}
T' ::= / F  {T'1.i := mknode('/', T'.i, F.nptr)}
       T'1  {T'.s := T'1.s}
T' ::= ε    {T'.s := T'.i}
*/
// Parse T'.
//   T1_i:    inherited attribute T'.i, the tree built so far
//   returns: synthesized attribute T'.s
// Reports a syntax error through error() when the current token can neither
// start T' nor follow it.
ASTNode *T1(ASTNode *T1_i)
{
    ASTNode *F_nptr;
    ASTNode *T11_i;
    ASTNode *T1_s = NULL;
    char oper;

    if (strcmp(strToken, "*") == 0 || strcmp(strToken, "/") == 0) {
        oper = strToken[0];   // save the operator: getToken() overwrites strToken
        getToken();
        F_nptr = F();
        if (oper == '*')
            T11_i = mknode(TIMES, T1_i, F_nptr);
        else
            T11_i = mknode(SLASH, T1_i, F_nptr);
        T1_s = T1(T11_i);
    }
    // Follow(T1) = {+, -, #, )}: the empty production applies.
    else if (strcmp(strToken, "-") == 0 || strcmp(strToken, "+") == 0 ||
             strcmp(strToken, "#") == 0 || strcmp(strToken, ")") == 0) {
        T1_s = T1_i;
    } else {
        error("syntax analysis error!");
    }
    return T1_s;
}
/*
F ::= (E) {F. nptr: = E. nptr}
F ::= num {F. nptr: = mkleaf (num, num. val )}
*/
ASTNode * F ()
{
ASTNode * F_nptr;
ASTNode * E_nptr;
If (isNum (strToken )){
F_nptr = mkleaf (NUM, atoi (strToken ));
GetToken ();
}
Else {
If (strcmp (strToken, "(") = 0 ){
GetToken ();
E_nptr = E ();
If (strcmp (strToken, ")") = 0)
GetToken ();
Else
Error ("syntax analysis error! ");
F_nptr = E_nptr;
}
Else {
Error ("syntax analysis error! ");
}
}
Return F_nptr;
}
// Create an operator node.
//   op:          operator kind (PLUS, MINUS, TIMES or SLASH)
//   left, right: operand subtrees
//   returns:     the new heap-allocated node; its val is set to 0
// Terminates the program through error() if memory cannot be allocated.
ASTNode *mknode(Symbol op, ASTNode *left, ASTNode *right)
{
    ASTNode *p = malloc(sizeof *p);

    if (p == NULL)
        error("out of memory!");
    p->left = left;
    p->right = right;
    p->sym = op;
    p->val = 0;
    return p;
}
// Create an operand (leaf) node.
//   sym:     node kind (NUM)
//   val:     value of the operand
//   returns: the new heap-allocated node; both children are NULL
// Terminates the program through error() if memory cannot be allocated.
ASTNode *mkleaf(Symbol sym, ValType val)
{
    ASTNode *p = malloc(sizeof *p);

    if (p == NULL)
        error("out of memory!");
    p->sym = sym;
    p->val = val;
    p->left = NULL;
    p->right = NULL;
    return p;
}
// Print one node: the value of a NUM leaf, or the operator character.
// Nodes of any other kind print nothing. root must not be NULL.
void printNode(ASTNode *root)
{
    switch (root->sym) {
    case NUM:   printf("%d", root->val); break;
    case PLUS:  printf("+"); break;
    case MINUS: printf("-"); break;
    case TIMES: printf("*"); break;
    case SLASH: printf("/"); break;
    default:    break;
    }
}
// Traverse the tree in in-order (left subtree, node, right subtree) and
// print every node. An empty tree (NULL) prints nothing.
void middle_list(ASTNode *root)
{
    if (root == NULL)
        return;
    middle_list(root->left);
    printNode(root);
    middle_list(root->right);
}
// Traverse Binary Trees in descending order
Void last_list (ASTNode * root)
{
Char temp [MAX_LENGTH];
If (root = NULL)
Return;
Last_list (root-> left );
Last_list (root-> right );
If (root-> sym = NUM) {// if it is a number, it is directly stored in the stack
Sprintf (temp, "% d \ 0", root-> val );
Strcpy (stack [index ++], temp );
}
Else if (root-> sym = PLUS) {// if it is a + sign, a quad-element formula is generated.
// Assign a value to the element
Quaternion [count]. op = '+ ';
Strcpy (quaternion [count]. arg2, stack [-- index]);
Strcpy (quaternion [count]. arg1, stack [-- index]);
Sprintf (quaternion [count]. result, "t + % d \ 0", t + = 4 );
Strcpy (stack [index ++], quaternion [count]. result );
// Output the quad-Element
Printf ("%-4c %-8 s %-8 s %-8s \ n", quaternion [count]. op, quaternion [count]. arg1, quaternion [count]. arg2, quaternion [count]. result );
Count ++;
} Else if (root-> sym = MINUS) {// if it is a + sign, a quad-element formula is generated.
Quaternion [count]. op = '-';
Strcpy (quaternion [count]. arg2, stack [-- index]);
Strcpy (quaternion [count]. arg1, stack [-- index]);
Sprintf (quaternion [count]. result, "t + % d \ 0", t + = 4 );
Strcpy (stack [index ++], quaternion [count]. result );
Printf ("%-4c %-8 s %-8 s %-8s \ n", quaternion [count]. op, quaternion [count]. arg1, quaternion [count]. arg2, quaternion [count]. result );
Count ++;
} Else if (root-> sym = TIMES) {// if the number is *, a quad-element formula is generated.
Quaternion [count]. op = '*';
Strcpy (quaternion [count]. arg2, stack [-- index]);
Strcpy (quaternion [count]. arg1, stack [-- index]);
Sprintf (quaternion [count]. result, "t + % d \ 0", t + = 4 );
Strcpy (stack [index ++], quaternion [count]. result );
Printf ("%-4c %-8 s %-8 s %-8s \ n", quaternion [count]. op, quaternion [count]. arg1, quaternion [count]. arg2, quaternion [count]. result );
Count ++;
} Else if (root-> sym = SLASH ){
Quaternion [count]. op = '/';
Strcpy (quaternion [count]. arg2, stack [-- index]);
Strcpy (quaternion [count]. arg1, stack [-- index]);
Sprintf (quaternion [count]. result, "t + % d \ 0", t + = 4 );
Strcpy (stack [index ++], quaternion [count]. result );
Printf ("%-4c %-8 s %-8 s %-8s \ n", quaternion [count]. op, quaternion [count]. arg1, quaternion [count]. arg2, quaternion [count]. result );
Count ++;
}
}
// Test Semantic Analysis
Void testYuyiAnalyse ()
{
ASTNode * root;
Expression_index = 0;
GetToken ();
Root = E ();
Puts ("\ n semantic analysis results :");
Printf ("sequential traversal :");
Middle_list (root );
Putchar ('\ n ');
Printf ("four elements obtained by post-order traversal: \ n ");
Last_list (root );
Putchar ('\ n ');
}
// Main function: read one expression from standard input and run the
// semantic analysis test on it.
int main(void)
{
    // gets() cannot limit how much it writes; fgets() never overruns expression.
    if (fgets(expression, sizeof expression, stdin) == NULL)
        expression[0] = '\0';
    // fgets() keeps the line terminator; cut it off so that only the
    // expression itself reaches the lexer.
    expression[strcspn(expression, "\r\n")] = '\0';
    testYuyiAnalyse();
    return 0;
}
# Endif
Running result
7. Code Generation
Requirement: take the quadruples output by the semantic analysis program of experiment 3 as input, and output an assembly language program.
Test4.cCopy codeThe Code is as follows: # ifndef TEST4_C
# Define TEST4_C
/**
* Production assembly code
**/
# Include "test3.c"
// Print the assembly code for one quadruple, preceded by an empty line.
//   qua: the quadruple; op is '+', '-', '*' or '/'. Any other operator
//        prints only the empty line.
void print_code(Quaternion qua)
{
    putchar('\n');
    switch (qua.op) {
    case '+':
    case '-':
        /*
        mov eax, 3
        add eax, 4
        mov t+0, eax
        */
        printf("mov eax, %s\n", qua.arg1);
        printf("%s eax, %s\n", qua.op == '+' ? "add" : "sub", qua.arg2);
        printf("mov %s, eax\n", qua.result);
        break;
    case '*':
        /*
        mov eax, 2
        mov ebx, t+0
        mul ebx
        mov t+4, eax
        */
        printf("mov eax, %s\n", qua.arg1);
        printf("mov ebx, %s\n", qua.arg2);
        printf("mul ebx\n");
        printf("mov %s, eax\n", qua.result);
        break;
    case '/':   // the remainder is ignored
        printf("mov eax, %s\n", qua.arg1);
        printf("mov ebx, %s\n", qua.arg2);
        // The dividend of a 32-bit division is EDX:EAX. cdq sign-extends EAX
        // into EDX first; otherwise whatever EDX happens to hold becomes the
        // high half of the dividend. idiv is used because operands may be
        // negative (results of subtraction).
        printf("cdq\n");
        printf("idiv ebx\n");
        printf("mov %s, eax\n", qua.result);
        break;
    default:
        break;
    }
}
// Output all assembly code
Void testCode ()
{
Int I = 0;
Puts ("the generated assembly code is as follows: \ n ");
Puts (". 386 ");
Puts (". model flat ");
Puts ("ExitProcess PROTO NEAR32 stdcall, dwExitCode: DWORD ");
Puts ("INCLUDE io. h; header file for input/output ");
Puts ("cr EQU 0dh; carriage return character ");
Puts ("Lf EQU 0ah; line feed ");
Puts (". STACK 4096; reserve 4096-byte stack ");
Puts (". DATA; reserve storage for data ");
Puts ("t DWORD 40 DUP (?) ");
Puts ("label1 BYTE cr, Lf, \" The result is \"");
Puts ("result BYTE 11 DUP (?) ");
Puts ("BYTE cr, Lf, 0 ");
Puts (". CODE; start of main program code ");
Puts ("_ start :");
// Traverse the four elements in experiment 3 and output the corresponding assembly code
For (; I <count; I ++)
Print_code (quaternion [I]);
Puts ("dtoa result, eax; convert to ASCII characters ");
Puts ("output label1; output label and sum ");
Puts ("INVOKE ExitProcess, 0; exit with return code 0 ");
Puts ("PUBLIC _ start; make entry point public ");
Puts ("END; end of source code ");
}
// Main function: read one expression from standard input, then run lexical
// analysis, semantic analysis and code generation on it.
int main(void)
{
    // gets() cannot limit how much it writes; fgets() never overruns expression.
    if (fgets(expression, sizeof expression, stdin) == NULL)
        expression[0] = '\0';
    // fgets() keeps the line terminator; cut it off so that only the
    // expression itself reaches the lexer.
    expression[strcspn(expression, "\r\n")] = '\0';
    testLexAnalyse();
    testYuyiAnalyse();
    testCode();
    return 0;
}
# Endif
Running result
8. Click to download the source code