Lexical analysis of pl/0 language
First, the purpose of the experiment
By implementing a lexical analyzer, understand how lexical analysis works. Write a token-reading program that performs lexical analysis of the PL/0 language: it splits the source program, given as a string of characters, into individual word symbols (tokens), which fall into five major categories: reserved words, identifiers, constants, operators, and delimiters.
Second, the experimental environment
Operating system: Windows XP
Development environment: Visual C++, C-Free, Turbo C
Writing language: C language
Analytical language: pl/0
Third, the contents of the experiment
Perform lexical analysis of the PL/0 language: the source program, given as a string of characters, is split into individual word symbols (tokens). The lexical rules are as follows:
(1) Keyword: begin,call,const,do,end,if,odd,procedure,read,then,var,while,write
(2) Identifier: used to denote the various names; must start with a letter and consist of fewer than 10 characters (letters and digits)
(3) Number: a sequence of fewer than 14 digits, each in the range 0-9
(4) Operator: +,-,*,/,:=,<,<=,>,>=
(5) Delimiters: , . ; #
Table 1 Table of the corresponding types of word symbols
Word symbols |
Type |
+ |
Plus |
- |
Minus |
* |
Times |
/ |
Slash |
( |
Lparen |
) |
Rparen |
= |
Eql |
, |
Comma |
. |
Period |
# |
Neq |
; |
Semicolon |
Begin |
Beginsym |
Call |
Callsym |
Const |
Constsym |
Do |
Dosym |
End |
Endsym |
If |
Ifsym |
Odd |
Oddsym |
Procedure |
Proceduresym |
Read |
Readsym |
Then |
Thensym |
Var |
Varsym |
While |
Whilesym |
Write |
Writesym |
Functions of getsym:
(1) Filter whitespace. Blanks are indispensable as separators during lexical analysis but are useless to the parser, so they must be filtered out.
(2) Recognize reserved words. The main program defines a one-dimensional array, word, whose elements are character strings; it is called the reserved-word table. Every string of letters and digits that begins with a letter is looked up in this table. If it is found, it is recognized as a reserved word and the corresponding category is placed in sym; for example, the value for "if" is ifsym and the value for "then" is thensym. If it is not found, it is treated as a user-defined identifier.
(3) Recognize identifiers. For a user-defined identifier, ident is placed in sym and the text of the identifier itself is placed in id.
(4) Assemble numbers. When a digit string is scanned, the decimal number given as a string is converted into its binary (integer) value; number is placed in sym and the value itself is placed in num.
(5) Assemble compound symbols. For two-character operators such as >=, := and <=, the category is placed in sym once the operator has been recognized.
(6) Output the source program. Characters are echoed as they are read (the output can also be written to a file).
Fourth. Experimental results
The content to be analyzed is as follows:
const a=36;
var c,d;
procedure p;
begin
var g;
g:=1234567890000000;
write(g);
end;
begin
read(c,d);
if c<=d then c:=a;
write(c,d);
call p;
end.
Pl0.h header File
#ifndef PL0_H
#define PL0_H

/*
 * pl0.h - shared constants, token codes and scanner state for the PL/0
 * lexical analyzer.
 *
 * NOTE: the objects below are definitions, not extern declarations, so this
 * header must be included from exactly one .c file.
 */

#include <stdio.h>

#define norw 13   /* number of reserved words */
#define nmax 14   /* maximum number of digits in a number */
#define al 10     /* maximum length of an identifier */
#define cxmax 200 /* maximum number of virtual machine instructions;
                     NOTE(review): value not shown in the original text,
                     200 is the conventional PL/0 setting - confirm */

/* Token categories produced by getsym(). */
enum symbol {
    nul, ident, number, plus, minus, times, slash, oddsym, eql, neq,
    lss, leq, gtr, geq, lparen, rparen, comma, semicolon, period, becomes,
    beginsym, endsym, ifsym, thensym, whilesym,
    writesym, readsym, dosym,
    callsym, constsym, varsym, procsym,
};

FILE *fa1;                 /* listing file: receives the echoed source lines */
char ch;                   /* character most recently read by getch() */
enum symbol sym;           /* category of the current token */
char id[al + 1];           /* text of the current identifier */
int num;                   /* value of the current number */
int cc, ll;                /* getch() position in line / length of line */
int cx;                    /* virtual machine code pointer, range 0..cxmax-1 */
char line[81];             /* buffer holding the current source line */
char a[al + 1];            /* scratch buffer for the symbol being read */
char word[norw][al];       /* reserved words (begin, end, if, ...), sorted */
enum symbol wsym[norw];    /* token code of each reserved word, e.g. begin -> beginsym */
enum symbol ssym[256];     /* token code of each single-character symbol */
FILE *fin;                 /* source file being analyzed */
FILE *fout;
char fname[FILENAME_MAX];  /* file name typed by the user; sized for a full
                              path rather than an identifier (was al) */
int err;                   /* number of errors reported so far */

int getch(void);

/*
 * Read the next character; on end of input make the *calling* function
 * return -1. Written without a trailing semicolon: use as `getchdo;`.
 */
#define getchdo if (-1 == getch()) return -1

void error(int n);

#endif /* PL0_H */
Main function
#include <stdio.h> #include <string.h> #include "pl0.h" void error (int n) {char space[81];
Memset (Space, 32, 81);
Space[cc-1] = 0;
printf ("* * * ERROR \ n");
fprintf (FA1, "* * * ERROR \ n");
err++;
} int Getch () {if (cc = = ll) {if (feof (Fin)) {printf ("read out");
return-1;
} ll = 0;
CC = 0;
printf ("%d", CX);
fprintf (FA1, "%d", CX);
ch = ";
while (ch! =) {if (FSCANF (Fin, "%c", &ch) = = EOF) {Line[ll] = 0;
Break
} printf ("%c", ch);
fprintf (FA1, "%c", ch);
LINE[LL] = ch;
ll++;
} printf ("\ n");
fprintf (FA1, "\ n");
} ch = line[cc];
cc++;
return 0;
} int Getsym () {int I, j, K;
while (ch = = "| | ch = = | | ch = = 9) {getchdo;
} if (ch >= ' a ' && ch <= ' Z ') { k = 0;
do {if (K < AL) {a[k] = ch;
k++;
} Getchdo;
}while (Ch >= ' a ' && ch <= ' z ' | | ch >= ' 0 ' && ch <= ' 9 ');
A[k] = 0;
strcpy (ID, a);
i = 0;
j = norw-1;
do {k = (i+j)/2;
if (strcmp (ID, word[k]) <= 0) j = k-1;
if (strcmp (ID, word[k]) >= 0) i = k+1;
}while (i <= J);
if (I-1 > J) {sym = wsym[k];
} else {sym = ident;
}} else {if (ch >= ' 0 ' && ch <= ' 9 ') {k = 0;
num = 0;
sym = number;
do {num = num * + CH-' 0 ';
k++;
Getchdo; }while (Ch >= ' 0 ' && ch <= ' 9 ');
k--;
if (k > Nmax) {error (30);
}} else {if (ch = = ': ') {getchdo;
if (ch = = ' = ') {sym = becomes;
Getchdo;
} else {sym = nul; }} else {if (ch = = ' < ') {GETC
Hdo
if (ch = = ' = ') {sym = Leq;
Getchdo;
} else {sym = LSS;
}} else {if (ch = = ' > ') {
Getchdo;
if (ch = = ' = ') {sym = Geq;
Getchdo;
} else {sym = gtr;
}} else {sym = Ssym[ch];
if (sym! = period) {Getchdo;
} else return-1;
}}}}} return 0;
} void init () {int i;
for (int i = 0; i < i++) ssym[i] = nul;
ssym[' + '] = plus;
ssym['-'] = minus;
ssym[' * ') = times;
ssym['/'] = slash;
ssym[' (') = Lparen;
ssym[') = Rparen;
ssym[' = '] = EQL;
Ssym[', '] = comma;
ssym['. '] = period;
ssym[' # '] = NEQ;
Ssym['; '] = semicolon;
strcpy (& (Word[0][0]), "Begin");
strcpy (& (Word[1][0]), "call");
strcpy (& (Word[2][0]), "const"); strcpy (& (Word[3][0]), "do");
strcpy (& (Word[4][0]), "end");
strcpy (& (Word[5][0]), "if");
strcpy (& (Word[6][0]), "odd");
strcpy (& (Word[7][0]), "procedure");
strcpy (& (Word[8][0]), "read");
strcpy (& (Word[9][0]), "then");
strcpy (& (Word[10][0]), "Var");
strcpy (& (Word[11][0]), "while");
strcpy (& (Word[12][0]), "write");
Wsym[0] = Beginsym;
WSYM[1] = Callsym;
WSYM[2] = Constsym;
WSYM[3] = Dosym;
WSYM[4] = Endsym;
WSYM[5] = Ifsym;
WSYM[6] = Oddsym;
WSYM[7] = Procsym;
WSYM[8] = Readsym;
WSYM[9] = Thensym;
WSYM[10] = Varsym;
WSYM[11] = Whilesym;
WSYM[12] = Writesym;
} int main () {printf ("Please enter the file name to parse \ n");
scanf ("%s", fname);
Fin = fopen (fname, "R");
if (Fin) {printf ("Please enter a file name to save \ n");
scanf ("%s", fname);
FA1 = fopen (fname, "w");
Init ();
Err = 0;
CC = CX = LL = 0;
ch = ";
while (Getsym ()! =-1) {} printf ("Parse complete");
} else {printf ("File not found \ n");
} printf ("\ n");
return 0; }