1. Lexical analyzer
Function: read the input characters of the source program, group them into words (lexemes), and output the resulting sequence of lexical units (tokens).
2. Design principles
1) Symbol classes of the C programming language: keywords, identifiers, constants, operators, and delimiters
2) The lexical analyzer outputs two-tuples: <word type, attribute value of the word symbol>
3) Regular expressions and state transition diagrams
4) Program description:
1> main opens the source code file and reads the first character from the file stream.
2> If the first character is a letter, it is handed over to letterprocess(str);
3> If the first character is a digit, it is handed over to numberprocess(str);
4> If the first character is neither a letter nor a digit, it is handed over to otherprocess(str);
5> Note that in the steps above, each time a character is read through FILE *fp, fp advances to the next character. Handling of whitespace: isspace(ch) checks whether its argument is a whitespace character, that is, a space (' '), a horizontal tab
('\t'), a carriage return ('\r'), a line feed ('\n'), a vertical tab ('\v'), or a form feed ('\f').
Summary of the categories this program outputs: keywords, identifiers (custom variables), numbers, arithmetic operators, relational operators, separators, special symbols, comment markers, logical operators, and invalid symbols
3. Program source code
/*
 * Simple lexical analyzer for C-like source text.
 *
 * The input file is read one character at a time.  Every lexeme is printed
 * on its own line as "<lexeme, category>"; anything that cannot be
 * classified is reported with an "error!" line.
 *
 * Dispatch rule (see main): a lexeme that starts with a letter goes to
 * letterprocess(), one that starts with a digit goes to numberprocess(),
 * everything else (whitespace and punctuation) goes to otherprocess().
 * Each of those functions returns the first character it did NOT consume,
 * so the caller always holds exactly one character of look-ahead.
 */
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined(_WIN32)
/* Legacy headers from the original listing; nothing below depends on them. */
#include <conio.h>
#include <malloc.h>
#endif

#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))

/* Capacity of a token buffer, including the terminating '\0'. */
enum { TOKEN_MAX = 64 };

/* Input stream shared by the *process functions; opened and closed in main. */
FILE *fp;

/* Word type 1: reserved words (the C keywords plus "printf" and "main"). */
const char *const keyword[] = {
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "printf", "union", "unsigned", "void",
    "volatile", "while", "main"
};

/* Word type 2: arithmetic operators. */
const char *const operatornum[] = { "+", "-", "*", "/", "++", "--" };

/* Word type 3: relational operators. */
const char *const comparison[] = { "<", "<=", "=", ">", ">=", "<>", "==", "!=" };

/* Word type 4: separators. */
const char *const interpunction[] = { ",", ";", ":=", ".", "(", ")", "{", "}" };

/* Word type 5: special symbols. */
const char *const biaoshifu[] = { "%", "$", "^", "&", "_", "#" };

/* Word type 6: comment markers. */
const char *const zhussce[] = { "//", "/*", "*/" };

/*
 * Word type 7: logical operators.  A lone "&" is already a special symbol
 * (type 5, which is consulted first), so the conjunction is spelled "&&".
 */
const char *const Luoji[] = { "&&", "||", "!" };

/*
 * The <ctype.h> classifiers are only defined for EOF and values that fit in
 * an unsigned char.  These wrappers accept either an fgetc() result or a
 * plain (possibly negative) char and never classify EOF as a character.
 */
static bool is_alnum_ch(int c) { return c != EOF && isalnum((unsigned char)c) != 0; }
static bool is_alpha_ch(int c) { return c != EOF && isalpha((unsigned char)c) != 0; }
static bool is_digit_ch(int c) { return c != EOF && isdigit((unsigned char)c) != 0; }
static bool is_space_ch(int c) { return c != EOF && isspace((unsigned char)c) != 0; }

/*
 * Append c to buf (current length *len).  Characters beyond TOKEN_MAX - 1
 * are dropped, so an over-long lexeme is truncated in the output instead of
 * overflowing the buffer.
 */
static void token_append(char *buf, size_t *len, int c)
{
    if (*len < (size_t)TOKEN_MAX - 1) {
        buf[(*len)++] = (char)c;
    }
}

/*
 * Report whether searchstr is listed in the table selected by wordtype:
 *   1 keyword, 2 arithmetic operator, 3 relational operator, 4 separator,
 *   5 special symbol, 6 comment marker, 7 logical operator.
 * Any other wordtype yields false.
 */
bool search(const char searchstr[], int wordtype)
{
    static const struct {
        const char *const *words;
        size_t count;
    } tables[] = {
        { keyword,       ARRAY_LEN(keyword) },
        { operatornum,   ARRAY_LEN(operatornum) },
        { comparison,    ARRAY_LEN(comparison) },
        { interpunction, ARRAY_LEN(interpunction) },
        { biaoshifu,     ARRAY_LEN(biaoshifu) },
        { zhussce,       ARRAY_LEN(zhussce) },
        { Luoji,         ARRAY_LEN(Luoji) },
    };

    if (wordtype < 1 || (size_t)wordtype > ARRAY_LEN(tables)) {
        return false;
    }

    for (size_t i = 0; i < tables[wordtype - 1].count; i++) {
        if (strcmp(tables[wordtype - 1].words[i], searchstr) == 0) {
            return true;
        }
    }
    return false;
}

/*
 * Return the word type (2..7) of a punctuation lexeme, or 0 when it is in
 * none of the symbol tables.  Tables are consulted in ascending type order.
 */
static int classify_symbol(const char *tok)
{
    for (int type = 2; type <= 7; type++) {
        if (search(tok, type)) {
            return type;
        }
    }
    return 0;
}

/*
 * Letter handler: ch is the first character of a word.  Consumes the run of
 * letters and digits starting at ch, prints it as a keyword or as a custom
 * variable, and returns the first character after the word (EOF at end of
 * input).
 */
int letterprocess(int ch)
{
    char letter[TOKEN_MAX];
    size_t len = 0;

    while (is_alnum_ch(ch)) {
        token_append(letter, &len, ch);
        ch = fgetc(fp);
    }
    letter[len] = '\0';

    if (search(letter, 1)) {
        printf("<%s, keyword>\n", letter);
    } else {
        printf("<%s, custom variable>\n", letter);
    }
    return ch;
}

/*
 * Number handler: ch is the first digit of a number.  Consumes the run of
 * digits and prints it as a number.  If a letter follows the digits
 * directly, everything up to the next whitespace (or end of input) is
 * consumed and reported as an invalid identifier instead.  Returns the first
 * character that was not consumed.
 */
int numberprocess(int ch)
{
    char num[TOKEN_MAX];
    size_t len = 0;

    while (is_digit_ch(ch)) {
        token_append(num, &len, ch);
        ch = fgetc(fp);
    }

    if (is_alpha_ch(ch)) {
        /* Stop at EOF as well as at whitespace, otherwise the loop never ends. */
        while (ch != EOF && !is_space_ch(ch)) {
            token_append(num, &len, ch);
            ch = fgetc(fp);
        }
        num[len] = '\0';
        printf("error! Invalid identifier: %s\n", num);
        return ch;
    }

    num[len] = '\0';
    printf("<%s, number>\n", num);
    return ch;
}

/*
 * Handler for everything that is neither a letter nor a digit.
 *
 * Whitespace is skipped one character per call.  For punctuation the longest
 * table entry is taken: if ch together with the following character forms a
 * known two-character symbol, both are consumed; otherwise ch is a token on
 * its own.  This keeps adjacent symbols such as "++;" or "){" from being
 * glued into a single unknown token.  Returns the first character that was
 * not consumed.
 */
int otherprocess(int ch)
{
    static const char *const labels[] = {
        [2] = "Arithmetic Operator",
        [3] = "relational operator number",
        [4] = "separator number",
        [5] = "special identifier",
        [6] = "annotator",
        [7] = "logical operator number",
    };
    char other[3];
    int next;
    int type;

    if (ch == EOF) {
        return EOF;
    }
    if (is_space_ch(ch)) {
        return fgetc(fp);
    }

    other[0] = (char)ch;
    other[1] = '\0';
    other[2] = '\0';

    next = fgetc(fp);
    if (next != EOF && !is_space_ch(next) && !is_alnum_ch(next)) {
        other[1] = (char)next;
        if (classify_symbol(other) != 0) {
            next = fgetc(fp);      /* the pair is one token; fetch new look-ahead */
        } else {
            other[1] = '\0';       /* not a known pair; next stays as look-ahead */
        }
    }

    type = classify_symbol(other);
    if (type != 0) {
        printf("<%s, %s>\n", other, labels[type]);
    } else {
        printf("error! Invalid character: %s\n", other);
    }
    return next;
}

/*
 * Entry point.  Analyzes the file named by the first command-line argument,
 * or "source program .txt" when no argument is given.  Returns EXIT_FAILURE
 * if the file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    const char *path = (argc > 1) ? argv[1] : "source program .txt";
    int str; /* one character of look-ahead; int so that EOF stays distinguishable */

    printf("********************************** lexical analyzer ***********************************\n");

    fp = fopen(path, "r");
    if (fp == NULL) {
        printf("source program cannot be opened!\n");
        return EXIT_FAILURE;
    }

    str = fgetc(fp);
    while (str != EOF) {
        if (is_alpha_ch(str)) {
            str = letterprocess(str);
        } else if (is_digit_ch(str)) {
            str = numberprocess(str);
        } else {
            str = otherprocess(str);
        }
    }

    fclose(fp);
    printf("the lexical analysis is complete. Thank you for using it!\n");
    return 0;
}