Once the grammar rules are known, we can enumerate every kind of lexeme the scanner must recognise: symbols, operators, identifiers (ID), numeric literals (NB), and keywords. Each symbol, operator, and keyword forms its own token class (one class per lexeme), whereas identifiers and literals share a class and are distinguished by an index into a table. We therefore need a program that examines the input character by character and finally produces a token sequence, an identifier table, and a literal table. Without further ado, here is the code (the program below was compiled with VS2013).
#define _CRT_SECURE_NO_WARNINGS
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <iostream>
#include <string>
#include <vector>
using namespace std;

/*
 * A lexical token.
 *   type - "$" followed by the lexeme for keywords/symbols (e.g. "$begin",
 *          "$:="), or "$id" / "$INTC" for identifiers / integer literals.
 *   pos  - index into idtable (for "$id") or nbtable (for "$INTC");
 *          -1 for every other token.
 */
struct Token {
    Token(string str, int pos) : type(str), pos(pos) {}
    string type;
    int pos;
};

vector<string> idtable;  // global: identifier table produced by lexical analysis
vector<int> nbtable;     // global: integer-literal (INTC) table produced by lexical analysis

#define ERROR_INVALID_SYMBOL 1  // error code: illegal symbol
#define NO_ERROR 0              // error code: no error

int error = NO_ERROR;  // global: error recorded during lexical analysis

// True for ASCII letters. Takes int so that EOF can be passed safely.
static bool isletter(int ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}

// True for ASCII decimal digits. Takes int so that EOF can be passed safely.
static bool isdigitch(int ch) {
    return ch >= '0' && ch <= '9';
}

/*
 * Returns true when the character is an "other" character, i.e. neither an
 * ASCII letter nor an ASCII digit.
 */
bool isother(char ch) {
    return !isletter(ch) && !isdigitch(ch);
}

/*
 * Returns true when the string is one of the SNL keywords.
 */
bool iskeyword(const string &str) {
    static const char *const keywords[] = {
        "integer", "char",  "program", "array", "of",     "record", "end",
        "var",     "procedure", "begin", "if",  "then",   "else",   "fi",
        "while",   "do",    "endwh",   "read",  "write",  "return", "type"};
    const size_t count = sizeof(keywords) / sizeof(keywords[0]);
    for (size_t i = 0; i < count; ++i) {
        if (str == keywords[i]) return true;
    }
    return false;
}

/*
 * Returns the index of the identifier in idtable; when it is not present yet
 * it is appended and the index of the new entry is returned.
 */
int addidtable(const string &str) {
    for (size_t i = 0; i < idtable.size(); ++i) {
        if (str == idtable[i]) return static_cast<int>(i);
    }
    idtable.push_back(str);
    return static_cast<int>(idtable.size()) - 1;
}

/*
 * Converts the digit string to an int and returns its index in nbtable; when
 * the value is not present yet it is appended and the new index is returned.
 * Values too large for int saturate at INT_MAX instead of overflowing.
 */
int addnbtable(const string &str) {
    long parsed = strtol(str.c_str(), nullptr, 10);  // saturates at LONG_MAX
    if (parsed > INT_MAX) parsed = INT_MAX;
    const int num = static_cast<int>(parsed);

    for (size_t i = 0; i < nbtable.size(); ++i) {
        if (num == nbtable[i]) return static_cast<int>(i);
    }
    nbtable.push_back(num);
    return static_cast<int>(nbtable.size()) - 1;
}

/*
 * Lexical scanner. Each call reads one token from pf and returns it as a
 * heap-allocated Token that the caller must delete.
 *
 * Returns nullptr at end of input and also on a lexical error; in the error
 * case the global `error` is set to ERROR_INVALID_SYMBOL and a message is
 * printed, so callers distinguish the two by inspecting `error`.
 */
Token *scanner(FILE *pf) {
    // fgetc returns int: keeping it as int is what makes the EOF test reliable.
    int ch = fgetc(pf);

    // Skip whitespace between tokens.
    while (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t') {
        ch = fgetc(pf);
    }
    if (ch == EOF) return nullptr;

    // Identifier or keyword: letter (letter | digit)*
    if (isletter(ch)) {
        string word;
        do {
            word += static_cast<char>(ch);
            ch = fgetc(pf);
        } while (isletter(ch) || isdigitch(ch));
        if (ch != EOF) ungetc(ch, pf);  // give the look-ahead character back

        if (iskeyword(word)) return new Token("$" + word, -1);
        return new Token("$id", addidtable(word));
    }

    // Integer literal: digit+. The literal ends at the first non-digit, which
    // is pushed back whatever it is (a following letter starts a new token).
    if (isdigitch(ch)) {
        string digits;
        do {
            digits += static_cast<char>(ch);
            ch = fgetc(pf);
        } while (isdigitch(ch));
        if (ch != EOF) ungetc(ch, pf);

        return new Token("$INTC", addnbtable(digits));
    }

    switch (ch) {
    case '+': return new Token("$+", -1);
    case '-': return new Token("$-", -1);
    case '*': return new Token("$*", -1);
    case '/': return new Token("$/", -1);
    case '<': return new Token("$<", -1);
    case ';': return new Token("$;", -1);
    case ',': return new Token("$comma", -1);
    case '.': return new Token("$.", -1);
    case '=': return new Token("$=", -1);
    case '[': return new Token("$[", -1);
    case ']': return new Token("$]", -1);
    case '(': return new Token("$(", -1);
    case ')': return new Token("$)", -1);
    case ':': {
        // ':' is only valid as the first half of ":=".
        const int next = fgetc(pf);
        if (next == '=') return new Token("$:=", -1);
        if (next != EOF) ungetc(next, pf);
        error = ERROR_INVALID_SYMBOL;
        printf("error: \":\" is not a valid symbol\n");
        return nullptr;
    }
    default:
        error = ERROR_INVALID_SYMBOL;  // record the failure for the caller
        printf("error: contains illegal symbol (%c)\n", ch);
        return nullptr;
    }
}

// Opens an output file for writing; prints a message and returns nullptr on failure.
static FILE *openoutput(const string &path) {
    FILE *out = fopen(path.c_str(), "w+");
    if (!out) printf("Open file failed!\n");
    return out;
}

/*
 * Entry point. When argc >= 2, argv[1] is the source file name; otherwise the
 * default "snl.txt" is used. On a successful scan the token sequence is
 * printed and written to <file>.token, the identifier table to
 * <file>.idtable and the literal table to <file>.nbtable.
 * Returns -1 when a file cannot be opened, 0 otherwise.
 */
int main(int argc, char **argv) {
    string filename;
    if (argc >= 2)
        filename = argv[1];
    else
        filename = "snl.txt";

    FILE *pf = fopen(filename.c_str(), "r");
    if (!pf) {
        printf("Open file failed!\n");
        return -1;
    }

    // Collect tokens until end of input or the first lexical error.
    vector<Token> result;
    while (Token *ptoken = scanner(pf)) {
        result.push_back(*ptoken);
        delete ptoken;
    }
    fclose(pf);

    int status = 0;
    if (error == NO_ERROR) {
        // Token sequence: echoed to the console and written to <file>.token.
        FILE *out = openoutput(filename + ".token");
        if (!out) status = -1;
        for (const auto &tok : result) {
            if (tok.type == "$id" || tok.type == "$INTC") {
                printf("(%-10s, [%2d])\n", tok.type.c_str(), tok.pos);
                if (out) fprintf(out, "(%s,[%d])\n", tok.type.c_str(), tok.pos);
            } else {
                printf("(%-10s, \"\")\n", tok.type.c_str());
                if (out) fprintf(out, "(%s,\"\")\n", tok.type.c_str());
            }
        }
        if (out) fclose(out);

        // Identifier table, one entry per line.
        out = openoutput(filename + ".idtable");
        if (out) {
            for (size_t i = 0; i < idtable.size(); ++i) {
                fprintf(out, "%s\n", idtable[i].c_str());
            }
            fclose(out);
        } else {
            status = -1;
        }

        // Integer-literal table, one entry per line.
        out = openoutput(filename + ".nbtable");
        if (out) {
            for (size_t i = 0; i < nbtable.size(); ++i) {
                fprintf(out, "%d\n", nbtable[i]);
            }
            fclose(out);
        } else {
            status = -1;
        }
    }

#ifdef _WIN32
    system("pause");  // keep the console window open when run from the IDE
#endif
    return status;
}
The program outputs the token sequence, together with the identifier table and the literal table.
The token sequence is written to a file named *.token.
The other two tables are not used during parsing; they are written to files named *.idtable and *.nbtable, respectively.
A lexical analyzer for the SNL grammar