Lexical analyzers for SNL grammars

Source: Internet
Author: User

Once we know the grammar rules, we can enumerate all of its symbols, operators, identifiers (ID), literal values (NB, emitted as `$INTC` tokens), and keywords. Each symbol, operator, and keyword forms a token class of its own, so we need a program that examines the input character by character and finally produces a token sequence, an identifier table, and a literal table. Enough talk — here is the code (the following program was compiled with VS2013).

#define _CRT_SECURE_NO_WARNINGS  // silence MSVC deprecation warnings for fopen and friends
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

using namespace std;

/**
 * One lexical token.
 *
 * `type` is the token class spelled with a leading '$' ("$id", "$INTC",
 * "$:=", "$begin", ...). `pos` is the index into idtable (for "$id") or
 * nbtable (for "$INTC"); every other token class carries -1.
 */
struct Token {
    Token(string str, int pos) : type(str), pos(pos) {}
    string type;
    int pos;
};

// Identifier table filled in by the scanner (one entry per distinct name).
vector<string> idtable;
// Integer-constant table filled in by the scanner (one entry per distinct value).
vector<int> nbtable;

#define ERROR_INVALID_SYMBOL 1  // error code: illegal symbol in the input
#define NO_ERROR 0              // error code: no error

// Error state of the lexical analysis; set by scanner(), read by main().
int error = NO_ERROR;

// ASCII-only classification. These take int so that the EOF value returned
// by fgetc() can be passed without being truncated to char first.
static bool isAsciiLetter(int ch) {
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
}

static bool isAsciiDigit(int ch) {
    return ch >= '0' && ch <= '9';
}

/**
 * Returns true when `ch` is neither an ASCII letter nor an ASCII digit.
 */
bool Isother(char ch) {
    return !isAsciiLetter(ch) && !isAsciiDigit(ch);
}

/**
 * Returns true when `str` is one of the reserved words recognised by this
 * scanner. The comparison is case-sensitive.
 */
bool iskeyword(const string &str) {
    static const char *const kKeywords[] = {
        "integer", "char",  "program",   "array", "of",   "record", "end",
        "var",     "procedure", "begin", "if",    "then", "else",   "fi",
        "while",   "do",    "endwh",     "read",  "write", "return", "type",
    };
    for (const char *keyword : kKeywords) {
        if (str == keyword) return true;
    }
    return false;
}

/**
 * Returns the index of `str` in idtable, appending it first when it is not
 * yet present.
 */
int addidtable(const string &str) {
    for (size_t i = 0; i < idtable.size(); ++i) {
        if (idtable[i] == str) return static_cast<int>(i);
    }
    idtable.push_back(str);
    return static_cast<int>(idtable.size() - 1);
}

/**
 * Converts the digit string `str` with atoi() and returns the index of the
 * resulting value in nbtable, appending it first when it is not yet present.
 */
int addnbtable(const string &str) {
    const int num = atoi(str.c_str());
    for (size_t i = 0; i < nbtable.size(); ++i) {
        if (nbtable[i] == num) return static_cast<int>(i);
    }
    nbtable.push_back(num);
    return static_cast<int>(nbtable.size() - 1);
}

/**
 * Lexical scanner: each call consumes one token from `pf`.
 *
 * @param pf  input stream opened for reading.
 * @return    a heap-allocated Token that the caller must delete, or NULL at
 *            end of input or on an illegal symbol. In the error case the
 *            global `error` is set to ERROR_INVALID_SYMBOL and a message is
 *            printed, so callers tell the two NULL cases apart via `error`.
 */
Token *scanner(FILE *pf) {
    // fgetc() returns int; keeping it as int is what lets byte 0xFF and EOF
    // be told apart.
    int ch = fgetc(pf);

    // Skip whitespace between tokens.
    while (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t') {
        ch = fgetc(pf);
    }
    if (ch == EOF) return NULL;

    // Identifiers and keywords: letter (letter | digit)*
    if (isAsciiLetter(ch)) {
        string lexeme;
        do {
            lexeme += static_cast<char>(ch);
            ch = fgetc(pf);
        } while (isAsciiLetter(ch) || isAsciiDigit(ch));
        if (ch != EOF) ungetc(ch, pf);  // the terminator belongs to the next token

        if (iskeyword(lexeme)) return new Token("$" + lexeme, -1);
        return new Token("$id", addidtable(lexeme));
    }

    // Integer constants: digit+
    if (isAsciiDigit(ch)) {
        string digits;
        do {
            digits += static_cast<char>(ch);
            ch = fgetc(pf);
        } while (isAsciiDigit(ch));
        // Push back whatever ended the number -- including a letter -- so
        // that it is scanned as the start of the next token instead of the
        // number silently turning into some other token.
        if (ch != EOF) ungetc(ch, pf);
        return new Token("$INTC", addnbtable(digits));
    }

    switch (ch) {
    case '+': return new Token("$+", -1);
    case '-': return new Token("$-", -1);
    case '*': return new Token("$*", -1);
    case '/': return new Token("$/", -1);
    case '<': return new Token("$<", -1);
    case ';': return new Token("$;", -1);
    case ',': return new Token("$comma", -1);
    case '.': return new Token("$.", -1);
    case '=': return new Token("$=", -1);
    case '[': return new Token("$[", -1);
    case ']': return new Token("$]", -1);
    case '(': return new Token("$(", -1);
    case ')': return new Token("$)", -1);
    case ':': {
        // ':' is only valid as the first half of ":=".
        const int next = fgetc(pf);
        if (next == '=') return new Token("$:=", -1);
        if (next != EOF) ungetc(next, pf);
        error = ERROR_INVALID_SYMBOL;
        printf("Error: ':' is not a valid symbol\n");
        return NULL;
    }
    default:
        error = ERROR_INVALID_SYMBOL;
        printf("Error: contains illegal symbol (%c)\n", ch);
        return NULL;
    }
}

// Opens `path` for writing; prints a message and returns NULL on failure.
static FILE *openOutput(const string &path) {
    FILE *out = fopen(path.c_str(), "w");
    if (!out) printf("Open file failed: %s\n", path.c_str());
    return out;
}

/**
 * Entry point. argv[1], when given, names the source file; otherwise
 * "snl.txt" is used. On a clean scan the token sequence is printed and
 * written to <file>.token, and the identifier and integer-constant tables
 * are written to <file>.idtable and <file>.nbtable.
 */
int main(int argc, char **argv) {
    const string filename = (argc >= 2) ? argv[1] : "snl.txt";

    FILE *pf = fopen(filename.c_str(), "r");
    if (!pf) {
        printf("Open file failed!\n");
        return -1;
    }

    // Collect tokens until the scanner reports end of input or an error.
    vector<Token> result;
    for (;;) {
        unique_ptr<Token> token(scanner(pf));  // scanner() hands over ownership
        if (!token) break;
        result.push_back(*token);
    }
    fclose(pf);

    if (error == NO_ERROR) {
        FILE *out = openOutput(filename + ".token");
        for (const auto &tok : result) {
            const bool hasIndex = (tok.type == "$id" || tok.type == "$INTC");
            if (hasIndex) {
                printf("(%-10s,[%2d])\n", tok.type.c_str(), tok.pos);
                if (out) fprintf(out, "(%s,[%d])\n", tok.type.c_str(), tok.pos);
            } else {
                printf("(%-10s,\"\")\n", tok.type.c_str());
                if (out) fprintf(out, "(%s,\"\")\n", tok.type.c_str());
            }
        }
        if (out) fclose(out);

        out = openOutput(filename + ".idtable");
        if (out) {
            for (const auto &name : idtable) fprintf(out, "%s\n", name.c_str());
            fclose(out);
        }

        out = openOutput(filename + ".nbtable");
        if (out) {
            for (const int value : nbtable) fprintf(out, "%d\n", value);
            fclose(out);
        }
    }

#ifdef _WIN32
    system("pause");  // keep the console window open when launched from the IDE
#endif
    return 0;
}

The program outputs the token sequence, as well as the identifier table and the literal table.

The token sequence is written to a file named *.token.

The other two tables are not used in parsing; they are written to files named *.idtable and *.nbtable, respectively.


Lexical analyzers for SNL grammars

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.