Http://www.cnblogs.com/xuqiang/archive/2010/09/21/1953501.html
Main.java
* * * Main program/import java.io.*; Import lexer.*; public class Main {public static void main (string[] args) throws IOException {Lexer Lexer = new Lexer (); while (lexer.ge Treaderstate () = = False) {Lexer.scan ();}/* Save relevant information/Lexer.savetokens (); Lexer.savesymbolstable (); } }
Lexer.java
Package lexer; Import java.io.*; Import java.util.*; Import symbols.*; public class Lexer {public static int line = 1;/* Record line number */char peek = ';/* Next read character/hashtable<string, word> W ords = new hashtable<string, word> (); /* Symbol table */private hashtable<token, string> table = new Hashtable<token, string> (); /* Token sequence */private list<string> tokens = new linkedlist<string> (); /* Read File variable * * BufferedReader reader = null; /* Save whether the current read to the end of the file/private Boolean isend = false; /* Read to the end of the file/public Boolean getreaderstate () {return this.isend}/* Save the */public void savesymbolstable () stored in the table Throws IOException {FileWriter writer = new FileWriter ("symbol table. txt"); Writer.write ("[Symbol] [Symbol type information]\n"); Writer.write ("\ r \ n" ); Enumeration<token> e = Table.keys (); while (E.hasmoreelements ()) {Token Token = (Token) e.nextelement (); String desc = table.get (token); /* Write File * * Writer.write (token + "\t\t\t" + desc + "\ r \ n"); } writer.flush (); }/* Save tokens */Publicvoid Savetokens () throws IOException {FileWriter writer = new FileWriter ("tokens table. txt"); Writer.write ("[symbol] \ n"); writer. Write ("\ r \ n"); for (int i = 0; i < tokens.size (); ++i) {String tok = (String) tokens.get (i);/* Write File/writer.write (tok + "\ r \ n");} W Riter.flush (); } void Reserve (Word W) {Words.put (W.lexme, w);}/* constructor add keyword and type to hashtable words/public Lexer () {* * Initialize read file variable * * try {reader = new BufferedReader (new FileReader ("input. 
txt");} catch (IOException e) {System.out.print (e);}/* keyword */th Is.reserve (New Word ("If", Tag.if)); This.reserve (New Word ("then", Tag.then)); This.reserve (New Word ("Else", Tag.else)); This.reserve (New Word ("while", Tag.while)); This.reserve (New Word ("Do", tag.do)); /* Type * * This.reserve (word.true); This.reserve (Word.false); This.reserve (Type.int); This.reserve (Type.char); This.reserve (Type.bool); This.reserve (type.float);end = true; }//Peek = (char) System.in.read (); Public Boolean readch (char ch) throws IOException {READCH (), if (This.peek!= ch) {return false;} This.peek = '; re Turn true; Public Token Scan () throws IOException {/* eliminate whitespace */for (;; Readch ()) {if (peek = = ' | | | = PEEK = = ' t ') continue; else if (peek = = ' \ n ') line = line + 1; else break; /* Below start to split keywords, identifiers and other information * * Switch (PEEK) {/* for = =, >=, <=,!= distinction using the state machine to achieve * * case ' = ': if (readch (' = ')) {Tokens.add ( "=="); return word.eq; else {tokens.add ("="); return the new Token (' = ');} case ' > ': if (readch (' = ')) {Tokens.add (">="); else {tokens.add (">"), Return new Token (' > '), Case ' < ': if (readch (' = ')) {Tokens.add ("<="), return Word. Le else {Tokens.add ("<"); return to New Token (' < ');} case '! ': if (readch (' = ')) {Tokens.add ("!="); else {tokens.add ("!"); return new Token ('! ');} /* The following is the recognition of the number, according to the grammatical rules, here the * number as long as it can recognize the integer on the line. 
*/if (Character.isdigit (peek)) {int Value = 0; Do {value = ten * value + character.digit (Peek, ten); Readch (); while (Character.isdigit (Peek)); num n = new num (value); Tokens.add (N.tostring ()); Table.put (N, "Num"); return n; }/* keyword or identifier identification */if (Character.isletter (Peek)) {StringBuffer sb = new StringBuffer ();/* First get the entire partition/do {Sb.appe nd (PEEK); READCH (); while (Character.isletterordigit (Peek)); /* judgment is the keyword or identifier */String s = sb.tostring (); Word w = (word) words.get (s); /* If it is a keyword or type, w should not be empty */if (w!= null) {//Table.put (W, "KeyWord or type"); Tokens.add (w.tostring ()); return w;/* description is keyword or type name * * * Otherwise it is an identifier ID/w = new Word (s, tag.id); Tokens.add (W.tostring ()); Table.put (W, "id"); Words.put (S, W); Return w; /* Any character in Peek is considered to be a lexical unit return */Token tok = new Token (peek); Table.put (Tok, "Token or seprator"); if (int) Peek!= 0xffff) Tokens.add (tok.tostring ()); Peek = '; return Tok; } }
Num.java
Package lexer; public class Num extends token{the public final int value, public Num (int v) {super (tag.num), this.value = v} public strin G ToString () {return "" + Value;}}
Tag.java
package lexer;

/**
 * Integer tags identifying the kind of a token. The values start at 256 and
 * run consecutively.
 */
public class Tag {
    public final static int
        AND = 256,
        BASIC = 257,
        BREAK = 258,
        DO = 259,
        ELSE = 260,
        EQ = 261,     /* == */
        FALSE = 262,
        GE = 263,
        ID = 264,
        IF = 265,
        INDEX = 266,
        LE = 267,
        MINUS = 268,
        NE = 269,
        NUM = 270,
        OR = 271,
        REAL = 272,
        TEMP = 273,
        TRUE = 274,
        WHILE = 275,
        /* added afterwards */
        THEN = 276;
}
Token.java
Package lexer; public class Token {public final int tag; Token (int t) {this.tag = t;} public String toString () {return "" + (c) HAR) tag; public static void Main (string[] args) {Token tok = new Token (' a '); System.out.println (Tok); } }
Word.java
/* Class word is used to manage reserved words, identifiers, and compound word elements such as &&. * * Package lexer; public class Word extends Token {public String lexme = ""; Public Word (String s, int t) {super (t); this.lexme = S.} public String toString () {return this.lexme;} public static Final word and = new Word ("&&", tag.and), or = new Word ("| |", tag.or), eq = new Word ("= =", tag.eq), NE = new word ("!=", tag.ne), le = new Word ("<=", tag.le), GE = new Word (">=", tag.ge), minus = new word ("minus", Tag.minus), True = new Word ("true", tag.true), False = new Word ("False", tag.false), temp = new Word ("T", tag.temp); }
Type.java
* * Description Data type * * Package symbols; Import lexer.*; public class type extends word{the public type (String s, int tag) {Super (S, tag);} The public static final Type int = new Type ( "int", tag.basic), Float = new Type ("float", tag.basic), Char = new Type ("char", tag.basic), Bool = new Type ("bool", Tag. BASIC); }
============
http://freewxy.iteye.com/blog/870016
What is lexical structure?
Source code is a stream of characters made up of keywords, variable names, method names, parentheses, and so on. The rules these elements must obey (for example, a variable name cannot contain punctuation and must begin with a letter rather than a digit) are the lexical rules of the language.
What is lexical analysis?
The lexical analysis phase is the first phase of the compilation process. The task at this stage is to read the source program from left to right, one character at a time, i.e. to scan the stream of characters that compose the source program and recognize the word (also known as a word symbol or symbol) according to the word-formation rule.
Lexical rules of the simple language to be analyzed:
1) Key Words
begin if then while do end
2) Operators and delimiters
: := + - * / < <= <> > >= = ; ( ) #
3) All other words are identifiers (ID) and integer constants (NUM), defined as follows:
ID = letter (letter | digit)*
NUM = digit digit*
4) Whitespace consists of blanks, tabs, and line breaks. It separates IDs, NUMs, operators, delimiters, and keywords, and is usually discarded during the lexical analysis phase.
Category encoding for various word symbols
Word symbol |
Category Code |
Word symbol |
Category Code |
begin |
1 |
: |
17 |
if |
2 |
:= |
18 |
then |
3 |
< |
20 |
while |
4 |
<> |
21 |
do |
5 |
<= |
22 |
end |
6 |
> |
23 |
letter (letter | digit)* |
10 |
>= |
24 |
digit digit* |
11 |
= |
25 |
+ |
13 |
; |
26 |
- |
14 |
( |
27 |
* |
15 |
) |
28 |
/ |
16 |
# |
0 |
The function of the lexical analysis program:
Input: Source program string for the given grammar
Output: a sequence of two-tuples (syn, token or sum), where:
syn is the word's category code;
token is the string of the word itself;
sum is the value of an integer constant.
For example, for the source program begin x:=9;if x>0 then x:=2*x+1/3;end# lexical analysis outputs the following sequence: (1,begin) (10,x) (18,:=) (11,9) (26,;) (2,if) ...
Flow chart: