一個簡單詞法分析器的實現代碼(Java 實現)

來源:互聯網
上載者:User

Main.java

/*<br /> * 主程式<br /> */<br />import java.io.*;<br />import lexer.*;</p><p>public class Main {<br />public static void main(String[] args) throws IOException {<br />Lexer lexer = new Lexer();</p><p>while (lexer.getReaderState() == false) {<br />lexer.scan();<br />}</p><p>/* 儲存相關資訊 */<br />lexer.saveTokens();<br />lexer.saveSymbolsTable();</p><p>}<br />}<br />

Lexer.java

package lexer;

import java.io.*;
import java.util.*;

import symbols.*;

/**
 * A small hand-written lexer. It reads characters from the file "輸入.txt",
 * splits them into tokens (two-character relational operators, integer
 * literals, keywords / type names, identifiers, and single-character tokens),
 * and records the printable form of every token plus a table of the
 * identifiers it has seen.
 */
public class Lexer {
    /* Value that peek holds after Reader.read() returned -1 and was cast to char. */
    private static final char EOF = (char) 0xffff;

    /* Current line number; incremented for every '\n' skipped as whitespace. */
    public static int line = 1;
    /* Look-ahead character: the most recently read, not yet consumed, character. */
    char peek = ' ';
    /* Reserved words, type names and already-seen identifiers, keyed by lexeme. */
    Hashtable<String, Word> words = new Hashtable<String, Word>();
    /* Symbol table: identifier token -> description. */
    private Hashtable<Token, String> table = new Hashtable<Token, String>();
    /* Printable form of every token, in the order scanned. */
    private List<String> tokens = new LinkedList<String>();
    /* Source of input characters; stays null if the input file could not be opened. */
    BufferedReader reader = null;
    /* Whether the end of the input file has been reached. */
    private Boolean isEnd = false;

    /**
     * Reports whether the reader has hit the end of the input.
     *
     * @return true once a read has returned end-of-file
     */
    public Boolean getReaderState() {
        return this.isEnd;
    }

    /**
     * Writes the symbol table to "符號表.txt", one "token, description" row per
     * entry, after a header line and a blank line.
     *
     * @throws IOException if the file cannot be opened or written
     */
    public void saveSymbolsTable() throws IOException {
        FileWriter writer = new FileWriter("符號表.txt");
        try {
            writer.write("[符號][符號類型資訊]\n");
            writer.write("\r\n");

            Enumeration<Token> e = table.keys();
            while (e.hasMoreElements()) {
                Token token = e.nextElement();
                String desc = table.get(token);

                /* Write one row */
                writer.write(token + "\t\t\t" + desc + "\r\n");
            }

            writer.flush();
        } finally {
            // Always release the file handle, even when a write fails.
            writer.close();
        }
    }

    /**
     * Writes the token sequence to "Tokens表.txt", one token per line, after a
     * header line and a blank line.
     *
     * @throws IOException if the file cannot be opened or written
     */
    public void saveTokens() throws IOException {
        FileWriter writer = new FileWriter("Tokens表.txt");
        try {
            writer.write("[符號]\n");
            writer.write("\r\n");

            // Iterate instead of get(i): tokens is a LinkedList, so indexed
            // access would make this loop quadratic.
            for (String tok : tokens) {
                writer.write(tok + "\r\n");
            }

            writer.flush();
        } finally {
            // Always release the file handle, even when a write fails.
            writer.close();
        }
    }

    /** Registers a word in the lookup table under its lexeme. */
    void reserve(Word w) {
        words.put(w.lexme, w);
    }

    /**
     * Opens the input file and pre-loads the keywords and type names into the
     * words table.
     *
     * If "輸入.txt" cannot be opened, the exception is printed to standard
     * output and reader is left null, so any later read will fail.
     */
    public Lexer() {
        /* Open the input file */
        try {
            reader = new BufferedReader(new FileReader("輸入.txt"));
        } catch (IOException e) {
            System.out.print(e);
        }

        /* Keywords */
        this.reserve(new Word("if", Tag.IF));
        this.reserve(new Word("then", Tag.THEN));
        this.reserve(new Word("else", Tag.ELSE));
        this.reserve(new Word("while", Tag.WHILE));
        this.reserve(new Word("do", Tag.DO));

        /* Boolean literals and type names */
        this.reserve(Word.True);
        this.reserve(Word.False);
        this.reserve(Type.Int);
        this.reserve(Type.Char);
        this.reserve(Type.Bool);
        this.reserve(Type.Float);
    }

    /**
     * Reads the next character from the input into peek and sets the
     * end-of-input flag when the reader reports end of file.
     *
     * @throws IOException if the underlying read fails
     */
    public void readch() throws IOException {
        // read() returns -1 at end of file; the cast to char turns that into 0xffff.
        peek = (char) reader.read();
        if (peek == EOF) {
            this.isEnd = true;
        }
    }

    /**
     * Reads the next character and checks whether it equals ch.
     *
     * @param ch the character expected next
     * @return true if the character read equals ch, in which case peek is
     *         reset to a blank (the character is consumed); false otherwise,
     *         in which case the character stays in peek for the next scan
     * @throws IOException if the underlying read fails
     */
    public Boolean readch(char ch) throws IOException {
        readch();
        if (this.peek != ch) {
            return false;
        }

        this.peek = ' ';
        return true;
    }

    /**
     * Scans and returns the next token, appending its printable form to the
     * token list (the end-of-file marker is returned but not recorded).
     *
     * @return the token that was recognised
     * @throws IOException if reading the input fails
     */
    public Token scan() throws IOException {
        /*
         * Skip whitespace. '\r' is skipped too so that CRLF line endings do
         * not produce a stray carriage-return token on every line.
         */
        for (;; readch()) {
            if (peek == ' ' || peek == '\t' || peek == '\r') {
                continue;
            } else if (peek == '\n') {
                line = line + 1;
            } else {
                break;
            }
        }

        /* Operators that may be followed by '=': ==, >=, <=, != */
        switch (peek) {
        case '=':
            if (readch('=')) {
                tokens.add("==");
                return Word.eq;
            } else {
                tokens.add("=");
                return new Token('=');
            }
        case '>':
            if (readch('=')) {
                tokens.add(">=");
                return Word.ge;
            } else {
                tokens.add(">");
                return new Token('>');
            }
        case '<':
            if (readch('=')) {
                tokens.add("<=");
                return Word.le;
            } else {
                tokens.add("<");
                return new Token('<');
            }
        case '!':
            if (readch('=')) {
                tokens.add("!=");
                return Word.ne;
            } else {
                tokens.add("!");
                return new Token('!');
            }
        }

        /*
         * Integer literals; only integers are recognised.
         * NOTE: there is no overflow check, so a literal beyond the int range
         * silently wraps around.
         */
        if (Character.isDigit(peek)) {
            int value = 0;
            do {
                value = 10 * value + Character.digit(peek, 10);
                readch();
            } while (Character.isDigit(peek));

            Num n = new Num(value);
            tokens.add(n.toString());
            return n;
        }

        /* Keywords, type names and identifiers */
        if (Character.isLetter(peek)) {
            StringBuilder sb = new StringBuilder();

            /* Collect the whole run of letters and digits */
            do {
                sb.append(peek);
                readch();
            } while (Character.isLetterOrDigit(peek));

            String s = sb.toString();
            Word w = words.get(s);

            /* Already known: a keyword, a type name, or an identifier seen before */
            if (w != null) {
                tokens.add(w.toString());
                return w;
            }

            /* First occurrence of an identifier: record it in both tables */
            w = new Word(s, Tag.ID);
            tokens.add(w.toString());
            table.put(w, "id");
            words.put(s, w);

            return w;
        }

        /* Any other character is returned as a single-character token */
        Token tok = new Token(peek);
        if (peek != EOF) {
            tokens.add(tok.toString());
        }
        peek = ' ';

        return tok;
    }
}

Num.java

package lexer;</p><p>public class Num extends Token{<br />public final int value;</p><p>public Num(int v) {<br />super(Tag.NUM);<br />this.value = v;<br />}</p><p>public String toString() {<br />return "" + value;<br />}<br />}<br />

Tag.java

package lexer;

/**
 * Integer tags identifying token kinds. All values are 256 or above; the
 * lexer uses a plain character code as the tag of a single-character token.
 */
public class Tag {
    public static final int AND = 256;
    public static final int BASIC = 257;
    public static final int BREAK = 258;
    public static final int DO = 259;
    public static final int ELSE = 260;
    public static final int EQ = 261; /* == */
    public static final int FALSE = 262;
    public static final int GE = 263;
    public static final int ID = 264;
    public static final int IF = 265;
    public static final int INDEX = 266;
    public static final int LE = 267;
    public static final int MINUS = 268;
    public static final int NE = 269;
    public static final int NUM = 270;
    public static final int OR = 271;
    public static final int REAL = 272;
    public static final int TEMP = 273;
    public static final int TRUE = 274;
    public static final int WHILE = 275;
    /* Added later */
    public static final int THEN = 276;
}

Token.java

package lexer;</p><p>public class Token {<br />public final int tag;</p><p>public Token(int t) {<br />this.tag = t;<br />}</p><p>public String toString() {<br />return "" + (char)tag;<br />}</p><p>public static void main(String[] args) {<br />Token tok = new Token('a');<br />System.out.println(tok);<br />}<br />}<br />

Word.java

/*<br /> * 類word用於管理保留字,標識符以及像&&這樣的複合單詞元素 。<br /> */<br />package lexer;</p><p>public class Word extends Token {<br />public String lexme = "";</p><p>public Word (String s, int t) {<br />super(t);<br />this.lexme = s;<br />}</p><p>public String toString() {<br />return this.lexme;<br />}</p><p>public static final Word<br />and = new Word("&&", Tag.AND),<br />or = new Word("||", Tag.OR),<br />eq = new Word ("==", Tag.EQ),<br />ne = new Word("!=", Tag.NE),<br />le = new Word("<=", Tag.LE),<br />ge = new Word(">=", Tag.GE),<br />minus = new Word("minus", Tag.MINUS),<br />True = new Word("true", Tag.TRUE),<br />False = new Word("false", Tag.FALSE),<br />temp = new Word("t", Tag.TEMP);<br />}<br />

Type.java

/*<br /> * 說明資料類型<br /> */<br />package symbols;</p><p>import lexer.*;</p><p>public class Type extends Word{<br />public Type(String s, int tag) {<br />super(s, tag);<br />}</p><p>public static final Type<br />Int = new Type("int", Tag.BASIC),<br />Float = new Type("float", Tag.BASIC),<br />Char = new Type ("char", Tag.BASIC),<br />Bool = new Type("bool", Tag.BASIC);<br /> }<br />

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.