A C-Language Lexical Analyzer Written in Java
This is a lexical analyzer for the C language, written in Java. I consulted a lot of existing code, then organized the core code and posted it to my QQ Space and blog. The purpose is to learn from one another, and I hope experts can suggest improvements. The analyzer can open files, save files, open the Help document, cut, copy and paste the content of the text area, and perform lexical analysis.
Project structure: the Word class and the Unidentifiable class are two JavaBean classes, each storing two fields, row (int) and word (String); row holds the line number and word holds the text of the identifier (token). Lexerframe is the GUI (interface) class of the lexical analyzer, Analyze encapsulates the core code for lexical analysis, and the doc folder contains a Help document that pops up to help users when they click the Help button.
Core program:
package com.lexer;

import java.util.ArrayList;
/**
*1~20 number as the keyword, with the subscript, i+1 is its machine code, 21~40 number is the operator, with the subscript, I+21 is its machine code; 41~60 number is the delimiter,
* The subscript indicates that the I+41 is its machine code, the user-defined identifier, the machine code is 51, the constant machine code is 52, the identifier is not recognized, the machine code is 0
*/
public class Analyze {Key words
Private String keyword[]={' int ', ' long ', ' char ', ' if ', ' Else ', ' for ', ' while ', ' return ', ' break ', ' Continue ',
"Switch", "Case", "Default", "float", "double", "void", "struct", "static", "does", "short"};
Operator
Private String operator[]={"+", "-", "*", "/", "%", "=", ">", "<", "!", "= =", "! =", ">=", "<=", "+ +", "--", "&" , "&&", "| |", "[", "]"};
Delimiter
Private String delimiter[]={",", ";", "(", ")", "{", "}", "\", "\" ",": "," # "};
Public Analyze () {
}
/**
* Determine if it is a number
*/
public boolean isdigit (char ch) {
if (ch>= ' 0 ' &&ch<= ' 9 ') {
return true;
}else{
return false;
}
}
/**
* Determine if the function is a letter
*/
public boolean isletter (char ch) {
if ((ch>= ' a ' &&ch<= ' z ') | | (ch>= ' A ' &&ch<= ' Z ')) {
return true;
}else{
return false;
}
}
/**
* Determines whether it consists of two operators
*/
public boolean istwooperator (String Str,char ch) {
char LC;
int flag=0;
if (Str.length () >1| | Str.length () ==0) {//number of characters greater than 2 and no characters
return false;
}else{//the number of characters equals 2
Lc=str.charat (Str.length ()-1);
if (ch== ' = ' && (lc== ' > ' | | lc== ' < ' | | lc== ' = ' | | lc== '! ')) {
}else if (ch== ' + ' &&lc== ' + ') {
}else if (ch== '-' &&lc== '-') {
}else if (ch== ' | ') &&lc== ' | ') {
}else if (ch== ' & ' &&lc== ' & ') {
}else{
Return false;//No returns false
}
Return true;//the case of other symbols returns true
}
}
/**
* Get the machine code of the keyword
*/
public int getkeywordopcodes (String str) {
int i;
for (i=0;i<keyword.length;i++) {
if (Str.equals (Keyword[i]))
Break
}
if (i<keyword.length) {
Return i+1;//The machine code that returns the keyword
}else{
return 0;
}
}
/**
* Get the machine code of the operator
*/
public int getoperatoropcodes (String str) {
int i;
for (i=0;i<operator.length;i++) {
if (Str.equals (Operator[i]))
Break
}
if (i<operator.length)
Return i+21;//machine code for returning operator
Else
return 0;
}
/**
* Get the machine code of the delimiter
*/
public int getdelimiteropcodes (String str) {
int i;
for (i=0;i<delimiter.length;i++) {
if (Str.equals (Delimiter[i]))
Break
}
if (i<delimiter.length)
Return i+41;//machine code returning the delimiter
Else
return 0;
}
/**
* Determine if a character can be recognized
*/
public boolean isident (String str) {
Char ch;
int i;
For (I=0;i<str.length (); i++) {
Ch=str.charat (i);
Non-numeric strings and non-English letters
if ((I==0&&!isletter (ch)) | | (!isdigit (CH) &&!isletter (CH))) {
Break
}
}
if (I<str.length ()) {
return false;
}else{
return true;
}
}
/**
*
* Preprocessing function
*/
public string prefunction (String str) {
String ts= "";
int i;
Char Ch,nc;
Here's I<str.length ()-1
For (I=0;i<str.length () -1;i++) {
Ch=str.charat (i);
Nc=str.charat (i+1);
if (ch== ' \ n ') {///If the character is a line break, it will be \ n replaced by $
Ch= ' $ ';
Ts=ts+ch;
}else if (ch== ' | | | ch== ' \ r ' | | ch== ' \ t ') {
if (nc== ' | | | nc== ' \ r ' | | ch== ' \ t ') {
continue;//consecutive ' or ' \ t ' or ' \ R ' cases, skip directly
}else{
Ch= ';//a ' or ' \ t ' or ' \ R ', change these characters to '
Ts=ts+ch;
}
}else{
ts=ts+ch;//the word Fulianqi.
}
}
Ch=str.charat (Str.length ()-1);
if (ch!= ' &&ch!= ' \ R ' &&ch!= ' \ t ' &&ch!= ' \ n ') {
Ts=ts+ch;
}
return TS;
}
/**
* Divide the string into words, stored in the array list
*/
Public arraylist<word> Divide (String str) {
Arraylist<word> list=new arraylist<word> ();
String s= "";
Char ch;
int i;
int row=1;
For (I=0;i<str.length (); i++) {
Ch=str.charat (i);
if (i==0&&ch== ')//The first character of a string
Continue
if (ch== ') {//' or ' \ t ' or ' \ R '
if (s!= "") {
List.add (New Word (row, s));
S= "";//Empty
}else{
Continue
}
}else if (isdigit (ch) | | Isletter (CH)) {
if (s== "" | | IsDigit (S.charat (S.length ()-1)) | | Isletter (S.charat (S.length ()-1))) {
s = s + ch;
}else{
List.add (New Word (row, s));
s = "";
S=s + ch;
}
}else{
if (Istwooperator (S, ch)) {//two operator case
s = s + ch;
}else{
if (s== "" &&ch!= ' $ ') {
s = s + ch;
}else if (s== "" "&&ch== ' $ ') {//If the $ symbol is detected, line breaks
row++;//number of rows plus one
}else{
List.add (New Word (row, s));
s = "";
if (ch!= ' $ ') {
S=s + ch;
}else{
row++;
}
}
}
}
}
if (s!= "") {
List.add (New Word (row, s));
}
return list;
}
/**
* Determine if a string is a number string, a single character, or a string
*/
public int check (String str) {
Char ch;
Ch=str.charat (0);
if (ch>= ' 0 ' &&ch<= ' 9 ') {
return 1;//Digital string
}
if (Str.length () ==1)
Return 2;//single character
Else
Return 3;//A string
}
/**
*
* Check to see if string is a serial number, return its machine code
*/
public int checkdigit (String str) {
int i;
Char ch;
For (I=0;i<str.length (); i++) {
Ch=str.charat (i);
if (ch> ' 9 ' | | ch< ' 0 ')
Break
}
if (I<str.length ()) {
return 0;//Unrecognized condition
}else{
Return 52;//constant
}
}
/**
*
* Check if the string is a single character and return its machine code
*/
public int Checkchar (String str) {
if (Getoperatoropcodes (str)!=0) {//operator
return Getoperatoropcodes (str);
}else if (getdelimiteropcodes (str)!=0) {//delimiter
return Getdelimiteropcodes (str);
}else if (isident (str)) {
Return 51;//machine code for user-defined identifiers
}else{
Return 0;//identifier that cannot be recognized, machine code 0
}
}
/**
*
* Check if string is a string, return its machine code
*/
public int checkstring (String str) {
if (Getoperatoropcodes (str)!=0) {//operator
return Getoperatoropcodes (str);
}else if (getkeywordopcodes (str)!=0) {//keyword
return Getkeywordopcodes (str);
}else if (isident (str)) {
Return 51;//machine code for user-defined identifiers
}else{
Return 0;//identifier that cannot be recognized, machine code 0
}
}
}
//************************************************************************************************************* *******
In the interface (GUI) class, write the following method to invoke the analyzer:
/**
* Lexical analysis
*/
public void dotokenizing () {
Consoletextarea.settext (NULL);
Arraylist<word> wlist=new arraylist<word> ();
Arraylist<unidentifiable> ulist=new arraylist<unidentifiable> ();
String S,ts,str;
Word Word;
int i;
int opcodes=-1;
int errornum=0;
int count=0;
S=filecontenttextarea.gettext ();
if (S.length () >1) {
Ts=analyze.prefunction (s);
Wlist=analyze.divide (TS);
Values=new string[wlist.size ()][3];
while (Wlist.size () >0) {
Word= (Word) wlist.remove (0);
Str=word.getword ();
I=analyze.check (str);
switch (i) {
Case 1:
Opcodes=analyze.checkdigit (str);
Break
Case 2:
Opcodes=analyze.checkchar (str);
Break
Case 3:
Opcodes=analyze.checkstring (str);
Break
}
if (opcodes==0) {
unidentifiable u=new unidentifiable (Word.getrow (), str);
Ulist.add (U);
errornum++;
}
Values[count][0]=string.valueof (Word.getrow ());
VALUES[COUNT][1]=STR;
Values[count][2]=string.valueof (opcodes);
count++;
}
Update table Contents
DefaultTableModel model= (DefaultTableModel) Table.getmodel ();
while (Model.getrowcount () >0) {
Model.removerow (Model.getrowcount ()-1);
}
Model.setdatavector (Values,title);
Table=new JTable (model);
Consoletextarea.append ("Common" +errornum+ "Error!") + "\ n");
while (Ulist.size () >0) {
int R;
string string;
Unidentifiable uni=ulist.remove (0);
R=uni.getrow ();
String=uni.getword ();
Consoletextarea.append ("+r+" line: "+" error, "+string+" \ n ");
}
}else{
Int J;
J=joptionpane.showconfirmdialog (This, "Please enter the program!");
if (j!=joptionpane.yes_option) {
Return
}
}
}
A C-language lexical analyzer written in Java