// Parsing an HTML tag using Java
import java.util.HashMap;
import java.util.Map;

/**
 * A small state-machine parser that extracts the attributes of a single
 * {@code <script ...>} start tag.
 *
 * <p>Typical use:
 * <pre>
 *   TagParser parser = new TagParser("&lt;script src='a.js' async=\"true\"/&gt;");
 *   parser.parser();
 *   String src = parser.getAttr("src");   // "a.js"
 * </pre>
 *
 * <p>Accepted input: optional leading whitespace, the tag name {@code <script}
 * (matched case-insensitively), then zero or more {@code name = 'value'} or
 * {@code name = "value"} pairs separated by whitespace, terminated by
 * {@code >} or {@code />}. Attribute names consist of ASCII letters only and
 * every attribute must have a quoted value. Anything after the tag terminator
 * is ignored. Every malformed input is reported as a {@link SymbolError}.
 *
 * <p>Instances are single-use and hold mutable parsing state.
 */
public class TagParser {

    public static final String START_SCRIPT = "<script";
    public static final String END_SCRIPT = ">";
    public static final String END_SCRIPT_1 = "/>";
    public static final String EQ = "=";
    public static final char SPACE = ' ';
    public static final String MUST_SPACE = " ";
    public static final String TAB = "\t";
    public static final String SYMBOL = "'";

    /** Distance between an ASCII lower-case letter and its upper-case form. */
    private static final int ASCII_CASE_OFFSET = 32;

    /** The tag text being parsed. */
    private final char[] tagChar;

    /** Index into {@link #tagChar} of the next character to consume. */
    private int position = 0;

    /** Key under which the attribute currently being parsed is stored in {@link #map}. */
    private int index = 0;

    /** Quote character that opened the attribute value currently being read. */
    private char symbol = '"';

    /** Current state of the automaton; {@code null} until {@link #parser()} is first called. */
    private DFAStatus status;

    /** Attributes in the order they were encountered, keyed by their ordinal. */
    private final Map<Integer, Entity> map = new HashMap<Integer, Entity>();

    /** Attribute name to value; filled once the end of the tag has been reached. */
    private final Map<String, String> result = new HashMap<String, String>();

    /**
     * Creates a parser for the given tag text.
     *
     * @param str the text of the tag to parse; must not be {@code null}
     */
    public TagParser(String str) {
        this.tagChar = str.toCharArray();
    }

    /** Small demonstration: parses a sample tag ten times and prints the elapsed time. */
    public static void main(String[] args) throws SymbolError {
        String tag = "<script filter=\"a\'\" type=\"text/javascript\" id='node' "
                + "src=\"http://www.test.com/abc.js\" async=\"true\"/>";
        int i = 0;
        long start = System.currentTimeMillis();
        while (i < 10) {
            TagParser token = new TagParser(tag);
            token.parser();
            System.out.println(token.getAttr("src"));
            i++;
            System.out.println(token);
        }
        System.out.println("use Time: " + (System.currentTimeMillis() - start));
    }

    /**
     * Runs the state machine until the end of the tag is reached and the
     * attributes are available through {@link #getAttr(String)}.
     *
     * <p>Calling this method again after a successful parse is a no-op.
     *
     * @throws SymbolError if the input is not a well-formed {@code <script>}
     *                     start tag, including input that ends prematurely
     */
    public void parser() throws SymbolError {
        if (status == null) {
            status = DFAStatus.UNSTART;
            skipSpace();
        }
        // Iterative on purpose: one loop turn per state transition keeps the
        // stack depth constant regardless of how many attributes the tag has.
        while (status != DFAStatus.DONE) {
            switch (status) {
                case UNSTART:
                    if (!startsWith(START_SCRIPT)) {
                        throw syntaxError();
                    }
                    position += START_SCRIPT.length();
                    status = DFAStatus.START;
                    break;
                case START:
                    // The tag name must be followed by whitespace, or by the
                    // end of the tag when there are no attributes at all.
                    if (atTagEnd()) {
                        finish();
                    } else {
                        nextSpace();
                    }
                    break;
                case NULL:
                    skipSpace();
                    if (atTagEnd()) {
                        finish();
                    } else {
                        parserName();
                    }
                    break;
                case EQ:
                    parserVal();
                    break;
                case SYMBOL_END:
                    status = DFAStatus.NULL;
                    break;
                default:
                    // SYMBOL_START only survives a failed parserVal(); keep failing
                    // instead of returning silently with an empty result.
                    throw syntaxError();
            }
        }
    }

    /**
     * Returns the value of the named attribute.
     *
     * @param name attribute name exactly as written in the tag
     * @return the attribute value, or {@code null} if the attribute is absent
     *         or {@link #parser()} has not completed successfully
     */
    public String getAttr(String name) {
        return result.get(name);
    }

    @Override
    public String toString() {
        return result.toString();
    }

    /** Marks parsing as complete and publishes the collected attributes. */
    private void finish() {
        status = DFAStatus.DONE;
        done();
    }

    /** Copies the parsed attributes into {@link #result}; a repeated name keeps its last value. */
    private void done() {
        for (int i = 0; i < index; i++) {
            Entity entity = map.get(i);
            result.put(entity.name, entity.value);
        }
    }

    /** Tells whether the input at the current position is {@code />} or {@code >}. */
    private boolean atTagEnd() {
        return startsWith(END_SCRIPT_1) || startsWith(END_SCRIPT);
    }

    /**
     * Tests whether the input at the current position starts with {@code str}.
     * A lower-case ASCII letter in {@code str} also matches its upper-case form.
     */
    private boolean startsWith(String str) {
        int length = str.length();
        if (position + length > tagChar.length) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            char expected = str.charAt(i);
            char actual = tagChar[position + i];
            if (actual == expected) {
                continue;
            }
            if (isLowerAscii(expected) && actual == expected - ASCII_CASE_OFFSET) {
                continue;
            }
            return false;
        }
        return true;
    }

    /** Reads an attribute name followed by {@code =} and registers the attribute. */
    private void parserName() throws SymbolError {
        int begin = position;
        while (position < tagChar.length
                && (isLowerAscii(tagChar[position]) || isUpperAscii(tagChar[position]))) {
            position++;
        }
        if (position == begin) {
            // Fail on the offending character itself rather than skipping it.
            throw syntaxError();
        }
        String name = new String(tagChar, begin, position - begin);
        skipSpace();
        nextEQ();
        map.put(index, new Entity(name));
    }

    /** Reads a single- or double-quoted value and stores it on the current attribute. */
    private void parserVal() throws SymbolError {
        skipSpace();
        if (position >= tagChar.length
                || (tagChar[position] != '\'' && tagChar[position] != '"')) {
            throw syntaxError();
        }
        symbol = tagChar[position];
        status = DFAStatus.SYMBOL_START;
        position++;

        int begin = position;
        while (position < tagChar.length && tagChar[position] != symbol) {
            position++;
        }
        if (position >= tagChar.length) {
            throw new SymbolError("syntax error:unterminated attribute value");
        }
        map.get(index).value = new String(tagChar, begin, position - begin);
        position++; // consume the closing quote
        status = DFAStatus.SYMBOL_END;
        index++;
    }

    /** Consumes the {@code =} that must follow an attribute name. */
    private void nextEQ() throws SymbolError {
        if (!startsWith(EQ)) {
            throw syntaxError();
        }
        position += EQ.length();
        status = DFAStatus.EQ;
    }

    /** Advances past any run of whitespace at the current position. */
    private void skipSpace() {
        while (position < tagChar.length && isSpace(tagChar[position])) {
            position++;
        }
    }

    /** Consumes the single whitespace character required after the tag name. */
    private void nextSpace() throws SymbolError {
        if (position >= tagChar.length || !isSpace(tagChar[position])) {
            throw syntaxError();
        }
        position++;
        status = DFAStatus.NULL;
    }

    /** Builds the error for the current position without indexing past the end of the input. */
    private SymbolError syntaxError() {
        if (position < tagChar.length) {
            return new SymbolError("syntax error:" + tagChar[position]);
        }
        return new SymbolError("syntax error:unexpected end of input");
    }

    private static boolean isSpace(char c) {
        return c == SPACE || c == '\t' || c == '\n' || c == '\r';
    }

    private static boolean isUpperAscii(char c) {
        return c >= 'A' && c <= 'Z';
    }

    private static boolean isLowerAscii(char c) {
        return c >= 'a' && c <= 'z';
    }

    /** Signals that the input is not a well-formed {@code <script>} start tag. */
    public static class SymbolError extends Exception {
        private static final long serialVersionUID = 2441411373778495898L;

        public SymbolError(String msg) {
            super(msg);
        }
    }

    /** A single attribute: its name and, once parsed, its value. */
    public static class Entity {
        public String name;
        public String value;

        public Entity(String name) {
            this.name = name;
        }

        @Override
        public String toString() {
            return "[" + name + ":" + value + "]";
        }
    }

    /** States of the parsing automaton. */
    public enum DFAStatus {
        UNSTART, START, SYMBOL_START, SYMBOL_END, DONE, NULL, EQ
    }
}