/*
 * This article is from http://blog.csdn.net/javaalpha/article/details/8332587
 * Please credit the source when reprinting. Thank you.
 *
 * Read an HTML page file and parse the e-mail address it contains.
 */

// package com.alpha.test;  // original package; re-enable when this file lives under com/alpha/test/

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads an HTML page file and extracts e-mail address fragments from it.
 *
 * <p>All methods are static helpers. I/O failures are reported with
 * {@code printStackTrace()} and never propagate to the caller.
 *
 * @author javaalpha 13:45:11
 */
public class ReadHtmlToTxt {

    /** Number of characters examined on each side of the first '@' sign. */
    private static final int WINDOW = 10;

    /** Size of the buffer used when reading a file in chunks. */
    private static final int BUFFER_SIZE = 4096;

    /** Matches a single character in the CJK range U+4E00..U+9FA5; compiled once. */
    private static final Pattern CHINESE = Pattern.compile("[\\u4e00-\\u9fa5]");

    /**
     * Reads the file at {@code path} and collects the address fragment found in
     * each chunk of the file.
     *
     * <p>The file is read in chunks of {@value #BUFFER_SIZE} characters and every
     * chunk is passed to {@link #checkEmail(String)}, so at most one fragment
     * (the first '@' of the chunk) is collected per chunk. Fragments are
     * concatenated without a separator.
     *
     * @param path path of the HTML file to read
     * @return the concatenated fragments; empty when the file does not exist, is
     *         not a regular readable file, contains no '@', or cannot be read
     */
    public static String readHtml(String path) {
        StringBuilder emailCont = new StringBuilder();
        File htmlFile = new File(path);
        if (htmlFile.exists() && htmlFile.isFile() && htmlFile.canRead()) {
            try {
                Reader in = new FileReader(htmlFile);
                try {
                    char[] buff = new char[BUFFER_SIZE];
                    int nch;
                    while ((nch = in.read(buff, 0, buff.length)) != -1) {
                        emailCont.append(checkEmail(new String(buff, 0, nch)));
                    }
                } finally {
                    // Close even when reading fails; the reader used to leak here.
                    in.close();
                }
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return emailCont.toString();
    }

    /**
     * Extracts a candidate e-mail address around the first '@' in {@code str}.
     *
     * <p>The candidate is the text from up to {@value #WINDOW} characters before
     * the '@' to {@value #WINDOW} characters from it (the '@' plus up to nine
     * following characters), clamped to the bounds of the string. Markup
     * characters and comma / full-stop separators are stripped, and the result
     * is cut right after the first ".com" when one is present. The candidate is
     * also printed to standard output.
     *
     * @param str text to inspect; may be {@code null}
     * @return the candidate fragment, or an empty string when {@code str} is
     *         {@code null} or contains no '@'
     */
    public static String checkEmail(String str) {
        if (str == null) {
            return "";
        }
        int at = str.indexOf('@');
        if (at < 0) {
            return "";
        }

        // Clamp the window: an '@' near either end of the text used to make
        // substring() throw StringIndexOutOfBoundsException.
        int from = Math.max(0, at - WINDOW);
        int to = Math.min(str.length(), at + WINDOW);
        String postCont = str.substring(from, to);

        // Slashes are only treated as markup when an angle bracket is present.
        if (postCont.indexOf('>') > -1 || postCont.indexOf('<') > -1) {
            postCont = postCont.replace(">", "").replace("<", "").replace("/", "");
        }

        // Literal replace() instead of replaceAll(): none of these are regexes.
        // NOTE(review): the scraped listing shows ASCII ',' twice and '.', which
        // look like transliterated full-width comma / ideographic full stop;
        // confirm against the original article.
        postCont = postCont.replace(",", "").replace("\uFF0C", "").replace("\u3002", "");

        // Only truncate when ".com" exists; otherwise keep the fragment instead
        // of cutting it to three characters (or throwing on short input).
        int com = postCont.indexOf(".com");
        if (com > -1) {
            postCont = postCont.substring(0, com + 4);
        }
        System.out.println(postCont);
        return postCont;
    }

    /**
     * Tells whether {@code str} contains at least one Chinese character.
     *
     * @param str text to inspect; may be {@code null}
     * @return {@code true} when a character in the range U+4E00..U+9FA5 is found
     */
    public static boolean checkChinese(String str) {
        if (str == null) {
            return false;
        }
        Matcher m = CHINESE.matcher(str);
        return m.find();
    }

    /**
     * Writes {@code cont} to the file at {@code path}, replacing any previous
     * content. The file is created when it does not exist.
     *
     * @param cont text to write; {@code null} is written as an empty file
     * @param path destination file path
     */
    public static void writerFile(String cont, String path) {
        File emailFile = new File(path);
        try {
            // FileWriter creates the file when it is missing.
            Writer out = new FileWriter(emailFile);
            try {
                out.write(cont == null ? "" : cont);
                out.flush();
            } finally {
                // Close even when the write fails; the writer used to leak here.
                out.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the resource at {@code strUrl} and prints it to standard output,
     * with {@code "</BR>"} appended after every line.
     *
     * <p>NOTE(review): the response is decoded with the platform default
     * charset; confirm that this matches the pages being fetched.
     *
     * @param strUrl address of the resource to read
     */
    public static void readUrlCont(String strUrl) {
        StringBuilder cont = new StringBuilder();
        try {
            URL url = new URL(strUrl);
            URLConnection conn = url.openConnection();
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(conn.getInputStream()));
            try {
                String lineCont;
                while ((lineCont = reader.readLine()) != null) {
                    cont.append(lineCont).append("</BR>");
                }
            } finally {
                // Close even when reading fails; the reader used to leak here.
                reader.close();
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Whatever was read before a failure is still printed.
        System.out.println(cont.toString());
    }

    /**
     * Demo entry point: prints the content of a fixed web page.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Other usages:
        // String cont = readHtml("E://test.htm");  // read the file
        // writerFile(cont, "E://test.txt");        // write the file
        // checkChinese("qwe123");
        readUrlCont("http://www.163.com");
    }
}