Package COM. tag; import java.net. malformedurlexception; import java.net. URL; import Org. apache. commons. httpclient. header; import Org. apache. commons. httpclient. headerelement; import Org. apache. commons. httpclient. httpclient; import Org. apache. commons. httpclient. namevaluepair; import Org. apache. commons. httpclient. methods. getmethod; import toptrack. tools. jquerybase;/*** get the webpage encoding format * @ author DL */public class Encoding Tmlupdatecheck {/** Text Content Encoding recognition class */Private Static cpdetector. io. codepagedetectorproxy detector = cpdetector. io. codepagedetectorproxy. getinstance (); static {detector. add (New cpdetector. io. htmlcodepagedetector (false); detector. add (cpdetector. io. jchardetfacade. getinstance ();}/*** <br> method Description: webpage encoding format * <br> input parameter: strurl webpage link; timeout timeout setting * <br> return type: webpage Code */public static string getencoding (string strurl, int Ti Meout) {string strencoding = NULL; httpclient client = new httpclient (); client. gethttpconnectionmanager (). getparams (). setconnectiontimeout (timeout); getmethod method = new getmethod (strurl); method. setfollowredirects (true); int statuscode; try {statuscode = client.exe cutemethod (method); If (statuscode! =-1) {// obtain the webpage code strencoding from the HTTP header = getcontentcharset (method. getResponseHeader ("Content-Type"); If (strencoding! = NULL) {method. releaseconnection (); Return strencoding;} // get the webpage code string strhtml = method through meta parsing. getresponsebodyasstring (). tolowercase (); stringbuffer strbuffer = new stringbuffer (); int Pos = jquerybase. gettagtext (strhtml, "<meta", ">", strbuffer, false, 0); While (strbuffer. length ()> 0) {stringbuffer strencodingbuffer = new stringbuffer (); jquerybase. gettagtext (strbuffer. tostring (), "charset = ","/"", Strencodingbuffer, 0); If (strencodingbuffer. length ()> 0) {strencoding = strencodingbuffer. tostring (); method. releaseconnection (); Return strencoding;} strbuffer = new stringbuffer (); Pos = jquerybase. gettagtext (strhtml, "<meta", ">", strbuffer, false, POS);} // analyze the byte to obtain the webpage code strencoding = getfileencoding (strurl, timeout ); // set the default webpage character encoding if (strencoding = NULL) strencoding = "GBK";} metho D. releaseconnection ();} catch (exception e) {// todo auto-generated catch blocksystem. out. println (E. getclass () + "error in extracting webpage encoding information for" + strurl + "); return NULL;} return strencoding;}/*** <br> method description: get webpage encoding information through HTTP header * <br> input parameter: contentheade rhttp header * <br> return type: webpage encoding */protected static string getcontentcharset (header contentheader) {string charset = NULL; If (contentheader! = NULL) {headerelement values [] = contentheader. getelements (); If (values. length = 1) {namevaluepair Param = values [0]. getparameterbyname ("charset"); If (Param! = NULL) {charset = Param. getvalue () ;}}return charset;}/*** <br> method Description: webpage encoding identified by webpage content * <br> input parameter: strurl webpage link; timeout timeout setting * <br> return type: webpage encoding */public static string getfileencoding (string strurl, int timeout) {Java. NIO. charset. charset = NULL; Url F; try {f = new URL (strurl);} catch (malformedurlexception e) {// todo auto-generated catch blocksystem. out. println (E. getclass () + strurl + "invalid"); RET Urn NULL;} Try {charset = detector. detectcodepage (f);} catch (exception e) {system. out. println (E. getclass () + "analysis" + strurl + "encoding failed");} If (charset! = NULL) return charset. Name (); return NULL ;}}