/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Properties;
/**
 * General-purpose web page fetcher built on {@link HttpURLConnection}.
 *
 * <p>Proxy settings are kept in static fields and published through JVM-wide system
 * properties, so they are shared by every caller in the process. The class performs no
 * synchronization of its own.
 */
public class WebClient {

    /** Timeout, in milliseconds, used by the convenience overloads. */
    private static final int DEFAULT_TIMEOUT_MILLIS = 5000;

    /** Charset used by {@link #getWebContent(String)}. */
    private static final String DEFAULT_CHARSET = "iso-8859-1";

    /** Browser-like User-Agent sent with every request so simple bot filters do not block it. */
    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; Trident/4.0; "
                    + ".NET CLR 1.1.4322; .NET CLR 2.0.50727)";

    /** Proxy server address; {@code null} or empty means no proxy is configured. */
    private static String proxyHost;

    /** Proxy server port. */
    private static String proxyPort;

    /** Proxy server user name. */
    private static String proxyUser;

    /** Proxy server password. */
    private static String proxyPassword;

    /**
     * Fetches a web page and returns its body as text.
     *
     * <p>If {@code urlString} does not start with {@code http://} or {@code https://},
     * {@code http://} is prepended. Every line of the response is terminated with
     * {@code "\r\n"} in the returned string, whatever line ending the server used.
     *
     * @param urlString the URL to fetch
     * @param charset name of the charset used to decode the response body
     * @param timeout connect and read timeout in milliseconds ({@code 0} means no timeout)
     * @return the page content, or {@code null} when {@code urlString} is {@code null} or
     *     empty, when the response status is not 200, or when reading the status fails
     * @throws IOException if the URL is malformed, the charset is unsupported, or reading
     *     the response body fails
     */
    public static String getWebContent(String urlString, final String charset, int timeout)
            throws IOException {
        if (urlString == null || urlString.length() == 0) {
            return null;
        }

        // Default to plain HTTP when the caller left the scheme out.
        String target = urlString;
        if (!target.startsWith("http://") && !target.startsWith("https://")) {
            target = "http://" + target;
        }

        URL url = new URL(target);
        // Publish the proxy system properties before the connection is opened.
        getProxy();
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestProperty("User-Agent", USER_AGENT);
            // Ask for HTML only.
            conn.setRequestProperty("Accept", "text/html");
            conn.setConnectTimeout(timeout);
            // Without a read timeout a stalled server would block the caller indefinitely.
            conn.setReadTimeout(timeout);

            try {
                if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                    return null;
                }
            } catch (IOException e) {
                // Failing to obtain a status is reported as "no content", not as an
                // exception, so existing callers that test for null keep working.
                e.printStackTrace();
                return null;
            }

            InputStream input = conn.getInputStream();
            try {
                BufferedReader reader = new BufferedReader(new InputStreamReader(input, charset));
                StringBuilder content = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line).append("\r\n");
                }
                return content.toString();
            } finally {
                input.close();
            }
        } finally {
            // Runs on every exit path, including the early null returns above.
            conn.disconnect();
        }
    }

    /**
     * Fetches a web page, decoding it as ISO-8859-1 with a 5000 ms timeout.
     *
     * @param urlString the URL to fetch
     * @return the page content, or {@code null} under the same conditions as
     *     {@link #getWebContent(String, String, int)}
     * @throws IOException if fetching fails
     */
    public static String getWebContent(String urlString) throws IOException {
        return getWebContent(urlString, DEFAULT_CHARSET, DEFAULT_TIMEOUT_MILLIS);
    }

    /**
     * Fetches a web page, decoding it with the given charset and a 5000 ms timeout.
     *
     * @param urlString the URL to fetch
     * @param pageCharset name of the charset the target page is encoded in
     * @return the page content, or {@code null} under the same conditions as
     *     {@link #getWebContent(String, String, int)}
     * @throws IOException if fetching fails
     */
    public static String getWebContent(String urlString, String pageCharset) throws IOException {
        // Decode with the real charset directly. Going through an intermediate
        // ISO-8859-1 string would need a null check on the fetched text and would split
        // lines on raw CR/LF bytes, which is wrong for charsets that are not
        // ASCII-compatible.
        return getWebContent(urlString, pageCharset, DEFAULT_TIMEOUT_MILLIS);
    }

    /**
     * Sets the proxy server without credentials. Any credentials stored by an earlier
     * call are cleared.
     *
     * @param proxyHost address of the proxy server
     * @param proxyPort port of the proxy server
     */
    public static void setProxy(String proxyHost, String proxyPort) {
        setProxy(proxyHost, proxyPort, null, null);
    }

    /**
     * Sets the proxy server and, optionally, its credentials.
     *
     * <p>The credentials are stored only when {@code sProxyPassword} is non-empty;
     * otherwise any previously stored credentials are cleared.
     *
     * @param sProxyHost address of the proxy server
     * @param sProxyPort port of the proxy server
     * @param sProxyUser proxy server user name
     * @param sProxyPassword proxy server password
     */
    public static void setProxy(
            String sProxyHost, String sProxyPort, String sProxyUser, String sProxyPassword) {
        proxyHost = sProxyHost;
        proxyPort = sProxyPort;
        if (sProxyPassword != null && sProxyPassword.length() > 0) {
            proxyUser = sProxyUser;
            proxyPassword = sProxyPassword;
        } else {
            // Do not let credentials meant for a previous proxy leak to the new one.
            proxyUser = null;
            proxyPassword = null;
        }
    }

    /**
     * Copies the configured proxy settings into the JVM system properties.
     *
     * <p>Only {@code http.*} keys are written. NOTE(review): {@code http.proxyUser} and
     * {@code http.proxyPassword} do not appear among the documented JDK networking
     * properties, and https URLs are presumably governed by separate {@code https.*}
     * keys; confirm whether proxy authentication and https proxying actually work here.
     *
     * @return the system properties after the update, or {@code null} when no proxy host
     *     is configured
     */
    private static Properties getProxy() {
        if (proxyHost == null || proxyHost.length() == 0) {
            return null;
        }
        Properties props = System.getProperties();
        props.setProperty("http.proxyHost", proxyHost);
        // setProperty rejects null values, so a missing port is simply left unset.
        if (proxyPort != null && proxyPort.length() > 0) {
            props.setProperty("http.proxyPort", proxyPort);
        }
        // setProxy stores user and password together, so a non-empty user implies a
        // non-empty password here.
        if (proxyUser != null && proxyUser.length() > 0) {
            props.setProperty("http.proxyUser", proxyUser);
            props.setProperty("http.proxyPassword", proxyPassword);
        }
        return props;
    }

    /**
     * Manual smoke test: fetches a page and prints it to standard output.
     *
     * <p>To go through a proxy, call {@code setProxy("10.10.10.10", "8080")} first.
     *
     * @param args unused
     * @throws IOException if fetching fails
     */
    public static void main(String[] args) throws IOException {
        String s = getWebContent("http://www.my400800.cn", "UTF-8");
        System.out.println(s);
    }
}