Fetching web page content in Java, with optional proxy support (HttpURLConnection)

Source: Internet
Author: User

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

/**
** General webpage capture class
* @ Author
*/
public class WebClient {

    /** Connect/read timeout, in milliseconds, used by the convenience overloads. */
    private static final int DEFAULT_TIMEOUT_MILLIS = 5000;

    /** Single-byte charset used to carry the raw response bytes through a String unchanged. */
    private static final String RAW_CHARSET = "iso-8859-1";

    /** Proxy server address; {@code null} or empty means no proxy is configured by this class. */
    private static String proxyHost;

    /** Proxy server port. */
    private static String proxyPort;

    /** Proxy server user name. */
    private static String proxyUser;

    /** Proxy server password. */
    private static String proxyPassword;

    /**
     * Fetches a web page and returns its body as text.
     *
     * <p>If {@code urlString} does not start with {@code http://} or {@code https://},
     * {@code http://} is prepended. Line terminators in the response are normalised to
     * {@code "\r\n"}, and the returned text always ends with one when the body is non-empty.
     *
     * @param urlString the URL to fetch
     * @param charset   name of the charset used to decode the response body; must not be {@code null}
     * @param timeout   connect and read timeout in milliseconds
     * @return the page content, or {@code null} when {@code urlString} is {@code null} or empty,
     *         when the response status is not 200, or when reading the status line fails
     * @throws IOException if the URL is malformed, the charset is unsupported, or reading the body fails
     */
    public static String getWebContent(String urlString, final String charset, int timeout) throws IOException {
        if (urlString == null || urlString.length() == 0) {
            return null;
        }
        if (!urlString.startsWith("http://") && !urlString.startsWith("https://")) {
            urlString = "http://" + urlString;
        }
        URL url = new URL(urlString);

        // Publish the proxy settings before the connection object is created.
        getProxy();
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            // Send a browser-like User-Agent so that sites which block unknown clients still answer.
            conn.setRequestProperty(
                    "User-Agent",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
            // Ask for HTML only; other MIME types (images, PDF, */*) could be requested instead.
            conn.setRequestProperty("accept", "text/html");

            conn.setConnectTimeout(timeout);
            // Bound the read as well, otherwise a stalled server blocks the caller indefinitely.
            conn.setReadTimeout(timeout);

            try {
                if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                    return null;
                }
            } catch (IOException e) {
                // Best effort: a failure while obtaining the status is reported as "no content".
                e.printStackTrace();
                return null;
            }

            StringBuilder content = new StringBuilder();
            try (InputStream input = conn.getInputStream();
                 BufferedReader reader = new BufferedReader(new InputStreamReader(input, charset))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line).append("\r\n");
                }
            }
            return content.toString();
        } finally {
            // Runs on every exit path, including the early null returns and exceptions.
            conn.disconnect();
        }
    }

    /**
     * Fetches a web page, decoding it as ISO-8859-1 with a 5000 ms timeout.
     *
     * @param urlString the URL to fetch
     * @return the page content, or {@code null} as described for
     *         {@link #getWebContent(String, String, int)}
     * @throws IOException if fetching fails
     */
    public static String getWebContent(String urlString) throws IOException {
        return getWebContent(urlString, RAW_CHARSET, DEFAULT_TIMEOUT_MILLIS);
    }

    /**
     * Fetches a web page with a 5000 ms timeout and decodes it using the given charset.
     *
     * <p>The page is first read as ISO-8859-1, which maps each byte to one char, and the
     * resulting text is then converted back to bytes and decoded as {@code pageCharset}.
     *
     * @param urlString   the URL to fetch
     * @param pageCharset name of the charset the target page is encoded in
     * @return the page content, or {@code null} as described for
     *         {@link #getWebContent(String, String, int)}
     * @throws IOException if fetching fails or {@code pageCharset} is not supported
     */
    public static String getWebContent(String urlString, String pageCharset) throws IOException {
        String rawHtml = getWebContent(urlString, RAW_CHARSET, DEFAULT_TIMEOUT_MILLIS);
        if (rawHtml == null) {
            // Nothing was fetched (blank URL, non-200 status, ...); there is nothing to re-decode.
            return null;
        }
        return new String(rawHtml.getBytes(StandardCharsets.ISO_8859_1), pageCharset);
    }

    /**
     * Sets the proxy server, without credentials.
     *
     * @param proxyHost address of the proxy server
     * @param proxyPort port of the proxy server
     */
    public static void setProxy(String proxyHost, String proxyPort) {
        setProxy(proxyHost, proxyPort, null, null);
    }

    /**
     * Sets the proxy server and, optionally, its credentials.
     *
     * <p>The credentials are stored only when {@code sProxyPassword} is non-empty; otherwise any
     * previously stored user name and password are cleared so they cannot leak into a later
     * configuration.
     *
     * @param sProxyHost     address of the proxy server
     * @param sProxyPort     port of the proxy server
     * @param sProxyUser     proxy server user name
     * @param sProxyPassword proxy server password
     */
    public static void setProxy(String sProxyHost, String sProxyPort, String sProxyUser, String sProxyPassword) {
        proxyHost = sProxyHost;
        proxyPort = sProxyPort;
        if (sProxyPassword != null && sProxyPassword.length() > 0) {
            proxyUser = sProxyUser;
            proxyPassword = sProxyPassword;
        } else {
            proxyUser = null;
            proxyPassword = null;
        }
    }

    /**
     * Copies the configured proxy settings into the JVM system properties.
     *
     * <p>Only the {@code http.*} keys are written, and nothing is written when no proxy host
     * is configured.
     *
     * <p>NOTE(review): {@code http.proxyUser} / {@code http.proxyPassword} do not appear among the
     * documented JDK networking properties; proxy authentication probably needs a
     * {@code java.net.Authenticator} instead - confirm before relying on it.
     *
     * @return the system properties after the update, or {@code null} when no proxy host is set
     */
    private static Properties getProxy() {
        Properties props = null;
        if (proxyHost != null && proxyHost.length() > 0) {
            props = System.getProperties();
            // Address of the proxy server to use for HTTP access.
            props.setProperty("http.proxyHost", proxyHost);
            // Port of the proxy server; skipped when absent because setProperty rejects null values.
            if (proxyPort != null && proxyPort.length() > 0) {
                props.setProperty("http.proxyPort", proxyPort);
            }
            if (proxyUser != null && proxyUser.length() > 0) {
                // User name and password.
                props.setProperty("http.proxyUser", proxyUser);
                props.setProperty("http.proxyPassword", proxyPassword);
            }
        }
        return props;
    }

    /**
     * Manual test entry point: fetches one page and prints it to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if fetching fails
     */
    public static void main(String[] args) throws IOException {
        // setProxy("10.10.10.10", "8080"); // proxy server settings
        String s = getWebContent("http://www.my400800.cn", "UTF-8");
        System.out.println(s);
    }
}

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 technical support · 6 free tickets per quarter · faster response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.