package util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.zip.GZIPInputStream;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.SimpleHttpConnectionManager;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.apache.commons.httpclient.params.HttpMethodParams;
/ **
* @author Six Flavors
* Date: December 18, 2009
*
* TODO
* Auxiliary class of HttpClient
* /
public class Httpclienthelper
{
/ **
* HttpClient connection Timeout, read data time-out setting (unit: milliseconds)
* /
public static final Interpretation httpclient_connection_timeout = 30000;
public static final Interpretation httpclient_so_timeout = 120000;
public static final Interpretation httpmethod_so_timeout = 5000;
Let the ConnectionManager manage httpclientconnection when the connection is closed
private static Boolean alwaysclose = FALSE;
private static string Defaultencode = "UTF-8";
private static final DateFormat Date_format = new SimpleDateFormat ("Yyyy-mm-dd HH:MM:SS");
/ **
* Get the HttpClient connection and set the relevant parameters
*
* @return
* /
Gethttpclient of public static httpclient ()
{
HttpClient client = new HttpClient (new Simplehttpconnectionmanager (Alwaysclose));
Httpconnectionmanagerparams managerparams = Client.gethttpconnectionmanager () getparams () method.
Setting the connection time-out (in milliseconds)
Managerparams.setconnectiontimeout (httpclient_connection_timeout);
Set read data time-out (in milliseconds)
Managerparams.setsotimeout (httpclient_so_timeout);
return to the client;
}
/ **
* Get the HttpClient connection and set the relevant parameters
*
* @ parameter Logonsite
* @ parameter Logonport
* @ Parameter Protocol
* @return
* /
public static HttpClient gethttpclient (last string logonsite, Final interpretation Logonport, last string protocol)
{
HttpClient client = new HttpClient (new Simplehttpconnectionmanager (Alwaysclose));
Client.gethostconfiguration () Sethost (logonsite,logonport, protocol).
Httpconnectionmanagerparams managerparams = Client.gethttpconnectionmanager () getparams () method.
Setting the connection time-out (in milliseconds)
Managerparams.setconnectiontimeout (httpclient_connection_timeout);
Set read data time-out (in milliseconds)
Managerparams.setsotimeout (httpclient_so_timeout);
return to the client;
}
private static List < title > getheaders (Map <string, string > Header)
{
List < title > = header of ArrayList new < title > ();
Boolean includeuseragent = FALSE;
if (empty = header && false = = Header.isempty ()! )
{
Set < Enter < String, string >> = EntrySet header.entryset ();
For (enter <string, String > Item: EntrySet)
{
if (false = = Includeuseragent
&& "User Agent". Equals (Entry.getkey ()))
{
Includeuseragent = TRUE;
}
Headers.add (New Header (Entry.getkey (), Entry.getvalue ()));
}
}
if (false = = Includeuseragent)
{
Headers.add (new title (
"User Agent",
"Mozilla's/4.0 (compatible; MSIE 7.0; Windows NT 5.1; GTB5;. NET CLR 1.1.4322;. NET CLR 2.0 0.50727; Alexa tool strip; MAXTHON 2.0));
}
return head;
}
private static Namevaluepair [] getpairs (Map <string, Strings > PostData)
{
if (empty = = | | PostData Postdata.isempty ())
{
returns null;
}
Set < input < string, string >> = EntrySet postdata.entryset ();
INT datalength = Entryset.size ();
Namevaluepair [] = to the new Namevaluepair [Datalength]
INT I = 0;
For (enter <string, String > Item: EntrySet)
{
double [i + +] = new Namevaluepair (Entry.getkey (), Entry.getvalue ());
}
return to;
}
/ **
* Request Page Content information
*
* HttpClient of the @ parameter
* @ parameter Requrl
* Parameter title
* @ parameter PostData
* Parameter Code
* @return
* /
public static string DoRequest (HttpClient of HttpClient, String Requrl,
Map < String, string> header, map <string, String > PostData, String encoding)
{
string htmlcontent = NULL;
if (empty = = HttpClient)
{
return to Htmlcontent;
}
Request Encoding Settings
encoding = (Empty = = Encoding Defaultencode: encoding);
Header Request Information
List < title > = Head getheaders (head);
System.out.println ("[" + Date_format.format (new DATE ()) + "]-dorequest-" + Requrl);
Mode of delivery
, if (null = postdata! )
{
The postmethod of Postmethod = new Encodepostmethod (requrl, encoding);
For (Head Tempheader: Head)
{
Postmethod.setrequestheader (Tempheader);
}
Post-parameter settings
Namevaluepair [] = PARAMS getpairs (postdata);
if (null = parameter!) )
{
Postmethod.setrequestbody (PARAMS);
}
Extract Web content
Htmlcontent = Executemethod (httpclient, posterior method, encoding, Getwebsite (Requrl));
}
Other
{
GetMethod GetMethod = new Implementation GetMethod (Requrl);
For (Head Tempheader: Head)
{
Getmethod.setrequestheader (Tempheader);
}
Extract Web content
Htmlcontent = Executemethod (httpclient, GetMethod, encoded, NULL);
}
return to Htmlcontent;
}
private static string Getwebsite (String requrl)
{
String Web site = NULL;
if (empty = = Requrl | | Requrl.isempty ())
{
return to the website;
}
string prefix = "http://";
if (requrl.startswith (prefix))
{
int index = requrl.substring (Prefix.length ()) indexof ("/") + prefix.length ();
Website = requrl.substring (0, index);
}
return to the website;
}
/ **
* Get Web content by listing HttpMethod
*
* HttpClient of the @ parameter
* @ parameter Requestmethod
* Parameter Code
* Parameters of the website
* @return
* /
private static string Executemethod (HttpClient HttpClient, enumeration HttpMethod Requestmethod, encoded string, string web)
{
string responsecontent = NULL;
if (empty = = HttpClient)
{
return to Responsecontent;
}
Determine whether to request encrypted data
The Boolean dataencrypt = FALSE;
Head acceptencoding = Requestmethod.getrequestheader ("Accept Code");
if (! NULL = acceptencoding
。 && Acceptencoding.getvalue () included ("gzip"))
{
Dataencrypt = TRUE;
}
The InputStream responsestream = NULL;
Try
{
int status = Httpclient.executemethod (Requestmethod);
if (Httpstatus.sc_ok = = status)
{
Responsestream = Requestmethod.getresponsebodyasstream ();
Responsecontent = Getcontentbystream (Dataencrypt new Gzipinputstream (responsestream): Responsestream, coded);
Responsestream.close ();
}
When the return code is 301302303307, it means that the page has been redirected, and the URL of the location is re-requested, which is important when some logins are authorized to fetch cookies.
Otherwise, if (httpstatus.sc_moved_permanently = = State
|| httpstatus.sc_moved_temporarily = = Status
|| Httpstatus.sc_see_other = = Status
|| Httpstatus.sc_temporary_redirect = = status)
{
Read the new URL address
Header hoisting = Requestmethod.getresponseheader ("position");
if (! Header = NULL)
{
The RedirectURL of the string = Header.getvalue ();
if (0 = the redirecturl!
&& false = = Redirecturl.isempty ())
{
Responsecontent = invalid;
if (empty = = of RedirectURL | | Redirecturl.isempty ())
{
RedirectURL = "/";
}
if (false = = Redirecturl.startswith ("http//")
! && NULL = website)
{
if (Website.startswith ("/"))
{
RedirectURL = website + redirecturl;
}
Other
{
RedirectURL = website + "/" + RedirectURL;
}
}
GetMethod REDIRECT = new Implementation GetMethod (RedirectURL);
Head referral = requestmethod.getrequestheader ("Referral");
if (null = referrer!) )
{
Redirect.addrequestheader (referral);
}
The cookie of the head = Requestmethod.getrequestheader ("cookie");
if (empty = biscuit!) )
{
Redirect.addrequestheader (biscuit);
}
Status = Httpclient.executemethod (redirected);
if (Httpstatus.sc_ok = = status)
{
Responsestream = Redirect.getresponsebodyasstream ();
Responsecontent = Getcontentbystream (responsestream, coded);
Responsestream.close ();
}
}
}//End head
}//End status
} catch (Exception five)
{
E.printstacktrace ();
} finally
{
if (requestmethod! = NULL)
{
Requestmethod.releaseconnection ();
}
}
return to Responsecontent;
}
/ **
* Reads information from the stream according to the specified encoding
*
* @ parameter Instream
* Parameter Code
* @ Back
* Trigger IOException
* /
Common static string Getcontentbystream (in InputStream instream, string encoding) throws a IOException exception
{
if (empty = = in-stream AD)
{
returns null;
}
StringBuilder content = new StringBuilder ();
Reads stream content in the specified encoding format
BufferedReader reader = new BufferedReader (new InputStreamReader (in-stream advertising, coding));
String message = NULL;
and (empty = (message = Reader.readline ())! )
{
Content.append (message);
Content.append ("\ r \ n");
}
Close the reader and release resources
Reader.close ();
Return (content.tostring ());
}
/ **
* Internal class, inherited from Postmethod, used to specify the postal request encoding format
* /
Postmethod of public static class Encodepostmethod extension
{
private string encoding = NULL;
Public Encodepostmethod (URL string, string encoding)
{
Super (URL);
This.encode = encoding;
}
@ Overwrite
Common String Getrequestcharset ()
{
Todo automatically generate method stubs
return (This.encode);
}
}
/ **
* Test
*
* @ parameter args
* /
public static Invalid main (string [] args)
{
System.setproperty ("Http.proxyhost", "165.228.128.10");
System.setproperty ("Http.proxyport", "3128");
System.setproperty ("Http.proxyset", "true");
String requrl = "http://news.39.net/jbyw/index.html";
Requrl = "http://news.39.net/a/2010722/1404231.html";
Map <string, string > title = new HashMap <string, String > ();
Headers.put ("Accept Code", "gzip, put gas");
HttpClient of httpclient = Gethttpclient ();
String htmlcontent = DoRequest (httpclient, Requrl, head, empty, "GBK");
System.out.println (htmlcontent);
}
}