The HttpClient 4.x API is quite large; the code below shows how to use it to perform a login and then crawl a web page.
See the HttpClient API documentation (4.0.x) and the HttpCore API documentation (4.1).
package spider;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.*;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;

/**
 * Example crawler for the dict.cn website: logs in with a username/password
 * and then fetches a page using the same (cookie-carrying) HTTP client.
 *
 * @author Winter Lau
 */
public class DictSpider {

    /** Shared client so the session cookie from login() is reused by get(). */
    private final static HttpClient client = new DefaultHttpClient();

    public static void main(String[] args) throws IOException {
        login("<username>", "<password>", false);
        get("http://www16.dict.cn/bdc/141");
    }

    /**
     * Fetch a web page and dump its body to stdout.
     *
     * @param url the page URL to crawl
     * @throws IOException on any network or protocol error
     */
    static void get(String url) throws IOException {
        HttpGet get = new HttpGet(url);
        HttpResponse response = client.execute(get);
        System.out.println(response.getStatusLine());
        HttpEntity entity = response.getEntity();
        dump(entity);
    }

    /**
     * Perform the login process by POSTing the login form.
     *
     * @param user  account user name
     * @param pwd   account password
     * @param debug if true, print the response status and body
     * @throws IOException on any network or protocol error
     */
    static void login(String user, String pwd, boolean debug) throws IOException {
        HttpPost post = new HttpPost("http://dict.cn/login.php");
        post.setHeader("User-Agent",
                "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3");

        // Fields of the login form.
        List<NameValuePair> qparams = new ArrayList<NameValuePair>();
        qparams.add(new BasicNameValuePair("username", user));
        qparams.add(new BasicNameValuePair("password", pwd));
        qparams.add(new BasicNameValuePair("url", "http://www16.dict.cn/bdc/141"));
        qparams.add(new BasicNameValuePair("loginforever", "1"));
        UrlEncodedFormEntity params = new UrlEncodedFormEntity(qparams, "UTF-8");
        post.setEntity(params);

        // Execute the request; the session cookie is stored in the shared client.
        HttpResponse response = client.execute(post);
        if (debug) {
            // Examine the response status.
            System.out.println(response.getStatusLine());
            // Get hold of the response entity and print it.
            HttpEntity entity = response.getEntity();
            dump(entity);
        }
    }

    /**
     * Print the page body to stdout.
     *
     * @param entity HTTP response entity; its content stream is consumed and closed
     * @throws IOException on read failure
     */
    private static void dump(HttpEntity entity) throws IOException {
        // dict.cn serves GBK-encoded pages, so decode explicitly rather than
        // relying on the platform default charset.
        BufferedReader br = new BufferedReader(
                new InputStreamReader(entity.getContent(), "GBK"));
        try {
            System.out.println(IOUtils.toString(br));
        } finally {
            // Close the reader (and underlying entity stream) so the
            // connection can be released back to the client.
            br.close();
        }
    }
}
Website: http://www.oschina.net/code/snippet_12_2209
Code sample: performing a website login and crawling a web page with HttpClient 4.x.