Fetching Sina news data with Jsoup + HttpClient
Package com. test; import java. io. IOException; import java.net. URI; import org. apache. http. header; import org. apache. http. httpEntity; import org. apache. http. httpResponse; import org. apache. http. client. clientProtocolException; import org. apache. http. client. methods. httpGet; import org. apache. http. client. utils. URIBuilder; import org. apache. http. impl. client. closeableHttpClient; import org. apache. http. impl. cl Ient. httpClients; import org. apache. http. util. entityUtils;/**** dependent on commons-httpclient-3.1.jar commons-codec-1.4.jar ** @ author tianjun **/public class PostTest {public static void main (String [] args) throws Exception {// (1) construct the HttpClient instance CloseableHttpClient httpCLient = HttpClients. createDefault (); // create a get request instance HttpGet httpget = new HttpGet (); // set parameters // http://roll.news.sina.com.cn/s/channe L. php? Ch = 01 # col = 91 & spec = & type = & ch = 01 & k = & offset_page = 0 & offset_num = 0 & num = 60 & asc = & page = NaN/ /http://roll.news.sina.com.cn/s/channel.php? Col = 91 & spec = & type = & ch = 01 & offset_page = 0 & offset_num = 0 & num = 60 & page = 1 URI uri = new URIBuilder (). setScheme ("http "). setHost ("roll.news.sina.com.cn "). setPath ("/s/channel. php "). setParameter ("ch", "01 "). setParameter ("col", "91 "). setParameter ("spec ",""). setParameter ("type ",""). setParameter ("ch", "01 "). setParameter ("offset_page", "0 "). setParameter ("offset_num", "0 "). setParameter ("num", "60 "). set Parameter ("page", "1 "). build (); httpget. setURI (uri); // set the request header information/**/httpget. setHeader ("Accep", "*/*"); httpget. setHeader ("Accept-Encoding", "gzip, deflate"); httpget. setHeader ("Accept-Language", "zh-CN, zh; q = 0.8, en-US; q = 0.5, en; q = 0.3"); httpget. setHeader ("Connection", "keep-alive"); httpget. setHeader ("Host", "roll.news.sina.com.cn"); httpget. setHeader ("Referer", "http://roll.news.sina.com.cn/s/channel. 
Php? Ch = 01 "); httpget. setHeader ("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv: 37.0) Gecko/20100101 Firefox/37.0"); httpget. setHeader ("Content-Type", "text/html; charset = UTF-8"); System. out. println ("executing request" + httpget. getURI (); try {// The client executes the get request to return the response entity HttpResponse response = httpCLient.exe cute (httpget); // The server response status line System. out. println (response. getStatusLine (); Header [] heads = response. GetAllHeaders (); // print all response headers for (Header h: heads) {System. out. println (h. getName () + ":" + h. getValue ();} // gets the response Message entity HttpEntity entity = response. getEntity (); System. out. println ("------------------------------------"); if (entity! = Null) {// response content System. out. println (new String (EntityUtils. toString (entity ). getBytes ("ISO-8859-1"), "gbk"); System. out. println ("--------------------------------------"); // response Content Length: System. out. println ("response Content Length:" + entity. getContentLength () ;}} catch (ClientProtocolException e) {e. printStackTrace ();} catch (IOException e) {e. printStackTrace ();} finally {httpCLient. getConnectionManager (). shutdown ();}}}