以新安人才網為例
1、使用依賴
httpclient:用來發送 HTTP 請求
jsoup:用來解析 HTML
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.8.1</version>
</dependency>
<dependency>
    <groupId>commons-httpclient</groupId>
    <artifactId>commons-httpclient</artifactId>
    <version>3.0.1</version>
</dependency>
2、具體實現如下
package com.ssjk.website.service.JsoupHtml;

import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.cookie.CookieSpec;
import org.apache.commons.httpclient.methods.PostMethod;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;

/**
 * Crawls information from the Xin'an talent site (goodjobs.cn).
 *
 * <p>The flow is: POST the login form with Commons HttpClient, collect the cookies the
 * server set for the login host, then request {@code forwardURL} with jsoup while sending
 * those cookies.
 */
public class XinAnRenCaiWang {
    private static final String SITE = "login.goodjobs.cn";
    // NOTE(review): port 80 means the login form (including the password) is posted over
    // plain HTTP - confirm whether the site offers an HTTPS endpoint.
    private static final int PORT = 80;
    private static final String loginAction = "/index.php/action/UserLogin";
    private static final String forwardURL = "http://user.goodjobs.cn/dispatcher.php/module/Personal/";

    // The four constants below are not referenced anywhere in this class; they are kept
    // in case other parts of the project/article rely on them.
    private static final String toUrl = "d:\\jsoup_test\\";
    private static final String hostCss = "d:\\jsoup_test\\style.txt";
    private static final String Img = "http://user.goodjobs.cn/images";
    private static final String _JS = "http://user.goodjobs.cn/scripts/fValidate/fValidate.one.js";

    /**
     * Simulates a login by posting the member name and password to the login action and
     * returns the cookies that match the login host.
     *
     * @param LOGON_SITE   host name of the login server
     * @param LOGON_PORT   port of the login server
     * @param login_Action path of the login action on that host
     * @param params       {@code params[0]} is the member name, {@code params[1]} the password
     * @return the matching cookies flattened as {@code [name0, value0, name1, value1, ...]};
     *         an empty array when no cookie matched (never {@code null})
     * @throws IllegalArgumentException if fewer than two values are supplied in {@code params}
     * @throws Exception                if the HTTP request fails
     */
    private static String[] loginHtml(String LOGON_SITE, int LOGON_PORT, String login_Action, String... params)
            throws Exception {
        if (params == null || params.length < 2) {
            throw new IllegalArgumentException("params must contain the member name and the password");
        }

        HttpClient client = new HttpClient();
        client.getHostConfiguration().setHost(LOGON_SITE, LOGON_PORT);

        // Submit the login form.
        PostMethod post = new PostMethod(login_Action);
        post.setRequestBody(new NameValuePair[]{
                new NameValuePair("memberName", params[0]),
                new NameValuePair("password", params[1])});
        try {
            // executeMethod returns the HTTP status code of the response.
            int status = client.executeMethod(post);
            System.out.println("執行狀態:" + status);
        } finally {
            // Always hand the connection back, even when the request throws.
            post.releaseConnection();
        }

        // Inspect the cookies stored in the client state for the login host.
        CookieSpec cookiespec = CookiePolicy.getDefaultSpec();
        Cookie[] cookies = cookiespec.match(LOGON_SITE, LOGON_PORT, "/", false, client.getState().getCookies());
        if (cookies == null || cookies.length == 0) {
            System.out.println("Cookies is not Exists ");
            return new String[0];
        }

        String[] result = new String[cookies.length * 2];
        for (int i = 0; i < cookies.length; i++) {
            System.out.println("----------------------------------------------------");
            System.out.println(cookies[i].toString());
            System.out.println("----------------------------------------------------");
            // Read name/value from the cookie object instead of splitting its string form
            // on '=', which breaks when the value itself contains '='.
            String value = cookies[i].getValue();
            result[2 * i] = cookies[i].getName();
            result[2 * i + 1] = value == null ? "" : value;
        }
        return result;
    }

    /**
     * Fetches {@code forwardURL} with jsoup, sending the given cookies.
     *
     * @param cookies alternating cookie names and values
     *                ({@code [name0, value0, name1, value1, ...]}); a trailing element
     *                without a partner is ignored
     * @return the parsed document, or {@code null} if the page could not be fetched
     * @throws IllegalArgumentException if {@code cookies} is {@code null} or holds no
     *                                  complete name/value pair
     */
    public static Document getHtmlDocument(String[] cookies) {
        if (cookies == null || cookies.length < 2) {
            throw new IllegalArgumentException("cookies must contain at least one name/value pair");
        }
        try {
            Connection connection = Jsoup.connect(forwardURL);
            for (int i = 0; i + 1 < cookies.length; i += 2) {
                connection.cookie(cookies[i], cookies[i + 1]);
            }
            return connection.get();
        } catch (IOException e) {
            System.out.println("頁面擷取異常。");
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Logs in and prints the fetched page.
     *
     * @param args optional: {@code args[0]} member name and {@code args[1]} password;
     *             when fewer than two arguments are given the built-in account is used
     */
    public static void main(String[] args) {
        // NOTE(review): credentials are hard-coded in source as a fallback; they should be
        // moved to configuration or supplied on the command line.
        String[] params = (args != null && args.length >= 2)
                ? new String[]{args[0], args[1]}
                : new String[]{"job17093123661", "2534133662qq"};
        try {
            String[] strings = loginHtml(SITE, PORT, loginAction, params);
            if (strings.length > 0) {
                Document htmlDocument = getHtmlDocument(strings);
                System.out.println(htmlDocument);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}