Java 爬蟲——免登入:透過 HttpClient 模擬登入並擷取登入後的資訊

來源:互聯網
上載者:User

以新安人才網為例
1、使用依賴
httpclient:用於發送 HTTP 請求
jsoup:用於解析 HTML

 <!-- jsoup: parses the HTML that is fetched -->
 <dependency>
     <groupId>org.jsoup</groupId>
     <artifactId>jsoup</artifactId>
     <version>1.8.1</version>
 </dependency>
 <!-- commons-httpclient: performs the HTTP requests -->
 <dependency>
     <groupId>commons-httpclient</groupId>
     <artifactId>commons-httpclient</artifactId>
     <version>3.0.1</version>
 </dependency>

2、具體實現如下

package com.ssjk.website.service.JsoupHtml;import org.apache.commons.httpclient.Cookie;import org.apache.commons.httpclient.HttpClient;import org.apache.commons.httpclient.NameValuePair;import org.apache.commons.httpclient.cookie.CookiePolicy;import org.apache.commons.httpclient.cookie.CookieSpec;import org.apache.commons.httpclient.methods.PostMethod;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import java.io.IOException;/** * 爬取新安人才網資訊 */public class XinAnRenCaiWang {    private static final String SITE = "login.goodjobs.cn";    private static final int PORT = 80;    private static final String loginAction = "/index.php/action/UserLogin";    private static final String forwardURL =            "http://user.goodjobs.cn/dispatcher.php/module/Personal/";    private static final String toUrl = "d:\\jsoup_test\\";    private static final String hostCss = "d:\\jsoup_test\\style.txt";    private static final String Img = "http://user.goodjobs.cn/images";    private static final String _JS = "http://user.goodjobs.cn/scripts/fValidate/fValidate.one.js";    /**     * 類比等錄     *     * @param LOGON_SITE     * @param LOGON_PORT     * @param login_Action     * @param params     * @throws Exception     */    private static String[] loginHtml(String LOGON_SITE, int LOGON_PORT, String            login_Action, String... 
params) throws Exception {        String[] result=null;        HttpClient client = new HttpClient();        client.getHostConfiguration().setHost(LOGON_SITE, LOGON_PORT);// 類比登入頁面        PostMethod post = new PostMethod(login_Action);        NameValuePair userName = new NameValuePair("memberName", params[0]);        NameValuePair password = new NameValuePair("password", params[1]);        post.setRequestBody(new NameValuePair[]{userName, password});        client.executeMethod(post);        System.out.println("執行狀態:"+client.getState());        post.releaseConnection();// 查看 cookie 資訊        CookieSpec cookiespec = CookiePolicy.getDefaultSpec();        Cookie[] cookies = cookiespec.match(LOGON_SITE, LOGON_PORT, "/", false,                client.getState().getCookies());        if (cookies != null)            if (cookies.length == 0) {                System.out.println("Cookies is not Exists ");            } else {                for (int i = 0; i < cookies.length; i++) {                    System.out.println("----------------------------------------------------");                    System.out.println(cookies[i].toString());                    result = cookies[i].toString().split("=");                    System.out.println("----------------------------------------------------");                }            }            return result;    }    /**     *     * @param cookies     * @return     */    public static Document getHtmlDocument(String[] cookies){        try {            Document doc = Jsoup.connect(forwardURL).cookie(cookies[0],cookies[1]).get();            return doc;        } catch (IOException e) {            System.out.println("頁面擷取異常。");            e.printStackTrace();        }        return null;    }    public static void main(String[] args) {        String[] params = {"job17093123661", "2534133662qq"};        String[] strings = null;        try {            strings = loginHtml(SITE, PORT, loginAction, params);            if(strings.length>0){           
     Document htmlDocument = getHtmlDocument(strings);                System.out.println(htmlDocument);            }        } catch (Exception e) {            e.printStackTrace();        }    }}

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.