java實現自動登入,並擷取資料

來源:互聯網
上載者:User

要抓取網站上必須登入後才能看到的資料,就得先登入,然後才能擷取這些資料。若用程式來實現,就需要自動登入,並將登入後取得的 Cookie 儲存起來,之後擷取資料時帶上這些 Cookie,便無須再次登入。以網易郵箱為例:
所需 jar 包:commons-codec-1.3.jar、commons-httpclient-3.0.1.jar、commons-logging-1.1.1.jar、jaxen-1.1-beta-6.jar(下面的源碼並未 import jaxen,應非必須)
源碼:
import java.io.IOException;
import java.util.Date;

import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;

public class AutoLogin {

    private static String USERNAME = "***";
    private static String LOGINURL = "http://reg.163.com/login.jsp";
   
    private String CookieStr;
    private Date EndTime;
   
    public void autoLogin(){
        HttpClient httpClient = new HttpClient();
        PostMethod post = new PostMethod(LOGINURL);
        // 注意這裡的地址!
        post.setRequestHeader("Accept-Language", "en-us,en;q=0.5");
        post.setRequestHeader("Accept-Encoding","gzip, deflate");
   
    post.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U;
Windows NT 5.2; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7 (.NET
CLR 3.5.30729)");
        post.addRequestHeader("Content-Type","application/x-www-form-urlencoded;charset=utf-8");
       
        NameValuePair user = new NameValuePair("user","***");
        NameValuePair pwd = new NameValuePair("password","***");
        NameValuePair username = new NameValuePair("username",USERNAME + "@163.com");
       
        post.setRequestBody(new NameValuePair[]{username,user,pwd});
        httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
       
        int status;
        try {
            status = httpClient.executeMethod(post);
            System.out.println("loginUrl:"+status);
            Cookie[] cookies = httpClient.getState().getCookies();
            //擷取cookie
            if (cookies.length == 0){
                System.out.println("Cookie:None");
            } else {
                if (cookies.length >=2)
                {
                    EndTime = cookies[1].getExpiryDate();
                    for(Cookie c:cookies){
                        CookieStr +=c.toString()+";";
                    }
                }
            }
            System.out.println(EndTime);
            System.out.println(CookieStr);
            System.out.println(post.getResponseBodyAsString());
        } catch (HttpException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }finally{
            post.releaseConnection();
        }
    }
   
    public String getContent(){
        Date date = new Date();
        String content = null;
       
        //CookieStr的值為空白或者cookie已經失效
        if (CookieStr == "" || EndTime == null || date.getTime() > EndTime.getTime())
        {
            autoLogin();
        }
        HttpClient client = new HttpClient();
        client.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        //get data url
        String url = "*******";

        GetMethod get = new GetMethod(url);
        get.setRequestHeader("Accept-Language", "en-us,en;q=0.5");
        get.setRequestHeader("Accept-Encoding","gzip, deflate");
        get.setRequestHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        get.setRequestHeader("Accept-Charset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
       
get.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT
5.2; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7 (.NET CLR
3.5.30729)");
     
        //設定Cookie,必須
        get.setRequestHeader("Cookie", CookieStr);
        try {
            int status = client.executeMethod(get);
            //擷取你想要的頁面內容
            content = get.getResponseBodyAsString();
            System.out.println("getUrl:"+status);
            System.out.println(content);
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }finally
        {
            get.releaseC

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.