為了抓取 Web 上一些需要登入後才能存取的隱私資料,必須先登入,然後才能擷取這些資料。若要用程式來實現,就需要實作自動登入,並將登入資訊儲存在 Cookie 中,以便之後取得資料時無須再次登入。以網易郵箱為例:
所需 jar 包:commons-codec-1.3.jar、commons-httpclient-3.0.1.jar、commons-logging-1.1.1.jar、jaxen-1.1-beta-6.jar(下列源碼並未直接引用 jaxen,不確定是否必須)
源碼:
import java.io.IOException;
import java.util.Date;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
public class AutoLogin {
// Account name (placeholder); autoLogin() appends "@163.com" to it to build the "username" form field.
private static String USERNAME = "***";
// URL the login form is POSTed to by autoLogin().
private static String LOGINURL = "http://reg.163.com/login.jsp";
// Cookies captured at login, joined as "cookie;cookie;..." and sent back as the Cookie request header.
// Has no initializer, so it is null until autoLogin() assigns it.
private String CookieStr;
// Expiry date taken from the second login cookie; getContent() logs in again once it is null or in the past.
private Date EndTime;
public void autoLogin(){
HttpClient httpClient = new HttpClient();
PostMethod post = new PostMethod(LOGINURL);
// 注意這裡的地址!
post.setRequestHeader("Accept-Language", "en-us,en;q=0.5");
post.setRequestHeader("Accept-Encoding","gzip, deflate");
post.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U;
Windows NT 5.2; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7 (.NET
CLR 3.5.30729)");
post.addRequestHeader("Content-Type","application/x-www-form-urlencoded;charset=utf-8");
NameValuePair user = new NameValuePair("user","***");
NameValuePair pwd = new NameValuePair("password","***");
NameValuePair username = new NameValuePair("username",USERNAME + "@163.com");
post.setRequestBody(new NameValuePair[]{username,user,pwd});
httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
int status;
try {
status = httpClient.executeMethod(post);
System.out.println("loginUrl:"+status);
Cookie[] cookies = httpClient.getState().getCookies();
//擷取cookie
if (cookies.length == 0){
System.out.println("Cookie:None");
} else {
if (cookies.length >=2)
{
EndTime = cookies[1].getExpiryDate();
for(Cookie c:cookies){
CookieStr +=c.toString()+";";
}
}
}
System.out.println(EndTime);
System.out.println(CookieStr);
System.out.println(post.getResponseBodyAsString());
} catch (HttpException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
post.releaseConnection();
}
}
public String getContent(){
Date date = new Date();
String content = null;
//CookieStr的值為空白或者cookie已經失效
if (CookieStr == "" || EndTime == null || date.getTime() > EndTime.getTime())
{
autoLogin();
}
HttpClient client = new HttpClient();
client.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
//get data url
String url = "*******";
GetMethod get = new GetMethod(url);
get.setRequestHeader("Accept-Language", "en-us,en;q=0.5");
get.setRequestHeader("Accept-Encoding","gzip, deflate");
get.setRequestHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
get.setRequestHeader("Accept-Charset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
get.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT
5.2; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7 (.NET CLR
3.5.30729)");
//設定Cookie,必須
get.setRequestHeader("Cookie", CookieStr);
try {
int status = client.executeMethod(get);
//擷取你想要的頁面內容
content = get.getResponseBodyAsString();
System.out.println("getUrl:"+status);
System.out.println(content);
} catch (HttpException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}finally
{
get.releaseC