Java Web page data capture

Source: Internet
Author: User

I haven't studied encrypted websites yet, so I don't know whether their data can be captured. For now I only want to capture page data from unencrypted websites. At first I thought most websites could be crawled, but I found that many of them are encrypted. I also thought a webpage data detection tool could be pointed at some addresses to watch for data changes; however, such tools can only detect data that is displayed through JS, and encrypted websites still cannot be crawled. Anyway, more on that later.
[Java]
import java.io.*;
import java.net.*;
import java.util.regex.*;
Public class Capture {
Public static void main (String args []) throws Exception {
System. out. println ********** **************");
System. out. println ("My location is:" + new GrabMobile (). grabMobileLocation ("15023141745 "));
System. out. println ("mobile phone card type:" + new GrabMobile (). grabMobileType ("15023141745 "));
System. out. println ("My zip code is:" + new GrabMobile (). grabMobilePost ("15023141745 "));
System. out. println ********** **************");
System. out. println ("My gender is:" + new GrabIdentity (). grabIdentitySex ("362203199208243575 "));
System. out. println ("My birthday is:" + new GrabIdentity (). grabIdentityBirth ("362203199208243575 "));
System. out. println ("My hometown is:" + new GrabIdentity (). grabIdentityHome ("362203199208243575 "));
}
}
/**
 * Scrapes the ip138.com mobile-number lookup page and cuts individual fields
 * out of the returned HTML by searching for marker strings and applying fixed
 * character offsets.
 *
 * <p>NOTE(review): the marker strings below look machine-translated; they were
 * presumably the page's original (Chinese) labels, and the numeric offsets were
 * tuned to those labels. Restore the real labels and re-check every offset
 * against the live page before relying on the results.
 */
class GrabMobile {
    private static final String LOOKUP_URL =
            "http://www.ip138.com:8080/search.asp?action=mobile&mobile=";

    // The page is decoded as GBK.
    private static final String PAGE_CHARSET = "GBK";

    // One or more consecutive characters in the range U+4E00 to U+9FA5.
    private static final Pattern CJK_RUN = Pattern.compile("([\u4e00-\u9fa5]+)");

    /**
     * Returns the location field for the given mobile number.
     *
     * @param m mobile number to look up
     * @return the CJK characters found in the location section of the page
     * @throws Exception if the page cannot be fetched or the markers are missing
     */
    public String grabMobileLocation(String m) throws Exception {
        String strAll = fetchPage(m);
        // An empty start marker matches at index 0, so the slice starts at a
        // fixed position 42 characters into the page.
        String result = slice(strAll, "", 42, "card type", 33);
        return drawChMob(result);
    }

    /**
     * Returns the card-type field for the given mobile number.
     *
     * @param m mobile number to look up
     * @return the CJK characters of the card-type section, minus the first one
     * @throws Exception if the page cannot be fetched or the markers are missing
     */
    public String grabMobileType(String m) throws Exception {
        String strAll = fetchPage(m);
        String result = drawChMob(
                slice(strAll, "card type", 12, "<TD align=\"center\">area number</TD>", 0));
        // The original dropped the first extracted character (presumably a
        // leftover of the label itself - confirm). Guard the empty case so a
        // page without CJK text does not throw.
        return result.isEmpty() ? result : result.substring(1);
    }

    /**
     * Returns the postal-code field for the given mobile number.
     *
     * @param m mobile number to look up
     * @return the raw text between the postal-code markers
     * @throws Exception if the page cannot be fetched or the markers are missing
     */
    public String grabMobilePost(String m) throws Exception {
        String strAll = fetchPage(m);
        return slice(strAll, "Postal editing", 40, "more detailed ..", 55);
    }

    /**
     * Concatenates every run of CJK characters (U+4E00 to U+9FA5) found in
     * {@code str}, dropping everything else.
     *
     * @param str text to filter
     * @return the concatenated CJK runs; empty if there are none
     */
    public String drawChMob(String str) {
        StringBuilder strBuf = new StringBuilder();
        Matcher matcher = CJK_RUN.matcher(str);
        while (matcher.find()) {
            strBuf.append(matcher.group(0));
        }
        return strBuf.toString();
    }

    /**
     * Downloads the lookup page for {@code m} and returns it as one string with
     * the line terminators removed (the slice offsets count characters of that
     * joined form).
     */
    private static String fetchPage(String m) throws IOException {
        // Encode the number so characters such as '&' or spaces cannot corrupt
        // the query string; plain digits pass through unchanged.
        URL url = new URL(LOOKUP_URL + URLEncoder.encode(m, PAGE_CHARSET));
        HttpURLConnection httpUrlCon = (HttpURLConnection) url.openConnection();
        try (BufferedReader bufRead = new BufferedReader(
                new InputStreamReader(httpUrlCon.getInputStream(), PAGE_CHARSET))) {
            StringBuilder strBuf = new StringBuilder();
            String line;
            while ((line = bufRead.readLine()) != null) {
                strBuf.append(line);
            }
            return strBuf.toString();
        } finally {
            httpUrlCon.disconnect();
        }
    }

    /**
     * Returns {@code page.substring(start + startOffset, end - endBackoff)},
     * where {@code start} is the first index of {@code startMarker} and
     * {@code end} is the first index of {@code endMarker} at or after it.
     * Fails with a descriptive error instead of an index exception when a
     * marker is missing or the range is empty-negative.
     */
    private static String slice(String page, String startMarker, int startOffset,
                                String endMarker, int endBackoff) throws IOException {
        int start = page.indexOf(startMarker);
        if (start < 0) {
            throw new IOException("Start marker not found in page: \"" + startMarker + "\"");
        }
        int end = page.indexOf(endMarker, start);
        if (end < 0) {
            throw new IOException("End marker not found in page: \"" + endMarker + "\"");
        }
        int from = start + startOffset;
        int to = end - endBackoff;
        if (from > to) {
            throw new IOException("Unexpected page layout: slice [" + from + ", " + to
                    + ") is invalid for markers \"" + startMarker + "\" / \"" + endMarker + "\"");
        }
        return page.substring(from, to);
    }
}
/**
 * Scrapes the ip138.com ID-card lookup page and cuts individual fields out of
 * the returned HTML by searching for marker strings and applying fixed
 * character offsets.
 *
 * <p>NOTE(review): the marker strings below look machine-translated; they were
 * presumably the page's original (Chinese) labels, and the numeric offsets were
 * tuned to those labels. Restore the real labels and re-check every offset
 * against the live page before relying on the results.
 */
class GrabIdentity {
    private static final String LOOKUP_URL_PREFIX =
            "http://qq.ip138.com/idsearch/index.asp?action=idcard&userid=";

    // Already percent-encoded value of the form's submit button.
    private static final String LOOKUP_URL_SUFFIX = "&B1=%B2%E9+%D1%AF";

    // The page is decoded as GBK.
    private static final String PAGE_CHARSET = "GBK";

    // One or more consecutive characters in the range U+4E00 to U+9FA5.
    private static final Pattern CJK_RUN = Pattern.compile("([\u4e00-\u9fa5]+)");

    /**
     * Returns the gender field for the given ID-card number.
     *
     * @param userid ID-card number to look up
     * @return the first run of CJK characters in the gender section, or the raw
     *         section text if it contains none
     * @throws Exception if the page cannot be fetched or the markers are missing
     */
    public String grabIdentitySex(String userid) throws Exception {
        String strAll = fetchPage(userid);
        return drawCh(slice(strAll, "Don", 7, "Date of Birth"));
    }

    /**
     * Returns the date-of-birth field for the given ID-card number.
     *
     * @param userid ID-card number to look up
     * @return the raw text between the date-of-birth markers
     * @throws Exception if the page cannot be fetched or the markers are missing
     */
    public String grabIdentityBirth(String userid) throws Exception {
        String strAll = fetchPage(userid);
        return slice(strAll, "Date of Birth:</td><td class=\"tdc2\">", 27,
                "</td><tr><td class=");
    }

    /**
     * Returns the issuing-place ("hometown") field for the given ID-card number.
     *
     * @param userid ID-card number to look up
     * @return the raw text between the issuing-place markers
     * @throws Exception if the page cannot be fetched or the markers are missing
     */
    public String grabIdentityHome(String userid) throws Exception {
        String strAll = fetchPage(userid);
        return slice(strAll, "Certificate:</td><td class=\"tdc2\">", 31,
                "<br/></td></tr><td class=\"tdc3\" valign=\"top\" align=\"right\">part or");
    }

    /**
     * Returns the first run of CJK characters (U+4E00 to U+9FA5) in {@code str}.
     *
     * @param str text to search
     * @return the first CJK run, or {@code str} unchanged when there is none
     */
    public String drawCh(String str) {
        Matcher matcher = CJK_RUN.matcher(str);
        return matcher.find() ? matcher.group(0) : str;
    }

    /**
     * Downloads the lookup page for {@code userid} and returns it as one string
     * with the line terminators removed (the slice offsets count characters of
     * that joined form).
     */
    private static String fetchPage(String userid) throws IOException {
        // Encode the id so characters such as '&' or spaces cannot corrupt the
        // query string; plain digits and letters pass through unchanged.
        URL url = new URL(LOOKUP_URL_PREFIX + URLEncoder.encode(userid, PAGE_CHARSET)
                + LOOKUP_URL_SUFFIX);
        HttpURLConnection httpUrlCon = (HttpURLConnection) url.openConnection();
        try (BufferedReader bufRead = new BufferedReader(
                new InputStreamReader(httpUrlCon.getInputStream(), PAGE_CHARSET))) {
            StringBuilder strBuf = new StringBuilder();
            String line;
            while ((line = bufRead.readLine()) != null) {
                strBuf.append(line);
            }
            return strBuf.toString();
        } finally {
            httpUrlCon.disconnect();
        }
    }

    /**
     * Returns {@code page.substring(start + startOffset, end)}, where
     * {@code start} is the first index of {@code startMarker} and {@code end}
     * is the first index of {@code endMarker} at or after it. Fails with a
     * descriptive error instead of an index exception when a marker is missing
     * or the range is invalid.
     */
    private static String slice(String page, String startMarker, int startOffset,
                                String endMarker) throws IOException {
        int start = page.indexOf(startMarker);
        if (start < 0) {
            throw new IOException("Start marker not found in page: \"" + startMarker + "\"");
        }
        int end = page.indexOf(endMarker, start);
        if (end < 0) {
            throw new IOException("End marker not found in page: \"" + endMarker + "\"");
        }
        int from = start + startOffset;
        if (from > end) {
            throw new IOException("Unexpected page layout: slice [" + from + ", " + end
                    + ") is invalid for markers \"" + startMarker + "\" / \"" + endMarker + "\"");
        }
        return page.substring(from, end);
    }
}

An Android app converted from this code will be uploaded later. It lets you look up information for a mobile phone number or an ID card number.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.