package com.entrym.crawler.test;

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.entrym.crawler.constans.CommonConstants;
import com.entrym.crawler.util.CommonUtil;
import com.entrym.crawler.util.PowerHttpClient;
import com.entrym.crawler.util.verifyCode.Captcha;

/**
 * Crawler that signs in to the central bank's personal credit report site
 * ({@code ipcrs.pbccrc.org.cn}).
 *
 * <p>The login flow is: fetch the login page, scrape the hidden form fields,
 * download the captcha image and ask the operator to type it in, post the
 * login form, then fetch the welcome page and inspect it for a success marker.
 *
 * <p>NOTE(review): this class was reconstructed from a copy whose identifier
 * and string case had been mangled. The method names used on the project
 * helpers ({@code PowerHttpClient}, {@code CommonUtil}, {@code CommonConstants},
 * {@code Captcha}) follow conventional camelCase / UPPER_SNAKE_CASE and must be
 * confirmed against those classes.
 */
public class PbccrcCrawler {

    private static final Logger logger = LoggerFactory.getLogger(PbccrcCrawler.class);

    private static final String BASE_URL = "https://ipcrs.pbccrc.org.cn";
    private static final String LOGIN_URL = "https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp";
    private static final String LOGIN_POST_URL = "https://ipcrs.pbccrc.org.cn/login.do";
    private static final String WELCOME_URL = "https://ipcrs.pbccrc.org.cn/welcome.do";
    // The three report URLs are not used by the login flow in this class; kept for later steps.
    private static final String REPORT_URL = "https://ipcrs.pbccrc.org.cn/reportAction.do";
    private static final String SUMMARY_REPORT_URL = "https://ipcrs.pbccrc.org.cn/summaryReport.do";
    private static final String SIMPLE_REPORT_URL = "https://ipcrs.pbccrc.org.cn/simpleReport.do";

    private static final String HOST = "ipcrs.pbccrc.org.cn";
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko";

    // NOTE(review): the original literal was kept as found; it looks machine-translated and
    // probably has to be the exact (Chinese) text shown on the real welcome page - confirm.
    private static final String LOGIN_SUCCESS_MARKER =
            "Welcome to login personal credit information Service platform";

    private PowerHttpClient powerHttpClient;
    private boolean isLogin = false;  // login status
    private boolean report0 = false;  // personal credit information tips
    private boolean report1 = false;  // summary of personal credit information
    private boolean report2 = false;  // personal credit report

    private String task_id = null;

    /**
     * Logs in to the site and returns the HTML of the welcome page.
     *
     * <p>A captcha image, when present on the login page, is written to the
     * temp-image folder and the answer is read interactively from the console.
     *
     * @param loginName account name submitted in the login form
     * @param password  account password submitted in the login form
     * @return the welcome page HTML fetched after posting the form (returned
     *         whether or not the login actually succeeded), or {@code null}
     *         if any step threw an exception
     */
    public String login(String loginName, String password) {
        try {
            powerHttpClient = new PowerHttpClient();

            // Open the login page and scrape the hidden form fields.
            String html = powerHttpClient.getToString(LOGIN_URL);
            logger.debug("----Home---{}", html);

            Document doc = Jsoup.parse(html);
            String token = doc.select("input[name=org.apache.struts.taglib.html.TOKEN]").val();
            String method = doc.select("input[name=method]").val();
            String date = doc.select("input[name=date]").val();
            String imgUrl = doc.select("img[id=imgrc]").attr("src");

            String imgData = "";
            if (StringUtils.isNotBlank(imgUrl)) {
                imgData = readCaptcha(BASE_URL + imgUrl);
            }

            // Submit the login form.
            // NOTE(review): field names were restored from a case-mangled copy; "method" and
            // "date" mirror the inputs scraped above, the others should be checked against
            // the live form.
            Map<String, String> form = new HashMap<>();
            form.put("org.apache.struts.taglib.html.TOKEN", token);
            form.put("method", method);
            form.put("date", date);
            form.put("_@IMGRC@_", imgData);
            form.put("loginname", loginName);
            form.put("password", password);

            HttpPost httpPost = powerHttpClient.getHttpPost(LOGIN_POST_URL);
            httpPost.addHeader("Host", HOST);
            httpPost.addHeader("User-Agent", USER_AGENT);
            httpPost.addHeader("Referer", LOGIN_URL);
            String loginResult = powerHttpClient.postWithMap(form, httpPost);
            logger.debug("----Submit Login Results---{}", loginResult);

            // Fetch the welcome page; its content tells us whether the login worked.
            HttpGet welcomeGet = powerHttpClient.getHttpGet(WELCOME_URL);
            welcomeGet.addHeader("Host", HOST);
            welcomeGet.addHeader("User-Agent", USER_AGENT);
            welcomeGet.addHeader("Referer", LOGIN_POST_URL);
            String welcomeHtml = powerHttpClient.getToString(welcomeGet, "");
            logger.info("----Welcome page---{}", welcomeHtml);

            parseLogin(welcomeHtml);
            return welcomeHtml;
        } catch (Exception e) {
            // Passing the exception as the last argument lets SLF4J print the stack trace.
            logger.error("{}---login exception", task_id, e);
        }
        return null;
    }

    /**
     * Downloads the captcha image, saves it to the temp-image folder and
     * returns the answer obtained from {@code CommonUtil.consoleScanner()}.
     *
     * @param captchaUrl absolute URL of the captcha image
     * @return the captcha text supplied by the operator
     * @throws Exception if downloading or saving the image fails
     */
    private String readCaptcha(String captchaUrl) throws Exception {
        logger.info("{}Captcha imgurl = {}", task_id, captchaUrl);

        HttpGet httpGet = powerHttpClient.getHttpGet(captchaUrl);
        httpGet.addHeader("Host", HOST);
        httpGet.addHeader("User-Agent", USER_AGENT);
        httpGet.addHeader("Referer", LOGIN_URL);

        byte[] imgBytes = powerHttpClient.getToByteArray(httpGet);
        Captcha captcha = new Captcha("img");
        String imgPath = CommonConstants.RUNTIME_TEMPIMG_FOLDER + captcha.getFilePath();
        CommonUtil.writeByteArrayToFile(imgBytes, imgPath);

        // Printed to stdout on purpose: tells the operator which file to open before answering.
        System.out.println(imgPath);
        return CommonUtil.consoleScanner();
    }

    /**
     * Updates the login status from the welcome page HTML.
     *
     * <p>The flag is assigned on every call, so a failed login after an
     * earlier successful one no longer leaves a stale {@code true}.
     *
     * @param html welcome page HTML; blank or {@code null} means "not logged in"
     */
    private void parseLogin(String html) {
        isLogin = StringUtils.isNotBlank(html) && html.contains(LOGIN_SUCCESS_MARKER);
        if (isLogin) {
            logger.info("{}----Congratulations, login successful---", task_id);
        }
    }

    /**
     * Manual smoke test: attempts a login with placeholder credentials.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PbccrcCrawler pbccrcCrawler = new PbccrcCrawler();
        pbccrcCrawler.login("88888", "9999999");
    }
}
Java crawler: signing in to the central bank's credit reporting website