ImportJava.io.File;ImportJava.io.FileInputStream;Importjava.io.FileNotFoundException;ImportJava.io.FileOutputStream;Importjava.io.IOException;ImportJava.io.ObjectInputStream;Importjava.io.UnsupportedEncodingException;Importjava.net.MalformedURLException;ImportJava.text.SimpleDateFormat;Importjava.util.List;ImportJava.util.Random;Importjava.util.concurrent.Callable;ImportOrg.apache.http.client.CookieStore;ImportOrg.apache.log4j.Logger;Importcom.gargoylesoftware.htmlunit.BrowserVersion;Importcom.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;Importcom.gargoylesoftware.htmlunit.WebClient;ImportCom.gargoylesoftware.htmlunit.html.HtmlPage;ImportCom.gargoylesoftware.htmlunit.util.Cookie; Public classSinasearchcrawlercommandImplementsCallable<object> { Private StaticLogger Logger = Logger.getlogger (Sinasearchcrawlercommand.class); Private StaticString word= "Like Home"; Private StaticString cookiepath= "e:\\ learn \ \ Weibo crawler \\cookie\\cookie.file"; Private StaticString outputpath= "e:\\ learn \ \ Weibo crawler \ \"; //Public Object Call () { Public Static voidMain (string[] args) {Try{Word= Java.net.URLEncoder.encode (Word, "utf-8"); } Catch(unsupportedencodingexception E2) {//TODO auto-generated Catch blockE2.printstacktrace (); } WebClient WebClient=NewWebClient (BROWSERVERSION.FIREFOX_17); Webclient.getcookiemanager (). 
setcookiesenabled (true); for(inti=1;i<=100;i++) {System.out.println (Cookiepathappendrandom ()); File File=NewFile (Cookiepathappendrandom ()); if(File.exists ()) {FileInputStream fin=NULL; Try{fin=Newfileinputstream (file); } Catch(FileNotFoundException E1) {e1.printstacktrace (); } cookiestore Cookiestore=NULL; ObjectInputStream in; Try{ in=NewObjectInputStream (Fin); Cookiestore=(Cookiestore) in.readobject (); In.close (); } Catch(IOException e) {logger.error (e); } Catch(ClassNotFoundException e) {logger.error (e); } List<org.apache.http.cookie.Cookie> L =cookiestore.getcookies (); for(Org.apache.http.cookie.Cookie temp:l) {Cookie Cookie=NewCookie (Temp.getdomain (), Temp.getname (), Temp.getvalue (), Temp.getpath (), Temp.getexpirydate () , false); Webclient.getcookiemanager (). Addcookie (cookie); } /*HtmlPage page = null; try {page = Webclient.getpage ("http://weibo.cn/search/?tf=5_012"); } catch (Failinghttpstatuscodeexception e) {logger.error (e); } catch (Malformedurlexception e) {logger.error (e); } catch (IOException e) {logger.error (e); } HtmlForm form = Page.getforms (). get (0); Htmlsubmitinput button = Form.getinputbyname ("Smblog"); Form.getinputbyname ("keyword"). Setvalueattribute (Word); Logger.info ("Search:" + word); try {page = Button.Click (); } catch (IOException E1) {Logger.error (E1); }*/HtmlPage Page=NULL; Try { //logger.info ("execution:" +this);page = Webclient.getpage ("http://weibo.cn/search/mblog?hideSearchFrame=&keyword=" +word+ "&page=" +i); } Catch(failinghttpstatuscodeexception e) {logger.error (e); } Catch(malformedurlexception e) {logger.error (e); } Catch(IOException e) {logger.error (e); } SimpleDateFormat DayFormat=NewSimpleDateFormat ("YyyyMMdd"); LongStart =System.currenttimemillis (); Start=System.currenttimemillis (); String Path=NULL; File file2=NULL; Path=NewString (OutputPath + "/" +Dayformat.format (start)+ "/" + system.currenttimemillis () + file.getname () + ". 
html" ); File2=NewFile (OutputPath + "/" +Dayformat.format (start)); if(!file2.exists ()) file2.mkdirs (); File2=NewFile (path); System.out.println ("Current Page" +i+ ", collect to" +path); if(File2.exists ()) Logger.warn ("OutFile exit!"); Else{FileOutputStream outputstream; Try{OutputStream=NewFileOutputStream (file2); Outputstream.write (Page.getwebresponse (). getcontentasstring (). GetBytes ()); Outputstream.close (); } Catch(FileNotFoundException e) {logger.error (e); } Catch(IOException e) {logger.error (e); }} webclient.closeallwindows (); } Else{Logger.warn ("Cookiepath doesn ' t exit!!!"); } logger.info ("Execution:"); Try{Thread.Sleep (10000); } Catch(interruptedexception e) {logger.error (e); return; } } return; } Private StaticString cookiepathappendrandom () {random random=NewRandom (); returnCookiepath+random.nextint (7); } PublicSinasearchcrawlercommand (string Word, String cookiepath, String outputpath) {if(Word.contains ("&")) {Word= Word.replace ("&", "" "); } This. Word =Word; This. Cookiepath =Cookiepath; This. OutputPath =OutputPath; } @Override PublicString toString () {return"Sinasearchcrawlercommand [word=" + Word + ", outputpath=" + OutputPath + "]"; } @Override PublicObject Call ()throwsException {//TODO auto-generated Method Stub return NULL; }}
// Java: fetching Weibo search results for a keyword