Import Java.io.ioexception;import Java.net.malformedurlexception;import Com.gargoylesoftware.htmlunit.browserversion;import Com.gargoylesoftware.htmlunit.failinghttpstatuscodeexception;import Com.gargoylesoftware.htmlunit.nicelyresynchronizingajaxcontroller;import Com.gargoylesoftware.htmlunit.silentcsserrorhandler;import Com.gargoylesoftware.htmlunit.webclient;import Com.gargoylesoftware.htmlunit.html.htmlpage;public class Worldbankcrawl {public static void main (string[] args) throws Failinghttpstatuscodeexception, Malformedurlexception, IOException {WebClient WebClient = new WebClient (Browserversio N.FIREFOX_24); Webclient.setcsserrorhandler (New Silentcsserrorhandler ()); Webclient.setajaxcontroller (New Nicelyresynchronizingajaxcontroller ()); Webclient.getoptions (). Setcssenabled (True); Webclient.getoptions (). setredirectenabled (false); Webclient.getoptions (). setappletenabled (false); Webclient.getoptions (). Setjavascriptenabled (True); WEbclient.getoptions (). Setpopupblockerenabled (True); Webclient.getoptions (). SetTimeout (10000); HtmlPage page = webclient.getpage ("http://huaban.com/favorite/home/"); System.out.println (Page.asxml ()); Webclient.closeallwindows ();}}
// HtmlUnit: emulating a browser to crawl data (including Ajax-loaded content)