1 Environment Construction:
1) Download
From Link: http://sourceforge.net/projects/htmlunit/files/htmlunit/
Download the latest bin file
2) About bin file
It consists of two parts: the .jar files in the lib directory, and the help files in the apidocs directory (the API documentation, provided as web pages; open index-all.html to start)
3) Configure Java Classpath (Pure manual method)
Copy all the .jar files from the lib directory to any directory (for example: c:\htmlunit\lib\)
Then right-click My Computer and go to Properties > Advanced > Environment Variables > System variables. Edit the CLASSPATH variable, or create it if it does not exist (do this if you get errors when running or compiling Java)
Be sure to add the full path of every .jar file to the classpath, rather than just the directory "c:\htmlunit\lib\". For example, .;C:\htmlunit\lib\1.jar;c:\htmlunit\lib\2.jar; is the correct form
Be sure to write out each one, and note that there is a "." at the very beginning.
2 Explanation and Description:
1) A .jar is actually a set of compiled .class files that can be opened with archive software such as RAR. So a .jar is essentially a directory
2) Some parts of the tutorial on the official website are odd and not intuitive, so I made some adjustments, mainly to make the output more intuitive
3) The specific use of each function is described in detail in the apidocs, so I do not repeat it here
3 Start Translation Tutorials
3.1 Get the title, XML code, text of the page
Importcom.gargoylesoftware.htmlunit.WebClient;ImportCom.gargoylesoftware.htmlunit.html.HtmlPage;Importcom.gargoylesoftware.htmlunit.BrowserVersion;Importcom.gargoylesoftware.htmlunit.html.HtmlDivision;ImportCom.gargoylesoftware.htmlunit.html.HtmlAnchor;Importcom.gargoylesoftware.htmlunit.*;Importcom.gargoylesoftware.htmlunit.WebClientOptions;ImportCom.gargoylesoftware.htmlunit.html.HtmlInput;ImportCom.gargoylesoftware.htmlunit.html.HtmlBody;Importjava.util.List; Public classhellohtmlunit{ Public Static voidMain (string[] args)throwsexception{String str; //Create a WebClientWebClient WebClient =NewWebClient (); //htmlunit support for CSS and JavaScript is not good, so please close theWebclient.getoptions (). setjavascriptenabled (false); Webclient.getoptions (). setcssenabled (false); //Get pageHtmlPage page = webclient.getpage ("http://www.baidu.com/"); //gets the title of the pagestr =Page.gettitletext (); System.out.println (str); //get the XML code for the pagestr =Page.asxml (); System.out.println (str); //get the text of a pagestr =Page.astext (); System.out.println (str); //close WebClientwebclient.closeallwindows (); }}
3.2 Open with a different version of the browser
Importcom.gargoylesoftware.htmlunit.WebClient;ImportCom.gargoylesoftware.htmlunit.html.HtmlPage;Importcom.gargoylesoftware.htmlunit.BrowserVersion;Importcom.gargoylesoftware.htmlunit.html.HtmlDivision;ImportCom.gargoylesoftware.htmlunit.html.HtmlAnchor;Importcom.gargoylesoftware.htmlunit.*;Importcom.gargoylesoftware.htmlunit.WebClientOptions;ImportCom.gargoylesoftware.htmlunit.html.HtmlInput;ImportCom.gargoylesoftware.htmlunit.html.HtmlBody;Importjava.util.List; Public classhellohtmlunit{ Public Static voidMain (string[] args)throwsexception{String str; //use Firefox to read Web pagesWebClient WebClient =NewWebClient (browserversion.firefox_24); //htmlunit support for CSS and JavaScript is not good, so please close theWebclient.getoptions (). setjavascriptenabled (false); Webclient.getoptions (). setcssenabled (false); HtmlPage Page= Webclient.getpage ("http://www.baidu.com/"); STR=Page.gettitletext (); System.out.println (str); //close WebClientwebclient.closeallwindows (); }}
3.3 Find specific elements in the page
Public classhellohtmlunit{ Public Static voidMain (string[] args)throwsexception{//Create WebClientWebClient WebClient =NewWebClient (browserversion.chrome); //htmlunit support for CSS and JavaScript is not good, so please close theWebclient.getoptions (). setjavascriptenabled (false); Webclient.getoptions (). setcssenabled (false); HtmlPage Page= (htmlpage) webclient.getpage ("http://www.baidu.com/"); //get "Baidu button" by IDHtmlinput btn = (htmlinput) Page.gethtmlelementbyid ("Su"); System.out.println (Btn.getdefaultvalue ()); //close WebClientwebclient.closeallwindows (); }}
3.4 Element Retrieval
Public classhellohtmlunit{ Public Static voidMain (string[] args)throwsexception{//Create WebClientWebClient WebClient =NewWebClient (browserversion.chrome); //htmlunit support for CSS and JavaScript is not good, so please close theWebclient.getoptions (). setjavascriptenabled (false); Webclient.getoptions (). setcssenabled (false); HtmlPage Page= (htmlpage) webclient.getpage ("http://www.baidu.com/"); //Find all Divlist<?> hblist = Page.getbyxpath ("//div"); HTMLDivision HB= (htmldivision) hblist.get (0); System.out.println (Hb.tostring ()); //find and get specific inputlist<?> inputlist = Page.getbyxpath ("//input[@id = ' su ']"); Htmlinput input= (htmlinput) inputlist.get (0); System.out.println (Input.tostring ()); //close WebClientwebclient.closeallwindows (); }}
3.5 Submit Search
Public classhellohtmlunit{ Public Static voidMain (string[] args)throwsexception{//Create WebClientWebClient WebClient =NewWebClient (browserversion.chrome); //htmlunit support for CSS and JavaScript is not good, so please close theWebclient.getoptions (). setjavascriptenabled (false); Webclient.getoptions (). setcssenabled (false); HtmlPage Page= (htmlpage) webclient.getpage ("http://www.baidu.com/"); //Get search Input box and submit search contentHtmlinput input = (htmlinput) Page.gethtmlelementbyid ("kw"); System.out.println (Input.tostring ()); Input.setvalueattribute ("Ya Minute Fly Butterfly"); System.out.println (Input.tostring ()); //Get the Search button and clickHtmlinput btn = (htmlinput) Page.gethtmlelementbyid ("Su"); HtmlPage Page2=Btn.click (); //output text for a new pageSystem.out.println (Page2.astext ()); }}
HtmlUnit Official Website Simple Tutorial (translation)