Package Org. ZZ. test; <br/> Import org.html parser. nodefilter; <br/> Import org.html parser. parser; <br/> Import org.html parser. filters. tagnamefilter; <br/> Import org.html parser. tags. linktag; <br/> Import org.html parser. util. nodelist; <br/> Import org.html parser. util. parserexception; <br/> Import org.html parser. visitors. htmlpage; <br/> public class parserhtmlpage {<br/> Public nodelist Parser (string URL) throws parserexception {<br/>/** create a parser object based on the URL, | parser = parser. createparser (URL, encoding) **/<br/> parser = new Parser (URL); </P> <p>/** sets the encoding, it must be the same as the URL encoding. Otherwise, the **/<br/> parser is suspended. setencoding ("UTF-8"); </P> <p>/** construct an HTML page object **/<br/> htmlpage = new htmlpage (parser ); <br/> parser. visitallnodeswith (htmlpage); </P> <p>/** get all nodes under the body **/<br/> nodelist list = htmlpage. getbody (); </P> <p>/** create a filter to filter nodes. **/<br/> nodefilter filter = new tagnamefilter (""); </P> <p>/** the filtered node **/<br/> List = List. extractallnodesthatmatch (filter, true); </P> <p> for (int c = 0; C <list. size (); C ++) {<br/> linktag = (linktag) list. elementat (c); <br/> system. out. println ("[" + linktag. getstringtext () + "]" + linktag. getattribute ("href"); // obtain the value of the href attribute <br/> // system. out. println (linktag. getstringtext (); // The text that gets the link <br/>}< br/> return list; <br/>}</P> <p> Public static void main (string [] ARGs) throws parserexception {<br/> parserhtmlpage pp = new parserhtmlpage (); <br/> PP. parser ("http://www.baidu.com"); <br/>}< br/>