package com.baidu;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
public class Jsoupnode {
/*
* Jsoup is a Java HTML parser that can parse a URL address and HTML text content directly.
* It provides a very labor-saving API that can be used to extract and manipulate data through dom,css and jquery-like operations.
*/
public static void Main (string[] args) throws IOException {
Htmldemo ();//html formatting
HTMLTitle ();//Get header information for a website
Htmlpost ();//Get information about the site
HtmlBody ();
Htmlquery ();
}
public static void Htmlquery () throws IOException {
Document doc = Jsoup.connect ("http://example.com")
. Data ("Query", "Java")//Request
. useragent ("Mozilla")//useragent
. Cookie ("auth", "token")//cookie
. Timeout. Post ();//Request method
System.out.println (DOC);
}
public static void HtmlBody () {
String html = "<div><p>lorem ipsum.</p>";
Document doc = jsoup.parsebodyfragment (HTML);
Element BODY = Doc.body ();
System.out.println (body);
}
public static void Htmldemo () {
Enter HTML documents directly from the string
String HTML = "+ "<body><p> here is a related article on the Jsoup Project </p></body>Document doc = jsoup.parse (HTML);
System.out.println (DOC);
}
public static void HTMLTitle () throws IOException {
Loading HTML documents directly from URLs (header information)
Document doc = Jsoup.connect ("http://www.baidu.com/"). get ();
String title = Doc.title ();
System.out.println (title);
}
public static void Htmlpost () throws IOException {
Loading HTML documents directly from URLs
Document doc = Jsoup.connect ("http://www.baidu.com/")
. Data ("Query", "Java")//Request Parameters
. useragent ("I ' m Jsoup")//settings user-agent
. Cookies ("auth", "token")//Set Cookies
. Timeout (3000)//Set connection time-out
. Post (); Using the POST method to access URLs
System.out.println (DOC);
}
}
// Jsoup getting-started program