First, add the Jsoup core JAR package (jsoup-xxx.jar) to the project.
Jar Package: jsoup-1.8.2.jar
English API address: http://www.open-open.com/jsoup/parsing-a-document.htm
Second, use Jsoup in Java to crawl a web page's source and download its pictures in bulk.
package com.dgh.test;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Crawls a web page with Jsoup and downloads every image it references.
 *
 * @author Wangcunhuazi
 */
public class JsoupTest {

    /** The web page address where the resources are located. */
    private static final String RESOURCE_URL = "http://www.csdn.net/";

    /** Local directory in which downloaded files are saved. */
    private static final String DOWNLOAD_FILE_PATH = "e://downloadimage//";

    /** Connect/read timeout so that one stalled server cannot hang the whole run. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Size of the copy buffer used while streaming an image to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Downloads a single image to the local directory {@code filePath}.
     *
     * <p>The part of the URL after the last {@code '/'} is used, unencoded, as the local
     * file name. The same part is URL-encoded (UTF-8, spaces as {@code %20}) before the
     * request is made, so names containing spaces or non-ASCII characters can be fetched.
     *
     * <p>Downloading is best-effort: I/O failures are reported on {@code System.err} and
     * the method returns normally so that a bulk run can continue with the next image.
     * A {@code null} or empty URL, a URL with no file name after the last {@code '/'},
     * and a non-HTTP(S) URL are skipped.
     *
     * @param filePath local directory in which the picture is saved; created if missing
     * @param imgUrl   absolute address of the picture
     * @throws UnsupportedEncodingException if the UTF-8 encoding is not supported
     */
    public static void downImages(String filePath, String imgUrl)
            throws UnsupportedEncodingException {
        // <img> tags without a usable src yield an empty address; nothing to fetch.
        if (imgUrl == null || imgUrl.isEmpty()) {
            return;
        }

        // Front part of the picture URL, for example "http://images.csdn.net/".
        int lastSlash = imgUrl.lastIndexOf('/');
        String beforeUrl = imgUrl.substring(0, lastSlash + 1);
        // Rear part of the picture URL, for example "20150529/pp6a7429_ copy 1.jpg".
        String fileName = imgUrl.substring(lastSlash + 1);
        if (fileName.isEmpty()) {
            System.err.println("Skipping URL without a file name: " + imgUrl);
            return;
        }

        // URLEncoder turns a space into "+"; in a URL path it must be "%20" instead.
        String encodedName = URLEncoder.encode(fileName, "UTF-8").replace("+", "%20");
        String encodedUrl = beforeUrl + encodedName;

        File directory = new File(filePath);
        if (!directory.exists() && !directory.mkdirs()) {
            System.err.println("Could not create download directory: " + filePath);
            return;
        }
        // The local file keeps the name as it was before encoding.
        File target = new File(directory, fileName);

        HttpURLConnection connection = null;
        try {
            URL url = new URL(encodedUrl);
            String protocol = url.getProtocol();
            if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
                // The cast below is only valid for HTTP(S) connections.
                System.err.println("Skipping non-HTTP URL: " + encodedUrl);
                return;
            }

            connection = (HttpURLConnection) url.openConnection();
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);

            // The input stream is opened first, so a failed request leaves no empty file;
            // both streams are closed even when the copy fails midway.
            try (InputStream in = connection.getInputStream();
                 FileOutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to download " + encodedUrl);
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Fetches the page at {@link #RESOURCE_URL}, then downloads the picture referenced by
     * each {@code img} tag into {@link #DOWNLOAD_FILE_PATH}.
     *
     * @param args unused
     * @throws IOException if the page itself cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException {
        // Get and parse the HTML document from the web site.
        Document document = Jsoup.connect(RESOURCE_URL).get();
        // Get all img tags.
        Elements elements = document.getElementsByTag("img");
        for (Element element : elements) {
            // The "abs:" prefix asks for the src attribute as an absolute address.
            String imgSrc = element.attr("abs:src");
            System.out.println("Downloading Picture:-----------" + imgSrc);
            downImages(DOWNLOAD_FILE_PATH, imgSrc);
            System.out.println("Picture download Completed:-----------" + imgSrc);
            System.out.println("------------------------------------------------------------------------------------------- ------------------");
        }
        // The figure is the number of img tags found; duplicates are not removed.
        System.out.println("Total downloaded" + elements.size() + "files (not Heavy)");
    }
}
More on how to use Jsoup: http://blog.csdn.net/wangcunhuazi/article/details/46237277
http://blog.csdn.net/wangcunhuazi/article/details/46237211
http://blog.csdn.net/wangcunhuazi/article/details/46237325
Using Jsoup in Java to crawl a web page's source and download pictures in bulk