Crawlerpicture.java file
package com.lym.crawlerdemo;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.lym.mode.Picture;

/**
 * Crawls pictures from the list pages of http://m.qqba.com/ and saves them to
 * the local disk.
 *
 * @author Administrator
 */
public class Crawlerpicture {

    /** Number of the first list page to crawl (inclusive). */
    public final static int startpage = 301;

    /** Page number at which crawling stops (exclusive). */
    public final static int endpage = 500;

    /** Directory the downloaded pictures are written to. */
    private static final String SAVE_DIR = "d:/picture/";

    /**
     * Characters that must not appear in a file name: path separators, the
     * characters Windows rejects, and control characters.
     */
    private static final Pattern ILLEGAL_FILE_NAME_CHARS =
            Pattern.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");

    /**
     * Collects the {@code src} and {@code alt} attribute values of every
     * {@code img} found inside a {@code div.image-cell} on the list pages
     * {@code startpage} (inclusive) to {@code endpage} (exclusive).
     *
     * <p>Relative {@code src} values are resolved against the page URL. Images
     * without a usable {@code src} are skipped, so every returned picture has a
     * non-empty address. Ids are assigned sequentially starting at 1.
     *
     * @return the pictures found, in crawl order; never {@code null}
     * @throws IOException if a list page cannot be fetched
     */
    public static List<Picture> getPictureUrl() throws IOException {
        int number = 1;
        List<Picture> pics = new ArrayList<Picture>();
        for (int i = startpage; i < endpage; i++) {
            String url = "http://m.qqba.com/people/list/" + i + ".htm";
            Document doc = Jsoup.connect(url).get();
            Elements cells = doc.body().select("div.image-cell");
            for (Element cell : cells) {
                for (Element img : cell.select("img")) {
                    // absUrl yields "" when the attribute is missing or cannot be
                    // made absolute; such an entry could never be downloaded.
                    String src = img.absUrl("src");
                    if (src.isEmpty()) {
                        continue;
                    }
                    Picture pic = new Picture();
                    pic.setId(number++);
                    pic.setSrc(src);
                    pic.setAlt(img.attr("alt"));
                    pics.add(pic);
                }
            }
        }
        return pics;
    }

    /**
     * Opens an input stream on the picture at the given address.
     *
     * @param picUrl picture URL
     * @return an open stream positioned at the start of the picture data; the
     *         caller is responsible for closing it
     * @throws IOException if the URL is malformed or the connection fails
     */
    public static InputStream getPictureInputStream(String picUrl) throws IOException {
        URL url = new URL(picUrl);
        return new DataInputStream(url.openStream());
    }

    /**
     * Copies the picture data from {@code in} to a file named
     * {@code <alt>--<id>.jpg} inside the save directory, creating the directory
     * when it does not exist yet.
     *
     * <p>The {@code alt} text comes from a remote page, so characters that are
     * illegal in file names (including path separators) are replaced before it
     * is used. The stream {@code in} is read to its end but not closed.
     *
     * @param in  stream delivering the picture data
     * @param pic picture whose alt text and id form the file name
     * @throws IOException if the directory cannot be created or copying fails
     */
    public static void savePicture(InputStream in, Picture pic) throws IOException {
        File dir = new File(SAVE_DIR);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Cannot create picture directory " + dir);
        }
        File target = new File(dir, toSafeFileName(pic.getAlt()) + "--" + pic.getId() + ".jpg");
        // try-with-resources closes the file even when read or write throws.
        try (OutputStream out = new FileOutputStream(target)) {
            byte[] buf = new byte[1024];
            int len;
            while ((len = in.read(buf)) != -1) {
                out.write(buf, 0, len);
            }
        }
    }

    /** Replaces characters that are not allowed in a file name; null becomes "". */
    private static String toSafeFileName(String text) {
        if (text == null) {
            return "";
        }
        return ILLEGAL_FILE_NAME_CHARS.matcher(text.trim()).replaceAll("_");
    }

    /**
     * Crawls the configured pages and downloads every picture found. A picture
     * that fails to download is reported on standard error and skipped so the
     * remaining pictures are still fetched.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Picture> pics;
        try {
            pics = getPictureUrl();
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }
        System.out.println("Picture is downloading ...");
        for (Picture pic : pics) {
            try (InputStream in = getPictureInputStream(pic.getSrc())) {
                savePicture(in, pic);
            } catch (IOException e) {
                System.err.println("Failed to download " + pic.getSrc() + ": " + e);
            }
        }
        System.out.println("Download done! ");
    }
}
Picture.java file
Package Com.lym.mode;public class Picture {/** * image number */private int id;/** * Picture address */private String src;/** * Picture description information */priv ATE String alt;public int getId () {return ID;} public void setId (int id) {this.id = ID;} Public String getsrc () {return src;} public void Setsrc (String src) {this.src = src;} Public String Getalt () {return alt;} public void Setalt (String alt) {this.alt = alt;} @Overridepublic String toString () {return "picture [id=" + ID + ", src=" + src + ", alt=" + ALT + "]";}}
Copyright notice: This is an original article by the blog author and may not be reproduced without the author's permission.
A simple web crawler: crawling beautiful pictures from the Internet