package com.company.reptile;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A simple crawler that downloads images from a site. It does not involve
 * breadth-first or depth-first traversal; it is only meant as a learning example.
 * @author God
 */
public class JavaReptileUtil {
    // Site address
    private static final String WEB_SITE = "http://www.4493.com";
    // Regex that matches an <img> tag
    private static final String IMAGE_TAG_REG = "<img[^>]*?>";
    // Regex that matches the src path inside the tag
    private static final String IMAGE_SRC_REG = "http:\"?(.*?)(\"|>|\\s+)";

    /** Test the small crawler. */
    public static void main(String[] args) throws Exception {
        // 1. Fetch the site's HTML
        String htmlInfo = getHtmlInfo(WEB_SITE);
        // 2. Extract the image URL links
        List<String> imageSrc = getImageSrc(htmlInfo);
        // 3. Download the images
        downloadImage(imageSrc);
    }

    /** Parse the HTML page of the given host into a string. */
    public static String getHtmlInfo(String host) throws Exception {
        URL url = new URL(host);
        URLConnection urlConnection = url.openConnection();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream()));
        String buffer = null;
        StringBuffer sbf = new StringBuffer();
        while ((buffer = bufferedReader.readLine()) != null) {
            sbf.append(buffer);
        }
        return sbf.toString();
    }

    /** Resolve the src attribute of every <img> tag in the HTML. */
    public static List<String> getImageSrc(String htmlInfo) {
        Matcher matcherImage = Pattern.compile(IMAGE_TAG_REG).matcher(htmlInfo);
        List<String> imageSrc = new ArrayList<String>();
        while (matcherImage.find()) {
            Matcher matcherSrc = Pattern.compile(IMAGE_SRC_REG).matcher(matcherImage.group());
            while (matcherSrc.find()) {
                // Drop the trailing delimiter (quote, '>' or whitespace) included in the match
                imageSrc.add(matcherSrc.group().substring(0, matcherSrc.group().length() - 1));
            }
        }
        return imageSrc;
    }

    /** Download every crawled URL link. */
    public static void downloadImage(List<String> imageSrc) throws IOException {
        for (String src : imageSrc) {
            URL url = null;
            try {
                url = new URL(src);
            } catch (IOException e) {
                continue; // skip malformed links
            }
            // Read the resource and write it to disk
            DataInputStream dataInputStream = new DataInputStream(url.openStream());
            FileOutputStream fileOutputStream = new FileOutputStream(new File("f:\\beauty\\" + NetUtil.getStrName(src)));
            byte[] bytes = new byte[1024];
            int length = 0;
            while ((length = dataInputStream.read(bytes)) != -1) {
                fileOutputStream.write(bytes, 0, length);
                System.out.println("Download ...");
            }
            System.out.println("Download complete ...");
            dataInputStream.close();
            fileOutputStream.close();
        }
    }
}
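To make the regex-based extraction concrete, here is a minimal, self-contained check of getImageSrc against a hand-written HTML snippet. The RegexExtractionDemo class, the snippet, and the example URLs are illustrative additions, not part of the original post.

package com.company.reptile;

import java.util.List;

// Illustrative check of the regex extraction (not part of the original post).
public class RegexExtractionDemo {
    public static void main(String[] args) {
        String html = "<html><body>"
                + "<img src=\"http://www.example.com/pics/a.jpg\" alt=\"a\">"
                + "<img src=\"http://www.example.com/pics/b.png\">"
                + "</body></html>";
        // getImageSrc should return the two absolute image URLs, with the
        // trailing quote stripped by the substring call inside the method.
        List<String> srcs = JavaReptileUtil.getImageSrc(html);
        for (String src : srcs) {
            System.out.println(src);
        }
    }
}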
The NetUtil helper class used above extracts the image file name from a URL link:

package com.company.reptile;

public class NetUtil {
    /**
     * Get the picture file name from a URL link.
     * @param url the image URL
     * @return the last path segment of the URL
     */
    public static String getStrName(String url) {
        String[] sArry = url.split("/");
        return sArry[sArry.length - 1];
    }
}
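For clarity, a short usage sketch of the helper follows; the NetUtilDemo class and the example URL are illustrative, not taken from the crawl.

package com.company.reptile;

// Illustrative usage of NetUtil.getStrName (example URL, not from the crawl).
public class NetUtilDemo {
    public static void main(String[] args) {
        String name = NetUtil.getStrName("http://www.example.com/pics/2018/cover.jpg");
        System.out.println(name); // prints "cover.jpg"
    }
}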
Run results: screenshot of the downloaded images (image not included).