Use Java to download every image referenced by a web page.
The approach relies on two regular expressions:
1. A regular expression that matches the HTML `<img>` tags: `<img.*src=(.*?)[^>]*?>`
2. A regular expression that matches the HTTP URL of the `src` attribute inside each `<img>` tag: `http:\"?(.*?)(\"|>|\\s+)`
Implementation:
package org.swinglife.main;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads every image referenced by the {@code <img>} tags of a web page.
 *
 * <p>The work is done in four steps: fetch the page HTML, extract the
 * {@code <img>} tags, extract the absolute URLs found inside those tags, and
 * save the content of each URL to a file in the current working directory.
 *
 * @author Swinglife
 */
public class CatchImage {

    /** Address of the page to crawl. */
    private static final String PAGE_URL = "http://www.csdn.net";

    /** Character encoding used to decode the downloaded page. */
    private static final Charset ENCODING = StandardCharsets.UTF_8;

    /** Regular expression matching one complete {@code <img ...>} tag. */
    private static final String IMGURL_REG = "<img\\b[^>]*>";

    /**
     * Regular expression matching an absolute http/https URL. The match stops
     * at the first quote, angle bracket or whitespace, so no trailing
     * delimiter has to be trimmed off afterwards.
     */
    private static final String IMGSRC_REG = "https?://[^\\s\"'<>]+";

    /** Compiled once; HTML tag names are case-insensitive. */
    private static final Pattern IMG_TAG_PATTERN =
            Pattern.compile(IMGURL_REG, Pattern.CASE_INSENSITIVE);

    /** Compiled once; URL schemes are case-insensitive. */
    private static final Pattern IMG_SRC_PATTERN =
            Pattern.compile(IMGSRC_REG, Pattern.CASE_INSENSITIVE);

    /** File name used when a URL has no usable last path segment. */
    private static final String FALLBACK_FILE_NAME = "unnamed-image";

    /** Size in bytes of the buffer used when copying streams. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Crawls {@link #PAGE_URL} and downloads all images found on it.
     *
     * @param args unused
     * @throws Exception if the page itself cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        CatchImage cm = new CatchImage();
        // Fetch the HTML text of the page.
        String html = cm.getHTML(PAGE_URL);
        // Collect the <img> tags.
        List<String> imgUrl = cm.getImageUrl(html);
        // Collect the image addresses inside those tags.
        List<String> imgSrc = cm.getImageSrc(imgUrl);
        // Download the images.
        cm.download(imgSrc);
    }

    /**
     * Fetches the content of a page and decodes it as UTF-8.
     *
     * <p>All bytes are collected first and decoded once, so a multi-byte
     * character that straddles two reads is not corrupted and no stale buffer
     * content is appended.
     *
     * @param url address of the page
     * @return the decoded page content
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    String getHTML(String url) throws IOException {
        URLConnection connection = new URL(url).openConnection();
        try (InputStream in = connection.getInputStream();
             ByteArrayOutputStream page = new ByteArrayOutputStream()) {
            copy(in, page);
            return new String(page.toByteArray(), ENCODING);
        }
    }

    /**
     * Extracts every {@code <img>} tag from the given HTML.
     *
     * @param html HTML text; {@code null} is treated as empty
     * @return the matched tags in document order, never {@code null}
     */
    List<String> getImageUrl(String html) {
        List<String> listImgUrl = new ArrayList<>();
        if (html == null) {
            return listImgUrl;
        }
        Matcher matcher = IMG_TAG_PATTERN.matcher(html);
        while (matcher.find()) {
            listImgUrl.add(matcher.group());
        }
        return listImgUrl;
    }

    /**
     * Extracts every absolute http/https URL found inside the given tags.
     *
     * <p>Any absolute URL in a tag is returned, not only the value of its
     * {@code src} attribute.
     *
     * @param listImageUrl {@code <img>} tags; {@code null} is treated as empty
     * @return the URLs in encounter order, never {@code null}
     */
    List<String> getImageSrc(List<String> listImageUrl) {
        List<String> listImgSrc = new ArrayList<>();
        if (listImageUrl == null) {
            return listImgSrc;
        }
        for (String image : listImageUrl) {
            Matcher matcher = IMG_SRC_PATTERN.matcher(image);
            while (matcher.find()) {
                listImgSrc.add(matcher.group());
            }
        }
        return listImgSrc;
    }

    /**
     * Downloads each URL into a file in the current working directory.
     *
     * <p>A failure is reported for the affected URL only; the remaining
     * downloads still run.
     *
     * @param listImgSrc image addresses to download
     */
    void download(List<String> listImgSrc) {
        for (String url : listImgSrc) {
            String imageName = fileNameOf(url);
            System.out.println("Start download:" + url);
            try (InputStream in = new URL(url).openStream();
                 FileOutputStream fo = new FileOutputStream(new File(imageName))) {
                copy(in, fo);
                System.out.println(imageName + "Download Complete");
            } catch (IOException e) {
                // Report which download failed and why instead of hiding the cause.
                System.out.println("Download Failed: " + url + " (" + e + ")");
            }
        }
    }

    /**
     * Derives a local file name from the last path segment of a URL.
     *
     * @param url image address
     * @return the last path segment without query string or fragment, or
     *         {@link #FALLBACK_FILE_NAME} if that segment is empty
     */
    static String fileNameOf(String url) {
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String path = url.substring(0, end);
        String name = path.substring(path.lastIndexOf('/') + 1);
        return name.isEmpty() ? FALLBACK_FILE_NAME : name;
    }

    /** Copies all remaining bytes of {@code in} to {@code out}. */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buf = new byte[BUFFER_SIZE];
        int length;
        while ((length = in.read(buf)) != -1) {
            // Write only the bytes actually read in this pass.
            out.write(buf, 0, length);
        }
    }
}
That is all for this article. I hope it helps with your learning, and I hope you will continue to support the Cloud Habitat community.