package com.blue.common.util;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility for pulling image addresses out of HTML markup.
 *
 * <p>The extraction is regex-based rather than a real HTML parse, so it is a
 * best-effort scan: an {@code <img>} tag is taken to end at the first
 * {@code >} character, even if that character sits inside a quoted attribute
 * value.
 */
public class Getimagepathutil {

    /** Matches a single {@code <img ...>} tag, ignoring case. */
    private static final Pattern IMG_TAG =
            Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Matches a {@code src} attribute inside a tag, ignoring case.
     *
     * <p>The negative lookbehind keeps attributes that merely end in "src"
     * (for example {@code data-src}) from matching. The value is captured in
     * group 1 (double-quoted), group 2 (single-quoted) or group 3 (unquoted).
     */
    private static final Pattern SRC_ATTR = Pattern.compile(
            "(?<![\\w-])src\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
            Pattern.CASE_INSENSITIVE);

    /**
     * Gets the addresses of the pictures referenced by {@code <img>} tags in
     * the given HTML.
     *
     * @param htmlstr the HTML markup to scan; may be {@code null} or empty
     * @return a new, mutable set holding each distinct non-empty {@code src}
     *         value with surrounding whitespace trimmed; empty (never
     *         {@code null}) when the input is {@code null}, empty, or
     *         contains no image addresses
     */
    public static Set<String> Getimgstr(String htmlstr) {
        Set<String> pics = new HashSet<String>();
        if (htmlstr == null || htmlstr.isEmpty()) {
            return pics;
        }

        Matcher tagMatcher = IMG_TAG.matcher(htmlstr);
        while (tagMatcher.find()) {
            // Search only inside the matched <img> tag so that src attributes
            // of other elements (script, iframe, ...) are never collected.
            Matcher srcMatcher = SRC_ATTR.matcher(tagMatcher.group());
            while (srcMatcher.find()) {
                // Exactly one of the three alternative groups participates.
                String address = srcMatcher.group(1);
                if (address == null) {
                    address = srcMatcher.group(2);
                }
                if (address == null) {
                    address = srcMatcher.group(3);
                }

                address = address.trim();
                // An empty src carries no address, so it is not reported.
                if (!address.isEmpty()) {
                    pics.add(address);
                }
            }
        }
        return pics;
    }
}
// Java: get the set of image addresses found in an HTML page's source code.