import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Getmail {
/**
 * Program entry point: calls {@code Getmails()} followed by {@code Getmails_url()}.
 *
 * <p>The method must be spelled {@code main} and take {@code String[]} for the JVM
 * to recognise it as the launch point.
 *
 * @param args command-line arguments (not read)
 * @throws Exception anything propagated from the two calls
 */
public static void main(String[] args) throws Exception {
    Getmails();
    Getmails_url();
}
public static void Getmails_url () throws Exception {
URL url = new URL ("https://wenku.baidu.com/view/ce81b0a1ddccda38366baf61.html");//This is the page to crawl
URLConnection conn = Url.openconnection ();
BufferedReader bufr = new BufferedReader (New InputStreamReader (Conn.getinputstream ()));
String line = null;
String maileres = "[\u4e00-\u9fa5]+";//Store the rules that need to be set
Match mailbox: "\\[email protected]\\w+ (\\.\\w+) +"
Match Chinese characters: "[\u4e00-\u9fa5]+";
Match QQ number: "[1-9][0-9]{4,14}"
QQ e-mail: "(.) [Email protected] (.) +(\\. [a-z]+] {1,} ";
Pattern p = pattern.compile (maileres);
while ((Line=bufr.readline ())!=null) {
Matcher m = p.matcher (line);
while (M.find ()) {
System.out.println (M.group ());
}
}
}
// Java: crawl a web page for QQ numbers, e-mail addresses, etc.