We can first save a web page to the local hard disk, then read that file from its address (path) line by line, and use this regular expression to check whether the text that was read contains an email address, capturing every address that matches. It is said that this is how spammers first harvest other people's email addresses from web pages. The complete small program is as follows:
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.*;
/**
 * Minimal e-mail address harvester.
 *
 * Reads a locally saved web page line by line and prints every substring
 * that looks like an e-mail address to standard output, one per line.
 */
public class spider {

    /** File that is scanned when no path is supplied on the command line. */
    private static final String DEFAULT_FILE = "F:\\A.MHT";

    /**
     * Regular expression of the e-mail address: one or more word characters,
     * dots or hyphens, an '@', the same again for the host part, then a
     * literal dot followed by a word-character suffix.
     * Compiled once here instead of once per input line.
     */
    private static final Pattern EMAIL = Pattern.compile("[\\w.-]+@[\\w.-]+\\.\\w+");

    /**
     * Program entry point.
     *
     * @param args optional; when present, args[0] is the path of the file to
     *             scan, otherwise {@link #DEFAULT_FILE} is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        // try-with-resources closes the reader on every exit path,
        // including when readLine() throws.
        try (BufferedReader buf = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = buf.readLine()) != null) {
                pattern(line);
            }
        } catch (FileNotFoundException e) {
            // The file does not exist or cannot be opened for reading.
            e.printStackTrace();
        } catch (IOException e) {
            // Reading or closing the file failed part-way through.
            e.printStackTrace();
        }
    }

    /**
     * Collects every e-mail-like substring of one line of text.
     *
     * @param line the text to scan; {@code null} is treated as empty
     * @return the matches in order of appearance, never {@code null}
     */
    static List<String> extractEmails(String line) {
        List<String> found = new ArrayList<String>();
        if (line == null) {
            return found;
        }
        Matcher m = EMAIL.matcher(line);
        while (m.find()) {
            found.add(m.group());
        }
        return found;
    }

    /**
     * Prints every e-mail-like substring of the given line to standard
     * output, one address per line.
     *
     * @param line the text to scan
     */
    private static void pattern(String line) {
        for (String address : extractEmails(line)) {
            System.out.println(address);
        }
    }
}