import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Web crawler demo: a crawler is simply a program that collects data matching
 * a given rule from some source on the network.
 *
 * This example collects e-mail addresses, either from a web page or from a
 * local HTML file.
 */
public class RegexTest2 {

    /**
     * Rule describing an e-mail address: word characters, an at-sign, word
     * characters, then one or more dot-separated word groups.
     * Compiled once and shared by every extraction call.
     */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Prints, one per line, every e-mail address found on the demo web page.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        List<String> list = getMailsByWeb();

        for (String mail : list) {
            System.out.println(mail);
        }
    }

    /**
     * Collects the e-mail addresses found in the page served at
     * http://192.168.1.100:8080/myweb/mail.html.
     *
     * @return the addresses in the order they appear (duplicates are kept)
     * @throws IOException if the URL cannot be opened or read
     */
    public static List<String> getMailsByWeb() throws IOException {
        // 1. Open the source: here the page is fetched over HTTP.
        URL url = new URL("http://192.168.1.100:8080/myweb/mail.html");
        BufferedReader bufIn = new BufferedReader(new InputStreamReader(url.openStream()));
        try {
            return extractMails(bufIn);
        } finally {
            // Always release the network stream, even when reading fails.
            bufIn.close();
        }
    }

    /**
     * Collects the e-mail addresses found in the local file c:\mail.html.
     *
     * @return the addresses in the order they appear (duplicates are kept)
     * @throws IOException if the file cannot be opened or read
     */
    public static List<String> getMails() throws IOException {
        // 1. Open the source: here a local file.
        BufferedReader bufr = new BufferedReader(new FileReader("c:\\mail.html"));
        try {
            return extractMails(bufr);
        } finally {
            // Always release the file handle, even when reading fails.
            bufr.close();
        }
    }

    /**
     * Reads the given reader line by line and gathers every substring that
     * matches {@link #MAIL_PATTERN}.
     *
     * Package-private so the matching logic can be exercised without network
     * or file access. The reader is not closed here; the caller owns it.
     *
     * @param reader source of text lines
     * @return every match, in reading order
     * @throws IOException if reading a line fails
     */
    static List<String> extractMails(BufferedReader reader) throws IOException {
        List<String> list = new ArrayList<String>();

        String line = null;
        while ((line = reader.readLine()) != null) {
            // 2. Apply the rule to each line that was read.
            Matcher m = MAIL_PATTERN.matcher(line);
            while (m.find()) {
                // 3. Store every piece of data that satisfies the rule.
                list.add(m.group());
            }
        }
        return list;
    }
}
Regular Expressions--web crawler