Regular Expression, Web Crawler

Source: Internet
Author: User

// Package in the original listing: cn.itcast.regex.demo
// (kept as a comment: this page concatenates several listings, and one
// compilation unit may contain only one package declaration)

/**
 * Introduction to regular expressions.
 *
 * <p>Regular expressions describe and manipulate string data using a compact
 * notation built from special symbols; learning them is mostly a matter of
 * learning those symbols. The notation shortens code considerably, at the cost
 * of readability.
 *
 * <p>The same rule (a valid QQ number) is checked twice: by hand in
 * {@link #checkQQ(String)} and with the single pattern {@link #QQ_REGEX}.
 */
public class RegexDemo {

    /** A QQ number: 5 to 15 ASCII digits, the first of which is not 0. */
    static final String QQ_REGEX = "[1-9][0-9]{4,14}";

    /** Quantifier demo: an 'a', then four to six 'o', then a 'b'. */
    static final String QUANTIFIER_REGEX = "ao{4,6}b";

    /**
     * Runs the hand-written check and the regex check on the same sample.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String qq = "123k4567";

        // Hand-written validation.
        checkQQ(qq);

        // The same rule expressed as one regular expression.
        boolean qqMatches = qq.matches(QQ_REGEX);
        System.out.println(qq + ":" + qqMatches);

        // Bounded quantifier: the sample has more 'o's than {4,6} allows.
        String str = "aoooooooooob";
        boolean strMatches = str.matches(QUANTIFIER_REGEX);
        System.out.println(str + ":" + strMatches);
    }

    /**
     * Verifies a QQ number without regular expressions and prints the verdict.
     *
     * <p>Rules, checked in this order: the length is 5 to 15, the number does
     * not start with 0, and every character is a digit.
     *
     * @param qq the candidate number; must not be {@code null}
     * @throws NullPointerException if {@code qq} is {@code null}
     */
    public static void checkQQ(String qq) {
        int len = qq.length();
        if (len < 5 || len > 15) {
            System.out.println(qq + ": incorrect length");
            return;
        }
        if (qq.startsWith("0")) {
            System.out.println(qq + ": cannot start with 0");
            return;
        }
        // Long.parseLong is not used for this check: it accepts a leading
        // '+' or '-', so a value such as "-12345" would be reported as correct.
        if (!isAllAsciiDigits(qq)) {
            System.out.println(qq + ": contains invalid characters");
            return;
        }
        System.out.println(qq + ": Correct");
    }

    /** Returns true when every character of {@code text} is in '0'..'9'. */
    private static boolean isAllAsciiDigits(String text) {
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }
}


// Package in the original listing: cn.itcast.regex.demo
// (kept as a comment: this page concatenates several listings, and one
// compilation unit may contain only one package declaration)

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * The four common regular-expression operations on strings.
 *
 * <ol>
 *   <li>Match - what {@code String.matches} does.</li>
 *   <li>Split - what {@code String.split} does.</li>
 *   <li>Replace - what {@code String.replaceAll} does.</li>
 *   <li>Find - done with {@link Pattern} and {@link Matcher} directly.</li>
 * </ol>
 *
 * <p>Each pattern is compiled once into a constant instead of being recompiled
 * on every call.
 */
public class RegexDemo2 {

    /** A mobile number: 1, then 3, 5 or 8, then nine more digits. */
    private static final Pattern MOBILE = Pattern.compile("1[358]\\d{9}");

    /** Any character followed by one or more copies of itself (group 1 = the character). */
    private static final Pattern REPEATED_CHAR = Pattern.compile("(.)\\1+");

    /** An 11-digit number split as 3 + 4 + 4; only the outer parts are captured. */
    private static final Pattern TEL_PARTS = Pattern.compile("(\\d{3})\\d{4}(\\d{4})");

    /** A whole word made of exactly three lower-case letters. */
    private static final Pattern THREE_LETTER_WORD = Pattern.compile("\\b[a-z]{3}\\b");

    /**
     * Runs the "find" demonstration.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        functionDemo_4();
    }

    /**
     * Find: prints every three-letter word of a sentence with its position.
     *
     * <p>The general recipe is to wrap the rule in a {@link Pattern}, bind it
     * to the input with {@link Pattern#matcher(CharSequence)}, and then drive
     * the resulting {@link Matcher}. To collect sub-sequences, call
     * {@link Matcher#find()} repeatedly; {@link Matcher#group()} returns the
     * text of the current hit.
     */
    public static void functionDemo_4() {
        String str = "da jia hao,ming tian bu fang jia!";
        Matcher m = THREE_LETTER_WORD.matcher(str);

        System.out.println(str);
        while (m.find()) {
            System.out.println(m.group());                  // the matched sub-sequence
            System.out.println(m.start() + ":" + m.end());  // start inclusive, end exclusive
        }
    }

    /**
     * Replace: collapses runs of a repeated character, then masks the middle
     * four digits of a phone number.
     *
     * <p>{@code $1} and {@code $2} in a replacement string refer to the text
     * captured by the first and second group of the pattern.
     */
    public static void functionDemo_3() {
        String str = "zhangsanttxiaoqiangmmmmmmzhaoliu";
        str = REPEATED_CHAR.matcher(str).replaceAll("$1");
        System.out.println(str);

        String tel = "15800001111";
        // One asterisk per hidden digit: 158****1111
        tel = TEL_PARTS.matcher(tel).replaceAll("$1****$2");
        System.out.println(tel);
    }

    /**
     * Split: cuts a string wherever a character is repeated.
     *
     * <p>Parentheses create groups, numbered by their opening parenthesis; in
     * {@code (A)(B(C))} group 1 is A, group 2 is B(C) and group 3 is C.
     * Inside the pattern, {@code \1} refers back to what group 1 matched.
     */
    public static void functionDemo_2() {
        String str = "zhangsanttttxiaoqiangmmmmmmzhaoliu";
        String[] names = REPEATED_CHAR.split(str);
        for (String name : names) {
            System.out.println(name);
        }
    }

    /** Match: checks whether a mobile phone number is well formed. */
    public static void functionDemo_1() {
        String tel = "15800001111";
        boolean b = MOBILE.matcher(tel).matches();
        System.out.println(tel + ":" + b);
    }
}



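Wrapping part of a pattern in a pair of parentheses creates a group. What a group matched can be referenced later: inside the pattern with a backreference such as \1, and inside a replacement string with a dollar reference such as $1.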
// Package in the original listing: cn.itcast.regex.test
// (kept as a comment: this page concatenates several listings, and one
// compilation unit may contain only one package declaration)

import java.util.TreeSet;

/**
 * Three regular-expression exercises.
 *
 * <ol>
 *   <li>{@link #test_1()} - curing a stutter.</li>
 *   <li>{@link #test_2()} - sorting IP addresses.</li>
 *   <li>{@link #test_3()} - verifying an e-mail address.</li>
 * </ol>
 */
public class RegexTest {

    /** Stricter e-mail rule: limited character sets and 1-3 letter domain parts. */
    static final String STRICT_MAIL_REGEX = "[a-zA-Z0-9_]+@[a-zA-Z0-9]+(\\.[a-zA-Z]{1,3})+";

    /** Looser e-mail rule; it even accepts something like 1@1.1. */
    static final String LOOSE_MAIL_REGEX = "\\w+@\\w+(\\.\\w+)+";

    /**
     * Runs the e-mail exercise.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        test_3();
    }

    /**
     * Verifies an e-mail address against {@link #LOOSE_MAIL_REGEX} and prints
     * the result. {@link #STRICT_MAIL_REGEX} is the stricter alternative.
     */
    public static void test_3() {
        String mail = "abc1@sina.com.cn";
        boolean b = mail.matches(LOOSE_MAIL_REGEX);
        System.out.println(mail + ":" + b);
    }

    /**
     * Cures a stutter in two steps: removes every run of dots, then collapses
     * every run of one repeated character to a single character.
     */
    public static void test_1() {
        // NOTE(review): this sample looks machine-translated. The second step
        // works on repeated characters, so it was presumably written for a
        // sample that repeats characters - confirm against the original listing.
        String str = "I am... I am... I want... to be... yes .. learning... learn to compile... compile and compile .. compile .. cheng... cheng... cheng";

        // 1. Remove the dots.
        str = str.replaceAll("\\.+", "");
        System.out.println(str);

        // 2. Collapse repeated characters; $1 is the character captured by (.).
        str = str.replaceAll("(.)\\1+", "$1");
        System.out.println(str);
    }

    /**
     * Sorts IP addresses numerically by using plain string order.
     *
     * <p>String order equals numeric order only when every segment has the
     * same number of digits, so each segment is padded to three digits first,
     * and the padding is removed again when printing. Sample input:
     * 192.168.10.34 127.0.0.1 3.3.3.3 105.70.11.55
     *
     * <p>The addresses are collected in a {@link TreeSet}, so duplicate
     * addresses are printed once.
     */
    public static void test_2() {
        String ip_str = "192.168.10.34 127.0.0.1 3.3.3.3 105.70.11.55";

        // 1. Put two zeros in front of every segment, so that even a
        //    one-digit segment has at least three digits.
        ip_str = ip_str.replaceAll("(\\d+)", "00$1");
        System.out.println(ip_str);

        // 2. Keep exactly the last three digits of every segment.
        ip_str = ip_str.replaceAll("0*(\\d{3})", "$1");
        System.out.println(ip_str);

        // 3. Cut out the individual addresses (one or more spaces between them).
        String[] ips = ip_str.split(" +");

        TreeSet<String> ts = new TreeSet<String>();
        for (String ip : ips) {
            ts.add(ip);
        }

        // 4. Print in sorted order with the leading zeros stripped again.
        for (String ip : ts) {
            System.out.println(ip.replaceAll("0*(\\d+)", "$1"));
        }
    }
}
  
 


Web Crawler

// Package in the original listing: cn.itcast.regex.test
// (kept as a comment: this page concatenates several listings, and one
// compilation unit may contain only one package declaration)

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A minimal web crawler: a program that collects data matching a given rule
 * from the Internet. This one collects e-mail addresses.
 */
public class RegexTest2 {

    /** E-mail rule: word characters, '@', word characters, then one or more ".word" parts. */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Prints every e-mail address found on the demo web page.
     *
     * @param args unused
     * @throws IOException if the page cannot be read
     */
    public static void main(String[] args) throws IOException {
        List<String> list = getMailsByWeb();
        for (String mail : list) {
            System.out.println(mail);
        }
    }

    /**
     * Reads the demo page over HTTP and returns the e-mail addresses it contains.
     *
     * <p>The stream is decoded with the platform default charset.
     *
     * @return the addresses in order of appearance; empty if there are none
     * @throws IOException if the page cannot be opened or read
     */
    public static List<String> getMailsByWeb() throws IOException {
        URL url = new URL("http://192.168.1.100:8080/myweb/mail.html");
        return collectMails(new BufferedReader(new InputStreamReader(url.openStream())));
    }

    /**
     * Reads the local file {@code c:\mail.html} and returns the e-mail
     * addresses it contains.
     *
     * <p>The file is decoded with the platform default charset.
     *
     * @return the addresses in order of appearance; empty if there are none
     * @throws IOException if the file cannot be opened or read
     */
    public static List<String> getMails() throws IOException {
        return collectMails(new BufferedReader(new FileReader("c:\\mail.html")));
    }

    /** Scans {@code source} line by line for e-mail addresses, then closes it. */
    private static List<String> collectMails(BufferedReader source) throws IOException {
        List<String> mails = new ArrayList<String>();
        try {
            String line;
            while ((line = source.readLine()) != null) {
                Matcher m = MAIL_PATTERN.matcher(line);
                // One line may hold several addresses.
                while (m.find()) {
                    mails.add(m.group());
                }
            }
        } finally {
            // Release the file handle / connection even when reading fails.
            source.close();
        }
        return mails;
    }
}
       
      
     
    
   
  
 




Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.