1. Regular expression rules
Characters
  x       The character x. Example: 'a' represents the character a
  \\      The backslash character
  \n      Newline (line feed) ('\u000A')
  \r      Carriage return ('\u000D')
Character classes
  [abc]     a, b, or c (simple class)
  [^abc]    Any character except a, b, or c (negation)
  [a-zA-Z]  a through z or A through Z, both ends included (range)
  [0-9]     The characters 0 through 9, both ends included
Predefined character classes
  .       Any character. To match the '.' character itself, escape it: \.
  \d      A digit: [0-9]
  \D      A non-digit: [^\d] / [^0-9]
  \w      A word character: [a-zA-Z_0-9]
  \W      A non-word character: [^\w]
Boundary matchers
  ^       The beginning of a line
  $       The end of a line
  \b      A word boundary, i.e. a position that has a word character on one side only
Greedy quantifiers
  X?      X, once or not at all
  X*      X, zero or more times
  X+      X, one or more times
  X{n}    X, exactly n times
  X{n,}   X, at least n times
  X{n,m}  X, at least n but not more than m times
Logical operators
  XY      X followed by Y
// package com.zwj.string;  // restore this declaration when the listing is saved as its own source file

import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small demonstrations of the regular-expression support in {@code String},
 * {@link Pattern}, {@link Matcher} and {@link MatchResult}.
 *
 * <p>NOTE(review): the e-mail addresses in this listing were lost when the page was
 * extracted; the sample addresses below are placeholders chosen so that the documented
 * results (true / true / false) still hold.
 */
public class Matcherdemo {

    public static void main(String[] args) {
        // userstringregular();
        // usermatcherregular();
        usermatcherresultregular();
    }

    /**
     * Shows the three basic regex operations offered directly by {@code String}:
     * cutting with {@code split()}, replacing with {@code replaceAll()} and
     * matching with {@code matches()}.
     */
    public static void userstringregular() {
        // Cut: split on runs of one or more spaces. A bare "+" is not a valid pattern
        // (dangling quantifier), so the space in front of it is required.
        String str1 = "1 2 3 4 54 5 6";
        String[] numbers = str1.split(" +");
        for (String temp : numbers) {
            System.out.println(temp);
        }

        // Replace: turn every digit into '*'.
        // Both calls print: abd***:adad*****:asdadasadsfgi#% ^^ ****
        String str2 = "abd123:adad46587:asdadasadsfgi#% ^^ 9090";
        System.out.println(str2.replaceAll("[0-9]", "*"));
        System.out.println(str2.replaceAll("\\d", "*"));

        // Match: validate mailbox addresses.
        String mail1 = "alice@example.com";
        String mail2 = "bob_01@example.com.cn";
        String mail3 = "carol@example";
        // Spelled-out equivalent of the pattern below:
        // "[0-9a-zA-Z_]+@[0-9a-zA-Z_]+(\\.[0-9a-zA-Z_]{2,4})+"
        String mailRegex = "\\w+@\\w+(\\.\\w{2,4})+";
        System.out.println(mail1.matches(mailRegex)); // true
        System.out.println(mail2.matches(mailRegex)); // true
        System.out.println(mail3.matches(mailRegex)); // false: no ".xx" suffix after the host
    }

    /**
     * Shows the matcher objects of {@code java.util.regex}.
     *
     * <pre>
     * Pattern:
     *   Pattern Pattern.compile(String regex)
     *   Matcher pattern.matcher(CharSequence input)
     * Matcher:
     *   boolean     matcher.find()           // find the next match
     *   String      matcher.group()          // text of the entire current match
     *   boolean     matcher.matches()        // try to match the entire input against the pattern
     *   int         matcher.groupCount()     // number of capturing groups, e.g. (aa)(bb) has two
     *   String      matcher.group(int group) // text captured by the given group of the current match
     *   MatchResult matcher.toMatchResult()  // the current match as a MatchResult
     * </pre>
     */
    public static void usermatcherregular() {
        // Find every word that is exactly three characters long.
        String str = "ABC 124 ewqeq QEQE qeqe qeqe AAAA fs fsdfs d SF SF SF SF sfada DSS Dee ad a F S f sa a'lfsd;'L";
        Pattern pt = Pattern.compile("\\b\\w{3}\\b");
        Matcher match = pt.matcher(str);
        while (match.find()) {
            System.out.println(match.group());
        }

        // Pull the e-mail addresses out of surrounding noise.
        String str2 = "dadaadad da da dask[pweoo-123-alice@example.com bob_01@example.com.cn"
                + " =0kfpos9ir23j0is carol@example.com@ada.com.cn dave@example.org ufsfjsfi-si-";
        Pattern pet2 = Pattern.compile("\\b\\w+@\\w+(\\.\\w{2,4})+\\b");
        Matcher match2 = pet2.matcher(str2);
        while (match2.find()) {
            System.out.println(match2.group());
        }

        // Capturing groups: a three-character word followed by a four-character word.
        String sr = "Dada Ada adad ADSDA ad asdda adr3 FAs daf FAs FDSF 234 Adda";
        Pattern pet = Pattern.compile("\\b(\\w{3}) *(\\w{4})\\b");
        Matcher match1 = pet.matcher(sr);
        int countAll = match1.groupCount(); // 2
        while (match1.find()) {
            System.out.print("Match group result:");
            // Group 0 is the entire match, so the capturing groups are numbered from 1.
            for (int i = 1; i <= countAll; i++) {
                System.out.print(String.format("\n\tThe result of group %d is: %s", i, match1.group(i)));
            }
            System.out.print("\nThe entire match is: ");
            System.out.println(match1.group());
        }
        /*
         * Output of the group demonstration:
         *
         * Match group result:
         *     The result of group 1 is: Ada
         *     The result of group 2 is: adad
         * The entire match is: Ada adad
         * Match group result:
         *     The result of group 1 is: FAs
         *     The result of group 2 is: FDSF
         * The entire match is: FAs FDSF
         * Match group result:
         *     The result of group 1 is: 234
         *     The result of group 2 is: Adda
         * The entire match is: 234 Adda
         */
    }

    /**
     * Same group demonstration as {@link #usermatcherregular()}, but each match is first
     * captured with {@link Matcher#toMatchResult()} and then read through the
     * {@link MatchResult} interface.
     */
    public static void usermatcherresultregular() {
        String sr = "Dada Ada adad ADSDA ad asdda adr3 FAs daf FAs FDSF 234 Adda";
        Pattern pet = Pattern.compile("\\b(\\w{3}) *(\\w{4})\\b");
        Matcher match = pet.matcher(sr);
        while (match.find()) {
            MatchResult ms = match.toMatchResult();
            System.out.print("Match group result for object:");
            for (int i = 1; i <= ms.groupCount(); i++) {
                System.out.print(String.format("\n\tThe result of group %d is: %s", i, ms.group(i)));
            }
            System.out.print("\nThe entire match is: ");
            System.out.println(ms.group());
        }
        /*
         * Output:
         *
         * Match group result for object:
         *     The result of group 1 is: Ada
         *     The result of group 2 is: adad
         * The entire match is: Ada adad
         * Match group result for object:
         *     The result of group 1 is: FAs
         *     The result of group 2 is: FDSF
         * The entire match is: FAs FDSF
         * Match group result for object:
         *     The result of group 1 is: 234
         *     The result of group 2 is: Adda
         * The entire match is: 234 Adda
         */
    }
}
-- Crawling the URLs of a web page
// package com.zwj.string;  // restore this declaration when the listing is saved as its own source file

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads a web page and uses a regular expression to list the targets of its
 * {@code href="..."} attributes.
 */
public class Stringdemo {

    public static void main(String[] args) {
        String str = geturl("https://www.163.com/", "GBK");
        // Alternative whole-URL pattern from the original listing (reconstructed from a
        // garbled extraction and not verified):
        // String regex = "^([hH][tT]{2}[pP]:/*|[hH][tT]{2}[pP][sS]:/*|[fF][tT][pP]:/*)(([A-Za-z0-9-~]+).)+([A-Za-z0-9-~\\/])+(\\?{0,1}(([A-Za-z0-9-~]+\\={0,1})([A-Za-z0-9-~]*)\\&{0,1})*)$";
        String regex = "href=\"([\\w\\s./:]+?)\"";
        List<String> list = getmatchersubstrings(regex, str);
        for (String string : list) {
            System.out.println(string);
        }
    }

    /**
     * Collects one substring per match of {@code regex} in {@code str}.
     *
     * @param regex the pattern to search for; when it declares at least one capturing
     *              group, the text of group 1 is collected, otherwise the entire match
     * @param str   the text to search
     * @return the collected substrings in order of occurrence; empty when nothing matches.
     *         An entry is {@code null} when group 1 did not take part in that match.
     */
    public static List<String> getmatchersubstrings(String regex, String str) {
        List<String> list = new ArrayList<String>();
        Matcher matcher = Pattern.compile(regex).matcher(str);
        // group(1) throws IndexOutOfBoundsException for a pattern without groups,
        // so fall back to group 0 (the entire match) in that case.
        int wanted = matcher.groupCount() >= 1 ? 1 : 0;
        while (matcher.find()) {
            list.add(matcher.group(wanted));
        }
        return list;
    }

    /**
     * Downloads the document at {@code urlstring} and returns its text.
     *
     * <p>The content is read line by line and the lines are concatenated without their
     * line terminators. I/O failures (including a malformed URL) are reported on standard
     * error and are not propagated; whatever was read up to that point is returned, which
     * may be the empty string.
     *
     * @param urlstring the address of the page to download
     * @param charcode  the name of the charset used to decode the response, e.g. "GBK"
     * @return the decoded page content with line terminators removed
     * @throws IllegalArgumentException if {@code charcode} is not a legal or supported
     *         charset name (raised by {@link Charset#forName(String)})
     */
    public static String geturl(String urlstring, String charcode) {
        StringBuilder sb = new StringBuilder();
        try {
            URL url = new URL(urlstring);
            // Resolve the charset before opening the connection so that a bad charset
            // name cannot leave an open stream behind.
            Charset charset = Charset.forName(charcode);
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(url.openStream(), charset))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    sb.append(line);
                }
            }
        } catch (IOException e) {
            // MalformedURLException is an IOException, so one handler covers both.
            // Best effort: report the failure and return what has been read so far.
            e.printStackTrace();
        }
        return sb.toString();
    }
}
Java Basics-Regular expressions