// Chinese character range u4E00-u9FA5
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Console walk-through of {@code java.util.regex}: {@code String.matches},
 * {@code Pattern}/{@code Matcher}, character classes, boundaries, find/lookingAt,
 * replacement, groups, reluctant quantifiers, lookahead and back references.
 *
 * <p>create by fzw, my website: www.itstack.org, November 19, 2013, Regular Expression.
 *
 * <p>NOTE(review): this listing was reconstructed from a whitespace-mangled copy.
 * String literals and section headers follow that copy as closely as it allows;
 * places where the text had to be guessed are marked with TODO comments.
 */
public class sxtRegex01 {

    /**
     * Runs every demo in order and prints each result on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // --- String.matches / replaceAll and the equivalent Pattern + Matcher form ---
        p("check for matching:" + "abc".matches("..."));
        p("replacement string:" + "abc123aa".replaceAll("\\d", "."));
        Pattern p1 = Pattern.compile("[a-z]{3}");
        Matcher m1 = p1.matcher("fgha");
        // matches() must consume the whole input, so four letters against {3} is false.
        p("Pattern + Matcher method verification match:" + m1.matches());

        // --- character classes ---
        p("------------------");
        // The subject strings are empty in the available copy, so these five print false.
        // TODO confirm the intended one-character subjects.
        p("".matches("[abc]"));
        p("".matches("[^abc]"));
        p("".matches("[a-zA-Z]"));
        p("".matches("[a-z]|[A-Z]"));
        p("".matches("[a-z[A-Z]]"));      // union of two ranges
        p("R".matches("[A-Z&&[RFG]]"));   // intersection: upper-case AND one of R, F, G

        // --- predefined classes and escaping the backslash itself ---
        p("----------------");
        p("a_8".matches("\\w{3}"));
        p("\\".matches("\\\\"));

        // --- boundaries ---
        p("----------------");
        p("hello sir".matches("h.*"));
        p("hello sir".matches(".*ir$"));
        // \b is a word boundary: present after "hello" only when a non-word char follows.
        p("hello sir".matches("^h[a-z]{1,3}o\\b.*"));
        p("hellosir".matches("^h[a-z]{1,3}o\\b.*"));
        // Whitespace that is not a line feed, any number of times, then exactly one line feed.
        p(" \n".matches("^[\\s&&[^\\n]]*\\n$"));

        // --- matches / reset / find / lookingAt ---
        p("------------------");
        // TODO confirm: the quantifier is unreadable in the available copy ("{123}");
        // {3,5} is used because it is the quantifier of the group demo further down.
        Pattern p2 = Pattern.compile("\\d{3,5}");
        String s = "4536-89789--000";
        Matcher m2 = p2.matcher(s);
        p(m2.matches());
        // matches() consumed input while failing; reset() rewinds so find() starts at 0.
        m2.reset();
        findAndReport(m2);
        findAndReport(m2);
        findAndReport(m2);
        p(m2.find());
        // lookingAt() always tries at the beginning of the input, ignoring earlier finds.
        p(m2.lookingAt());

        // --- case-insensitive replaceAll ---
        p("------------------");
        Pattern p3 = Pattern.compile("java", Pattern.CASE_INSENSITIVE);
        Matcher m3 = p3.matcher("java_java_java_ilovejava");
        p(m3.replaceAll("JAVA"));

        // --- alternating replacement with appendReplacement / appendTail ---
        p("-------------------- Replace");
        Pattern p4 = Pattern.compile("java", Pattern.CASE_INSENSITIVE);
        Matcher m4 = p4.matcher("java_java_java_ilovejava fdasfas");
        // StringBuffer (not StringBuilder) keeps this compatible with pre-Java-9 Matcher.
        StringBuffer sb = new StringBuffer();
        int hit = 0;
        while (m4.find()) {
            hit++;
            // Odd hits become upper-case, even hits lower-case.
            m4.appendReplacement(sb, hit % 2 == 0 ? "java" : "JAVA");
        }
        // Copy whatever follows the last match into the buffer.
        m4.appendTail(sb);
        p(sb);

        // --- capturing groups ---
        // TODO confirm header text; it was swallowed by a comment in the available copy.
        p("------------------ group");
        Pattern p5 = Pattern.compile("(\\d{3,5})([a-z]{2})");
        Matcher m5 = p5.matcher("123bb_78987dd_090po");
        while (m5.find()) {
            // Group 0 is the whole match; groups are numbered by their opening parenthesis.
            p(m5.group(1));
        }

        // --- greedy versus reluctant quantifier ---
        p("------------------ greedy match and non-Greedy match");
        // The trailing '?' makes {3,10} reluctant: it stops at the first digit it can reach.
        Pattern p6 = Pattern.compile("(.{3,10}?)[0-9]");
        Matcher m6 = p6.matcher("aaaa5dddd8");
        while (m6.find()) {
            p(m6.start() + "-" + m6.end());
        }

        p("------------------ normal capture");
        Pattern p7 = Pattern.compile(".{3}");
        Matcher m7 = p7.matcher("ab4dd5");
        while (m7.find()) {
            p(m7.group());
        }

        // --- lookahead (zero-width, the looked-at text is not part of the match) ---
        p("------------------ non-capturing group");
        Pattern p8 = Pattern.compile(".{3}(?=a)");
        Matcher m8 = p8.matcher("ab4add5");
        while (m8.find()) {
            // NOTE(review): label kept as found; (?=a) actually REQUIRES a following 'a'.
            p("cannot be followed by a" + m8.group());
        }
        // Negative lookahead placed first: the match may not start with 'a'.
        p8 = Pattern.compile("(?!a).{3}");
        m8 = p8.matcher("abbsab89");
        while (m8.find()) {
            p("the front cannot be a" + m8.group());
        }
        // (?<!a) and (?<=a) are the lookbehind counterparts.

        p("-------------------- remove> <matching");
        Pattern p9 = Pattern.compile("(?!>).+(?=<)");
        Matcher m9 = p9.matcher(">programming China<");
        while (m9.find()) {
            p(m9.group());
        }

        // --- back reference ---
        p("------------------ forward reference");
        // \1 must repeat what group 1 captured; the group needs two digits so that
        // "12" followed by "12" covers the whole input.
        Pattern p10 = Pattern.compile("(\\d\\d)\\1");
        Matcher m10 = p10.matcher("1212");
        p(m10.matches());

        // --- embedded flag ---
        p("---------------- case-insensitive, regular expressions embedded");
        // (?i) switches on case-insensitive matching from inside the pattern.
        p("java".matches("(?i)JAVA"));
    }

    /**
     * Advances the matcher with {@code find()}, prints the boolean result and,
     * only when a match was found, its {@code start-end} span. Calling
     * {@code start()} after a failed {@code find()} throws
     * {@code IllegalStateException}, hence the guard.
     *
     * @param matcher matcher to advance
     */
    private static void findAndReport(Matcher matcher) {
        boolean found = matcher.find();
        p(found);
        if (found) {
            p(matcher.start() + "-" + matcher.end());
        }
    }

    /**
     * Prints one value followed by a line break on standard output.
     *
     * @param o value to print; {@code null} prints as "null"
     */
    public static void p(Object o) {
        System.out.println(o);
    }
}
Summary of regular-expression constructs (java.util.regex.Pattern)

Characters
  x           The character x
  \\          The backslash character
  \0n         The character with octal value 0n (0 <= n <= 7)
  \0nn        The character with octal value 0nn (0 <= n <= 7)
  \0mnn       The character with octal value 0mnn (0 <= m <= 3, 0 <= n <= 7)
  \xhh        The character with hexadecimal value 0xhh
  \uhhhh      The character with hexadecimal value 0xhhhh
  \t          The tab character ('\u0009')
  \n          The newline (line feed) character ('\u000A')
  \r          The carriage-return character ('\u000D')
  \f          The form-feed character ('\u000C')
  \a          The alert (bell) character ('\u0007')
  \e          The escape character ('\u001B')
  \cx         The control character corresponding to x

Character classes
  [abc]           a, b, or c (simple class)
  [^abc]          Any character except a, b, or c (negation)
  [a-zA-Z]        a through z or A through Z, inclusive (range)
  [a-d[m-p]]      a through d, or m through p: [a-dm-p] (union)
  [a-z&&[def]]    d, e, or f (intersection)
  [a-z&&[^bc]]    a through z, except for b and c: [ad-z] (subtraction)
  [a-z&&[^m-p]]   a through z, and not m through p: [a-lq-z] (subtraction)

Predefined character classes
  .           Any character (may or may not match line terminators)
  \d          A digit: [0-9]
  \D          A non-digit: [^0-9]
  \s          A whitespace character: [ \t\n\x0B\f\r]
  \S          A non-whitespace character: [^\s]
  \w          A word character: [a-zA-Z_0-9]
  \W          A non-word character: [^\w]

POSIX character classes (US-ASCII only)
  \p{Lower}   A lower-case alphabetic character: [a-z]
  \p{Upper}   An upper-case alphabetic character: [A-Z]
  \p{ASCII}   All ASCII: [\x00-\x7F]
  \p{Alpha}   An alphabetic character: [\p{Lower}\p{Upper}]
  \p{Digit}   A decimal digit: [0-9]
  \p{Alnum}   An alphanumeric character: [\p{Alpha}\p{Digit}]
  \p{Punct}   Punctuation: one of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
  \p{Graph}   A visible character: [\p{Alnum}\p{Punct}]
  \p{Print}   A printable character: [\p{Graph}\x20]
  \p{Blank}   A space or a tab: [ \t]
  \p{Cntrl}   A control character: [\x00-\x1F\x7F]
  \p{XDigit}  A hexadecimal digit: [0-9a-fA-F]
  \p{Space}   A whitespace character: [ \t\n\x0B\f\r]

java.lang.Character classes (simple Java character type)
  \p{javaLowerCase}   Equivalent to java.lang.Character.isLowerCase()
  \p{javaUpperCase}   Equivalent to java.lang.Character.isUpperCase()
  \p{javaWhitespace}  Equivalent to java.lang.Character.isWhitespace()
  \p{javaMirrored}    Equivalent to java.lang.Character.isMirrored()

Classes for Unicode blocks and categories
  \p{InGreek}           A character in the Greek block (simple block)
  \p{Lu}                An upper-case letter (simple category)
  \p{Sc}                A currency symbol
  \P{InGreek}           Any character except one in the Greek block (negation)
  [\p{L}&&[^\p{Lu}]]    Any letter except an upper-case letter (subtraction)

Boundary matchers
  ^           The beginning of a line
  $           The end of a line
  \b          A word boundary
  \B          A non-word boundary
  \A          The beginning of the input
  \G          The end of the previous match
  \Z          The end of the input but for the final terminator, if any
  \z          The end of the input

Greedy quantifiers
  X?          X, once or not at all
  X*          X, zero or more times
  X+          X, one or more times
  X{n}        X, exactly n times
  X{n,}       X, at least n times
  X{n,m}      X, at least n but not more than m times

Reluctant quantifiers
  X??         X, once or not at all
  X*?         X, zero or more times
  X+?         X, one or more times
  X{n}?       X, exactly n times
  X{n,}?      X, at least n times
  X{n,m}?     X, at least n but not more than m times

Possessive quantifiers
  X?+         X, once or not at all
  X*+         X, zero or more times
  X++         X, one or more times
  X{n}+       X, exactly n times
  X{n,}+      X, at least n times
  X{n,m}+     X, at least n but not more than m times

Logical operators
  XY          X followed by Y
  X|Y         Either X or Y
  (X)         X, as a capturing group

Back references
  \n          Whatever the nth capturing group matched

Quotation
  \           Nothing, but quotes the following character
  \Q          Nothing, but quotes all characters until \E
  \E          Nothing, but ends quoting started by \Q

Special constructs (non-capturing)
  (?:X)                 X, as a non-capturing group
  (?idmsux-idmsux)      Nothing, but turns match flags i d m s u x on - off
  (?idmsux-idmsux:X)    X, as a non-capturing group with the given flags on - off
  (?=X)                 X, via zero-width positive lookahead
  (?!X)                 X, via zero-width negative lookahead
  (?<=X)                X, via zero-width positive lookbehind
  (?<!X)                X, via zero-width negative lookbehind
  (?>X)                 X, as an independent, non-capturing group