I. Common regular expression syntax:
1. ".": By default matches any character except \n; if the flag re.DOTALL is specified, it matches any character, including a line break
2. "^": Matches the beginning of the string; if the flag re.MULTILINE is specified, it also matches at the start of each line, e.g. re.search(r"^a", "\nabc\neee", flags=re.MULTILINE)
3. "$": Matches the end of the string; with re.MULTILINE it also matches at the end of each line, so re.search("foo$", "bfoo\nsdfsf", flags=re.MULTILINE).group() also matches
4. "*": Matches the character before the "*" 0 or more times; re.findall("ab*", "cabb3abcbbac") returns ['abb', 'ab', 'a']
5. "+": Matches the previous character 1 or more times; re.findall("ab+", "ab+cd+abb+bba") returns ['ab', 'abb']
6. "?": Matches the previous character 0 or 1 times
7. "{m}": Matches the previous character exactly m times
8. "{n,m}": Matches the previous character between n and m times; re.findall("ab{1,3}", "abb abc abbcbbb") returns ['abb', 'ab', 'abb']
9. "|": Matches either the expression to the left of "|" or the one to its right; re.search("abc|ABC", "ABCBabcCD").group() returns 'ABC'
10. "(...)": Group matching; re.search("(abc){2}a(123|456)c", "abcabca456c").group() returns 'abcabca456c'
11. "\A": Matches only at the beginning of the string; re.search("\Aabc", "xiaoabc") does not match
12. "\Z": Matches the end of the string, the same as "$"
13. "\d": Matches a digit 0-9
14. "\D": Matches a non-digit
15. "\w": Matches [A-Za-z0-9_]
16. "\W": Matches non-[A-Za-z0-9_]
17. "\s": Matches whitespace characters such as \t, \n, \r; re.search("\s+", "ab\tc1\n3").group() returns '\t'
18. "(?P<name>...)": Named group matching; re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})", "371481199306143242").groupdict("city") returns {'province': '3714', 'city': '81', 'birthday': '1993'}
II. Matching methods
1. match: Matches from the beginning of the string; "^" is unnecessary in this mode
2. search: Searches the entire text and returns as soon as the first occurrence is found
3. findall: Returns all matching values; its result has no group method
4. split: Splits the string wherever the pattern matches
5. sub: Replaces the matched text with a replacement value
III. Examples
1. >>> re.match("xiao\d+", "xiao123ming321")  //"\d+" matches a digit one or more times
<_sre.SRE_Match object; span=(0, 7), match='xiao123'>
2. >>> re.match("m.+g$", "xiao123ming321")  //to extract "ming", match does not work; use search instead
>>> re.search("m[a-z]+a", "xiao123ming321a")  //note the difference in meaning between these two regular expressions
>>> re.search("m[a-z]+a$", "xiao123ming321a")
>>> re.search("m[a-z]+", "xiao123ming321a")  //this correctly matches "ming"; if the string contains uppercase letters (e.g. "xiao123mINg321a"), the pattern should be "m[a-zA-Z]+"
<_sre.SRE_Match object; span=(7, 11), match='ming'>
3. >>> re.search("#.+#", "123#hello#")  //matches any characters between "#" and "#"
<_sre.SRE_Match object; span=(3, 10), match='#hello#'>
4. >>> re.search("aaa?", "aabcaaa")  //"?" matches the preceding character 0 or 1 times: "aa?" matches the second "a" 0 or 1 times, and "aaa?" matches the third "a" 0 or 1 times
<_sre.SRE_Match object; span=(0, 2), match='aa'>
>>> re.search("aaa?", "abcaaa")
<_sre.SRE_Match object; span=(3, 6), match='aaa'>
5. >>> re.search("[0-9]{3}", "a1b2c123aaa")  //[0-9]{3} matches any digit exactly 3 times
<_sre.SRE_Match object; span=(5, 8), match='123'>
>>> re.search("[0-9]{1,3}", "a1b2c123aaa")  //[0-9]{1,3} matches a digit at least once and at most three times; to match all the numbers, use findall instead
<_sre.SRE_Match object; span=(1, 2), match='1'>
>>> re.findall("[0-9]{1,3}", "a1b2c123aaa")
['1', '2', '123']
6. >>> re.search("abc|ABC", "ABC2c123abc")  //"|" means "or"
<_sre.SRE_Match object; span=(0, 3), match='ABC'>
7. >>> re.search("abc{2}", "abc2c123abcc")
<_sre.SRE_Match object; span=(8, 12), match='abcc'>
>>> re.search("(abc){2}", "abc2c123abcabc")  //"(...)" treats the characters in parentheses as a single unit
<_sre.SRE_Match object; span=(8, 14), match='abcabc'>
>>> re.search("(abc){2}(\|\|=){2}", "abc2c123abcabc||=||=")  //"=" does not need to be escaped
<_sre.SRE_Match object; span=(8, 20), match='abcabc||=||='>
8. >>> re.search("\A[0-9]+[a-z]\Z", "1213213a")  //"\A" is equivalent to "^", "\Z" is equivalent to "$"
<_sre.SRE_Match object; span=(0, 8), match='1213213a'>
9. >>> re.search("\D+", "$-\n\ta")  //"\D" matches any character that is not a digit, including \n and \t
<_sre.SRE_Match object; span=(0, 7), match='$-\n\ta'>
10. >>> re.search("(?P<id>[0-9]+)(?P<name>[a-zA-Z]+)", "abc23ccc").groupdict()  //the matched groups can be returned as a dictionary
{'id': '23', 'name': 'ccc'}
>>> a = re.search("(?P<id>[0-9]+)(?P<name>[a-zA-Z]+)", "abc23ccc").groupdict()
>>> print(a["name"])
ccc
>>> a = re.search("(?P<id>[0-9]+)(?P<name>[a-zA-Z]+)", "abc23ccc").group("name")
>>> print(a)
ccc
11. >>> re.split("[0-9]", "abc12def3d")  //split divides the string wherever the pattern matches
['abc', '', 'def', 'd']
>>> re.split("[0-9]+", "abc12def3d")
['abc', 'def', 'd']
12. >>> re.sub("[0-9]", "a", "abc12def3d")  //"[0-9]" is the pattern to match, "a" is the value that replaces each match
'abcaadefad'
>>> re.sub("[0-9]", "a", "abc12def3d", count=2)  //count limits the replacement to the first n matches
'abcaadef3d'
13. >>> re.search(r"\\", "abc12d\\ef3d")  //matching a backslash takes two backslashes in the (raw) pattern
<_sre.SRE_Match object; span=(6, 7), match='\\'>
14. >>> re.search("[a-z]+", "abcABC", flags=re.I)  //flags=re.I ignores case
<_sre.SRE_Match object; span=(0, 6), match='abcABC'>
15. >>> re.search("^a", "\nabcabc\ndd\nccde")  //does not match
>>> re.search("^a", "\nabcabc\ndd\nccde", flags=re.M)
<_sre.SRE_Match object; span=(1, 2), match='a'>
16. >>> re.search(".+", "\nabcabc\ndd\nccde")
<_sre.SRE_Match object; span=(1, 7), match='abcabc'>
>>> re.search(".+", "\nabcabc\ndd\nccde", flags=re.S)  //with flags=re.S, "." also matches \n
<_sre.SRE_Match object; span=(0, 15), match='\nabcabc\ndd\nccde'>
Python 3 re (regular expression) module