re
Python's re module provides regular-expression operations.
Character:
. Match any character other than line break
\w Match a letter, digit, underscore, or Chinese character
\s matches any whitespace character
\d Match a digit
\b Match the beginning or end of a word
^ Match the start of the string
$ Match the end of the string
Quantifiers (number of repetitions):
* Repeat 0 or more times
+ Repeat one or more times
? Repeat 0 or one time
{n} Repeat n times
{n,} Repeat n or more times
{n,m} Repeat n to m times
Match
12345678910111213141516171819202122232425 |
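# match: tries to match from the start of the string; returns a match object on success, or None if there is no match
match(pattern, string, flags=0)
# pattern: the regular expression
# string : the string to match against
# flags  : matching mode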
X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
I  IGNORECASE  Perform case-insensitive matching.
M  MULTILINE   "^" matches the beginning of lines (after a newline)
               as well as the string.
               "$" matches the end of lines (before a newline) as well
               as the end of the string.
S  DOTALL      "." matches any character at all, including the newline.
A  ASCII       For string patterns, make \w, \W, \b, \B, \d, \D
               match the corresponding ASCII character categories
               (rather than the whole Unicode categories, which is the
               default).
               For bytes patterns, this flag is the only available
               behaviour and needn't be specified.
L  LOCALE      Make \w, \W, \b, \B, dependent on the current locale.
U  UNICODE     For compatibility only. Ignored for string patterns (it
               is the default), and forbidden for bytes patterns.
|
650) this.width=650; "id=" Code_img_opened_6b21dc08-4d87-4b54-9103-22571fd90ec4 "class=" code_img_opened "src=" http ://images.cnblogs.com/outliningindicators/expandedblockstart.gif "/>
650) this.width=650; "src=" Http://common.cnblogs.com/images/copycode.gif "alt=" Copy Code "/>
# No groups
r = re.match("h\w+", origin)
print(r.group())      # the entire match
print(r.groups())     # the groups captured in the pattern
print(r.groupdict())  # the named groups captured in the pattern

# With groups
# Why use groups? To extract specific parts of a successful match
# (the whole pattern is matched first, then the parts of interest are extracted).
r = re.match("h(\w+).*(?P<name>\d)$", origin)
print(r.group())      # the entire match
print(r.groups())     # the groups captured in the pattern
print(r.groupdict())  # the groups in the pattern that were given a key (named groups)
650) this.width=650; "src=" Http://common.cnblogs.com/images/copycode.gif "alt=" Copy Code "/>
Search
12 |
# search: scans the whole string and returns the first match; returns None if there is no match
# search(pattern, string, flags=0)
650) this.width=650; "id=" Code_img_opened_30d43937-815c-4182-a3c0-4e9a3326b6c7 "class=" code_img_opened "src=" http ://images.cnblogs.com/outliningindicators/expandedblockstart.gif "/>
650) this.width=650; "src=" Http://common.cnblogs.com/images/copycode.gif "alt=" Copy Code "/>
# No groups
r = re.search("a\w+", origin)
print(r.group())      # the entire match
print(r.groups())     # the groups captured in the pattern
print(r.groupdict())  # the named groups captured in the pattern

# With groups
r = re.search("a(\w+).*(?P<name>\d)$", origin)
print(r.group())      # the entire match
print(r.groups())     # the groups captured in the pattern
print(r.groupdict())  # the groups in the pattern that were given a key (named groups)
650) this.width=650; "src=" Http://common.cnblogs.com/images/copycode.gif "alt=" Copy Code "/>
FindAll
123 |
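# findall: returns a list of all non-overlapping matches; if the pattern has one group, the list contains one string per match; if the pattern has several groups, the list contains one tuple per match
# empty matches are also included in the result
# findall(pattern, string, flags=0)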
650) this.width=650; "id=" Code_img_opened_5d8426be-6941-4067-bcf0-763a69e4d0a7 "class=" code_img_opened "src=" http ://images.cnblogs.com/outliningindicators/expandedblockstart.gif "/>
650) this.width=650; "src=" Http://common.cnblogs.com/images/copycode.gif "alt=" Copy Code "/>
# No groups
r = re.findall("a\w+", origin)
print(r)

# With groups
origin = "hello alex bcd abcd lge acd 19"
r = re.findall("a((\w*)c)(d)", origin)
print(r)
650) this.width=650; "src=" Http://common.cnblogs.com/images/copycode.gif "alt=" Copy Code "/>
Sub
12345678 |
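# sub: replaces the parts of the string matched by the pattern
sub(pattern, repl, string, count=0, flags=0)
# pattern: the regular expression
# repl   : the replacement string, or a callable
# string : the string to search
# count  : maximum number of matches to replace
# flags  : matching mode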
650) this.width=650; "id=" code_img_opened_2ba83fbe-9c22-4c71-b122-ad646815ed3a "class=" code_img_opened "src=" http ://images.cnblogs.com/outliningindicators/expandedblockstart.gif "/>
# Groups make no difference here
origin = "hello alex bcd alex lge alex acd"
r = re.sub("a\w+", "999", origin, 2)
print(r)
Split
1234567 |
# split: splits the string wherever the regular expression matches
split(pattern, string, maxsplit=0, flags=0)
# pattern : the regular expression
# string  : the string to split
# maxsplit: maximum number of splits
# flags   : matching mode
650) this.width=650; "id=" code_img_opened_35bca42c-d6e9-4b1d-821f-2a9a40b98413 "class=" code_img_opened "src=" http ://images.cnblogs.com/outliningindicators/expandedblockstart.gif "/>
650) this.width=650; "src=" Http://common.cnblogs.com/images/copycode.gif "alt= "Copy Code"/>
# No groups
origin = "hello alex bcd alex lge alex acd 19"
r = re.split("alex", origin, 1)
print(r)

# With groups
origin = "hello alex bcd alex lge alex acd 19"
r1 = re.split("(alex)", origin, 1)
print(r1)
r2 = re.split("(al(ex))", origin, 1)
print(r2)
650) this.width=650; "src=" Http://common.cnblogs.com/images/copycode.gif "alt=" Copy Code "/>
650) this.width=650; "id=" code_img_opened_8cc16280-67b6-4030-9bc7-b070adad470d "class=" code_img_opened "src=" http ://images.cnblogs.com/outliningindicators/expandedblockstart.gif "/>
IP address: ^(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}$
Phone number: ^1[3|4|5|8][0-9]\d{8}$
E-mail address: [a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+
Python Regular Expressions