#!/usr/bin/env python
# -*- coding: utf-8 -*-
####################################################
# author: sunfx [email protected]
# last modified: 2014/11/12 - 2014/11/13
# filename: re.py
# QQ group: 236147801
####################################################
"""Study notes for the standard-library ``re`` module, as a runnable script.

Each numbered section prints the result of a small regular-expression
experiment.  Every ``print`` call takes exactly one argument, so the script
emits the same text under Python 2 and Python 3.
"""
# NOTE(review): the header above names this file ``re.py``.  A script saved
# under that name sits first on the import path and may shadow the very
# stdlib module imported below -- confirm the on-disk name and rename it
# (e.g. ``re_notes.py``) if so.
import re

# ---------------------------------------------------------------------------
# 1. Find a pattern in text
# ---------------------------------------------------------------------------
pattern = 'this'
text = 'Does this text match the pattern?'

match = re.search(pattern, text)
s = match.start()
e = match.end()

print('Found "%s"\nin "%s"\nfrom %d to %d ("%s")'
      % (match.re.pattern, match.string, s, e, text[s:e]))

# match.re.pattern  the pattern that was searched for
# match.string      the text that was searched
# s                 start index of the matched content
# e                 end index of the matched content
# text[s:e]         the matched characters

# ---------------------------------------------------------------------------
# 2. Compiled expressions
# ---------------------------------------------------------------------------
# Turn each pattern string into a compiled regular-expression object.
regexes = [re.compile(p) for p in ['this', 'that']]

print('Text: %r\n' % (text,))

for regex in regexes:
    # regex.pattern is the string the object was compiled from; search()
    # reports whether 'this' / 'that' occurs anywhere in text.
    outcome = 'match!' if regex.search(text) else 'no match'
    print('Seeking "%s" %s' % (regex.pattern, outcome))

# ---------------------------------------------------------------------------
# 3. Multiple matches
# ---------------------------------------------------------------------------
text = 'abbaaabbbbaaaaa'
pattern = 'ab'

# findall() returns the matched strings directly.
for match in re.findall(pattern, text):
    print('Found "%s"' % match)

# finditer() yields match objects, which also carry the position of each
# match inside the original input text.
for match in re.finditer(pattern, text):
    s = match.start()
    e = match.end()
    print('Found "%s" at %d:%d' % (text[s:e], s, e))


# ---------------------------------------------------------------------------
# 4. Pattern syntax
# ---------------------------------------------------------------------------
def test_patterns(text, patterns=()):
    """Print every match of each pattern in *text*, aligned under the text.

    Args:
        text: The string to scan.
        patterns: Iterable of ``(pattern, description)`` pairs.  The default
            is an empty tuple (rather than a shared mutable list), in which
            case nothing is printed.

    Returns:
        None.  All output goes to stdout.
    """
    for pattern, desc in patterns:
        print('Pattern %r (%s)\n' % (pattern, desc))
        print('  %r' % (text,))
        for match in re.finditer(pattern, text):
            s = match.start()
            e = match.end()
            substr = text[s:e]  # the matched characters
            # %r doubles every backslash when it prints the text, so count
            # the backslashes before position s to keep the match aligned.
            n_backslashes = text[:s].count('\\')
            prefix = '.' * (s + n_backslashes)
            print('  %s%r' % (prefix, substr))
        print('')


# The name starts with "test_", so tell pytest not to collect this demo
# helper as a test case.
test_patterns.__test__ = False

test_patterns('abbaaabbbbaaaaa', [('ab', "'a' followed by 'b'")])

# Repetition.  Greedy quantifiers consume as much as possible; appending
# '?' makes them non-greedy so that they consume as little as possible.
#   '*' zero or more times, '+' one or more times, '?' zero or one time
#
#   ('ab*',      'a followed by zero or more b'),   # zero or more times
#   ('ab+',      'a followed by one or more b'),    # at least once
#   ('ab?',      'a followed by zero or one b'),    # at most once
#   ('ab{3}',    'a followed by three b'),          # exactly three times
#   ('ab{2,3}',  'a followed by two to three b'),   # two to three times
#
#   ('ab*?',     'a followed by zero or more b'),   # non-greedy forms
#   ('ab+?',     'a followed by one or more b'),
#   ('ab??',     'a followed by zero or one b'),
#   ('ab{3}?',   'a followed by three b'),
#   ('ab{2,3}?', 'a followed by two to three b'),

# Usage:
sample = 'absdsdsdsdsd'
print(re.findall('ab*', sample))   # ['ab']
print(re.findall('ab*?', sample))  # ['a']

# ---------------------------------------------------------------------------
# 5. Character sets
# ---------------------------------------------------------------------------
#   '[ab]'      either a or b
#   'a[ab]+'    a followed by one or more a or b
#   'a[ab]+?'   a followed by one or more a or b, not greedy
#   '[^...]'    any character NOT listed in the set
#   '[a-z]'     all lowercase ASCII letters
#   '[A-Z]'     all uppercase ASCII letters
#   '[a-zA-Z]'  a lowercase range followed by an uppercase range
#   '[A-Za-z]'  an uppercase range followed by a lowercase range
sample = 'aaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbasbsbab,a_baba'
print(re.findall('[ab]', sample))
print(re.findall('a[ab]+', sample))
print(re.findall('a[ab]+?', sample))
print(re.findall('[^_]', sample))

sample = 'China,lovE'
print(re.findall('[a-z][A-Z]', sample))   # ['vE']
print(re.findall('[A-Z][a-z]', sample))   # ['Ch']
print(re.findall('[A-Z][a-z]+', sample))  # ['China']
print(re.findall('[a-z][A-Z]+', sample))  # ['vE']
print(re.findall('[A-Z][a-z]*', sample))  # ['China', 'E']
print(re.findall('[a-z][A-Z]*', sample))  # ['h', 'i', 'n', 'a', 'l', 'o', 'vE']
print(re.findall('[A-Z][a-z]?', sample))  # ['Ch', 'E']
print(re.findall('[a-z][A-Z]?', sample))  # ['h', 'i', 'n', 'a', 'l', 'o', 'vE']

# ---------------------------------------------------------------------------
# 6. The '.' metacharacter and escape codes
# ---------------------------------------------------------------------------
# '.' matches any single character:  a.   b.   a.*b   a.*?b
c = 'woaizhongguoawsb, wasssssssssssssdsdsdsdbsdddddddbaaabbbbbbbsd'
print(re.findall('a.', c))     # ['ai', 'aw', 'as', 'aa', 'ab']
print(re.findall('b.', c))     # ['b,', 'bs', 'ba', 'bb', 'bb', 'bb', 'bs']
# Greedy: a single match running from the first 'a' to the last 'b'.
print(re.findall('a.*b', c))
# Non-greedy: each match stops at the nearest 'b' instead of the last one.
print(re.findall('a.*?b', c))  # ['aizhongguoawsb', 'asssssssssssssdsdsdsdb', 'aaab']

# Escape codes:
#   \d  a digit
#   \D  a non-digit
#   \s  whitespace (tab, space, newline)
#   \S  non-whitespace (symbols, letters, digits)
#   \w  alphanumeric
#   \W  non-alphanumeric (symbols, tab, space, newline)

# ---------------------------------------------------------------------------
# 7. Anchoring
# ---------------------------------------------------------------------------
#   ^   start of string, or of line
#   $   end of string, or of line
#   \A  start of string
#   \Z  end of string
#   \b  empty string at the beginning or end of a word
#   \B  empty string not at the beginning or end of a word

# ---------------------------------------------------------------------------
# 8. Constraining the search: match() versus search()
# ---------------------------------------------------------------------------
text = 'This is some text -- with punctuation.'
pattern = 'is'

print('Text   : %s' % text)
print('Pattern: %s' % pattern)

# match() only matches at the very start of the string; 'is' is not at the
# start, so the result is None.
m = re.match(pattern, text)
print('Match  : %s' % (m,))

# search() scans the whole string; 'is' occurs in the text, so it matches.
found = re.search(pattern, text)
print('Search : %s' % (found,))

pattern = re.compile(r'\b\w*is\w*\b')  # compile the rule once

print('Text: %s' % text)

pos = 0
while True:
    # Resume the search where the previous match ended.
    match = pattern.search(text, pos)
    if not match:
        break
    s = match.start()
    e = match.end()
    print('  %d : %d = "%s"' % (s, e - 1, text[s:e]))
    pos = e

# ---------------------------------------------------------------------------
# 9. Dissecting matches with groups
#    (any complete expression can be made a group and nested inside a
#    larger expression)
# ---------------------------------------------------------------------------
regex = re.compile(r'(\bt\w+)\W+(\w+)')

print('Input text            : %s' % text)
print('Pattern               : %s' % regex.pattern)

match = regex.search(text)
# group(0) is the text matched by the whole expression; sub-groups are
# numbered from 1 in the order they appear.
print('Entire match          : %s' % match.group(0))
print('Word starting with "t": %s' % match.group(1))  # first group
print('Word after "t" word   : %s' % match.group(2))  # second group

# Python extends the basic grouping syntax with named groups:
# (?P<name>pattern)
print(text)
print('')

for pattern in [
    r'^(?P<first_word>\w+)',
    r'(?P<last_word>\w+)\S*$',
    r'(?P<t_word>\bt\w+)\W+(?P<other_word>\w+)',
    r'(?P<ends_with_t>\w+t)\b',
]:
    regex = re.compile(pattern)
    match = regex.search(text)
    print('Matching "%s"' % pattern)
    if match is None:
        # Report a miss instead of crashing on None.groups().
        print('   no match')
    else:
        print('   %r' % (match.groups(),))
        print('   %r' % (match.groupdict(),))
    print('')
# To be continued as the study goes on ...
# This article is from the "brotherxing" blog; please keep this source attribution: http://brotherxing.blog.51cto.com/3994225/1576216
# Python re library: learning notes (regular expressions)