Original: http://start2join.me/python-regex-answer-20141030/
###########################################
######## #Exercises after Chapter 15th ##########
###########################################
Related book: Chapter 15 of Core Python Programming -- regular-expression exercises and answers.
Regular expression-related knowledge:
Chapter Exercises and Answers:
Every program below starts by importing the re module:
import re
import string
from random import choice, randint
from time import ctime
#15-1: Identify the following strings: Bat, bit, but, hat, hit, or hut.
# Exercise 15-1: each sample word must match one of bat/bit/but/hat/hit/hut.
print("#15-1: Identifies the following string: Bat, bit, but, hat, hit, or hut. ")
# Samples carry no padding: re.match() anchors at the first character, so a
# leading space would make every lookup return None.
strlist = ['bat', 'bit', 'but', 'hat', 'hit', 'hut']
for c in strlist:
    print(re.match(r'[bh][aiu]t', c).group())
#15-2: Matches any pair of words separated by a space, such as first name and last name.
# Exercise 15-2: two alphabetic words separated by one whitespace character.
print("#15-2: Matches any pair of words separated by a space, such as first name and last name.")
# The character class covers both cases so capitalised names match.
result = re.match(r'[a-zA-Z]+\s[a-zA-Z]+', 'Jim Green')
print(result.group())
#15-3: Matches a word and a letter separated by a comma and a space. For example, the first letter of a surname and a name in an English name.
# Exercise 15-3: the answer given here matches one or more dotted initials
# followed by a surname (e.g. "J. Green").
print("#15-3: Matches a word and a letter separated by a comma and a space. For example, the first letter of a surname and a name in an English name.")
result = re.match(r'([A-Za-z]\.)+?\s[a-zA-Z]+', 'J. Green')
print(result.group())
#15-4: Match all valid Python identifiers. Note: an identifier begins with a letter or an underscore, followed by any number of letters, digits, and underscores.
# Exercise 15-4: report which sample strings are valid Python identifiers.
print("#15-4: Matches all valid Python identifiers. Note: The letter underlines the beginning, followed by any number of letters and underscores")
# NOTE(review): the last sample is the placeholder left where the page's
# e-mail obfuscation replaced the original text; it is still an invalid
# identifier, so it serves the same purpose.
valueList = ['10_mys', '_myvalue09', 'Post99', '*hahah', '[email protected]%']
for c in valueList:
    # \w* (not \w+) so that one-character identifiers such as "x" are accepted.
    result = re.match(r'[a-zA-Z_]\w*$', c)
    if result:
        print(c + ': ' + result.group())
    else:
        print(c + ': illegal ... ')
#15-6: Match a simple web domain name that begins with www. and ends with .com, such as www.baidu.com. Extra credit: make the expression also support other top-level domains, such as .edu, .net, and so on.
# Exercise 15-6: match www.<name>.com, then generalise to other TLDs.
print("#15-6: Matches a simple web domain name that starts with. com at www. such as www.baidu.com. Add-on: The expressions you write also support other top-level domains. such as:. edu,. NET, and so on. ")
comlist = ['com', 'edu', 'net']
# Dots are escaped so they match a literal "." rather than any character.
result = re.match(r'www\.\w+\.com', 'www.baidu.com')
print(result.group(0))
# Build the alternation from comlist so only the listed TLDs are accepted.
result = re.match(r'www\.\w+\.(?:%s)$' % '|'.join(comlist), 'www.yahoo.net')
if result:
    print(result.group(0))
#15-7: Match the set of string representations of all Python integers.
# Exercise 15-7: digits with an optional long-integer suffix (l or L).
print("#15-7: Matches the entire Python shape of the string representation of the collection")
result = re.match(r'\d+[lL]?', '43545464')
print(result.group(0))
#15-8: Match the set of string representations of all Python long integers.
# Exercise 15-8: digits followed by a mandatory long-integer suffix (l or L).
print("#15-8: Matches the entire Python long shape of the string representation of the collection")
result = re.match(r'\d+[lL]', '43545464L')
print(result.group(0))
#15-9: Match the set of string representations of all Python floating-point numbers.
# Exercise 15-9: digits, a literal decimal point, then more digits.
print("#15-9: Matches the entire Python floating-point string representation of the collection")
result = re.match(r'\d+\.\d+', '43545464.00001')
print(result.group(0))
#15-10: Match the set of string representations of all Python complex numbers.
# Exercise 15-10: <float>+<number>j, e.g. 4.5+0.1j.
print("#15-10: Matches the entire Python complex string representation of a set of")
# The imaginary part uses \d* after the optional dot so a single-digit
# value such as "1j" is accepted as well as "0.1j".
result = re.match(r'\d+\.\d+\+\d+\.?\d*j', '4.5+0.1j')
print(result.group(0))
#15-11: A collection that matches the string representation of all e-mail messages
# Exercise 15-11: <login>@<dotted host name>.
print("#15-11: Matches the string representation of all e-mail Sets")
# NOTE(review): both the pattern and the sample address were replaced by an
# e-mail-obfuscation placeholder in the source page. The pattern is rebuilt
# from the fragments that survived; the sample address is a stand-in.
result = re.match(r'\w+@(\w+\.)*\w+', 'user@mail.example.com')
print(result.group(0))
#15-12: A collection of string representations that match all Web site addresses
# Exercise 15-12: one or more dot-terminated labels followed by a final label.
print("#15-12: Matches the collection of string representations of all Web site addresses")
# Each label consumes its own trailing dot, so the match does not depend on
# backtracking out of a leading \w+.
result = re.match(r'(\w+\.)+\w+', 'www.host.cool.com')
print(result.group(0))
#15-13: Match the first 9 months on the standard calendar
# Exercise 15-13: months 1-9, with an optional leading zero.
print("#15-13: Matches the first 9 months on the standard calendar")
# NOTE(review): the sample string was lost in the source page (it was blank,
# which makes re.match() return None); '09' is used as a representative value.
result = re.match(r'0?[1-9]', '09')
print(result.group(0))
#15-14: Match the last three months on the standard calendar
# Exercise 15-14: months 10, 11 and 12.
print("#15-14: Match the last three months on the standard calendar")
# NOTE(review): the sample was garbled to the word "One" in the source page,
# which can never match; '11' is used as a representative value.
result = re.match(r'1[0-2]', '11')
print(result.group(0))
#15-15: Credit card problem: a 15-digit credit card number has the format 4-6-5; a 16-digit number has the format 4-4-4-4. Groups that are too short are padded with zeros.
# Exercise 15-15: validate a card number as 4-4-4-4 (16 digits) or 4-6-5 (15 digits).
print("#15-15: Credit card problem: 15-bit credit card format is 4-6-5. The 16-bit credit card format is 4-4-4-4 and the number of digits is not sufficient to add 0. ")
cardno = '1321-1544-6511-0001'
# Count the digits first; the count selects which grouping must be used.
result = re.findall(r'(\d)', cardno)
layouts = {
    16: r'\d{4}-\d{4}-\d{4}-\d{4}',
    15: r'\d{4}-\d{6}-\d{5}',
}
pattern = layouts.get(len(result))
# fullmatch() rejects trailing junk that a plain match() would let through.
reresult = re.fullmatch(pattern, cardno) if pattern else None
if reresult:
    print('It is ok!')
else:
    print('It is illegal ... ')
#15-16: Modify getdata.py so that the generated data is saved in Redata.txt.
# Exercise 15-16: generate 5-10 random records and store them in Redata.txt.
# Each record is "<ctime>::<login>@<domain>.<tld>::<seconds>-<len login>-<len domain>".
print("#15-16: Modifies the getdata.py to store the generated data in Redata.txt. ")
lowercase = list(string.ascii_lowercase)
maxint = 23554545646
doms = ('com', 'edu', 'net', 'org', 'gov')
# NOTE(review): the loop bound (10) and the maximum domain length (12) were
# garbled in the source page ("Ten", "k"); the values are reconstructed.
with open('Redata.txt', 'w+') as f:
    for i in range(randint(5, 10)):
        dtint = randint(0, maxint - 1)
        dtstr = ctime(dtint)
        shorter = randint(4, 7)
        em = ''.join(choice(lowercase) for j in range(shorter))
        longer = randint(shorter, 12)
        dn = ''.join(choice(lowercase) for j in range(longer))
        # Format once so the line on screen is exactly the line in the file
        # (formatting twice would draw two different random TLDs).
        record = '%s::%s@%s.%s::%d-%d-%d' % (
            dtstr, em, dn, choice(doms), dtint, shorter, longer)
        f.write(record + '\n')
        print(record)
#15-17: Count how many times each day of the week occurs in Redata.txt.
# Exercise 15-17: tally how often each weekday abbreviation starts a line.
print("#15-17: Counts the number of occurrences of the week in the Redata.txt. ")
days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
counts = dict.fromkeys(days, 0)
with open('Redata.txt', 'r') as f:
    for line in f:
        result = re.match(r'^[\w]{3}', line)
        # One table lookup per line: every day updates its own counter, and
        # lines that do not start with a weekday are ignored.
        if result and result.group() in counts:
            counts[result.group()] += 1
Mon, Tue, Wed, Thu, Fri, Sat, Sun = (counts[day] for day in days)
print('%s\t%s\t%s\t%s\t%s\t%s\t%s' % days)
print('%d\t%d\t%d\t%d\t%d\t%d\t%d\n' % (Mon, Tue, Wed, Thu, Fri, Sat, Sun))
#15-18: Check that the file is intact by comparing the timestamp at the start of each line with the integer at its end.
# Exercise 15-18: for every line, the leading timestamp text must equal
# ctime() of the integer stored in the last field.
print("#15-18: Determines whether the file is complete by comparing the shape of the body part to the timestamp. ")
with open('Redata.txt', 'r') as f:
    for line in f:
        timestr = re.match(r'.+\s\d{4}', line)
        timeint = re.match(r'.+::(\d+)-', line)
        expected = ctime(int(timeint.group(1))) if timeint else None
        # A line that does not parse at all is reported as an error too,
        # instead of failing on None.group().
        if timestr is None or expected != timestr.group(0):
            if expected is not None:
                print(expected)
            print('File is Error ...')
            break
#15-19: Extracts the full timestamp field in each row.
# Exercise 15-19: print the timestamp text (everything up to the 4-digit year).
print("#15-19: Extracts the full timestamp field in each line. ")
with open('Redata.txt', 'r') as f:
    for line in f:
        timestr = re.match(r'.+\s\d{4}', line)
        if timestr:  # skip lines that carry no timestamp
            print(timestr.group(0))
#15-20: Extracts the complete e-mail field in each row.
# Exercise 15-20: print the field that sits between the two "::" separators.
print("#15-20: Extracts the complete e-mail field in each line. ")
with open('Redata.txt', 'r') as f:
    for line in f:
        # Non-greedy groups pin the capture to the middle field.
        mailstr = re.match(r'.+?::(.+?)::', line)
        if mailstr:
            print(mailstr.group(1))
#15-21: Extracts only the months in each row.
# Exercise 15-21: print the three-letter month, the second word of each line.
print("#15-21: Extracts only the months in each row.")
with open('Redata.txt', 'r') as f:
    for line in f:
        # Anchored on the weekday so the capture cannot drift to a later word.
        mailstr = re.match(r'\w{3}\s(\w{3})\s', line)
        if mailstr:
            print(mailstr.group(1))
#15-22: Extracts only the year in each row.
# Exercise 15-22: print the 4-character year that precedes the first "::".
print("#15-22: Extracts only the year in each row.")
with open('Redata.txt', 'r') as f:
    for line in f:
        mailstr = re.match(r'.+?\s(\w{4})::', line)
        if mailstr:
            print(mailstr.group(1))
#15-23: Extracts only the Time field (HH:MM:SS) in each row.
# Exercise 15-23: print the HH:MM:SS portion of the timestamp.
print("#15-23: Extracts only the Time field (HH:MM:SS) in each row.")
with open('Redata.txt', 'r') as f:
    for line in f:
        # Spell out the HH:MM:SS shape instead of relying on greedy ".+".
        mailstr = re.search(r'\s(\d{2}:\d{2}:\d{2})\s', line)
        if mailstr:
            print(mailstr.group(1))
#15-24: Extracts only the login name and domain name (connection extraction) in the email.
# Exercise 15-24: print "<login>@<domain>" (the address without its TLD).
print("#15-24: Extracts only the login name and domain name in the email (connection extraction).")
with open('Redata.txt', 'r') as f:
    for line in f:
        mailstr = re.search(r'::(\w+@\w+)\.', line)
        if mailstr:
            print(mailstr.group(1))
#15-25: Extracts only the logins and domain names in the email (extracted separately).
# Exercise 15-25: print the login name and the domain name as separate values.
print("#15-25: Extracts only the logins and domain names in the email (extracted separately). ")
with open('Redata.txt', 'r') as f:
    for line in f:
        nameStr = re.match(r'.+::(\w+)@', line)
        # NOTE(review): this pattern was partly replaced by an e-mail
        # obfuscation placeholder in the source page; reconstructed here.
        zonestr = re.match(r'.+@(\w+)\.', line)
        if nameStr and zonestr:
            print(nameStr.group(1) + '--' + zonestr.group(1))
#15-26: Replace each line of e-mail with your own e-mail address.
# Exercise 15-26: rewrite Redata.txt with every e-mail address replaced by a
# fixed one, then print the rewritten file.
print("#15-26: Replace each line of e-mail with your own e-mail address.")
lines = []
with open('Redata.txt', 'r') as r:
    for line in r:
        # NOTE(review): the pattern and the replacement address were both
        # replaced by an obfuscation placeholder in the source page; the
        # pattern is reconstructed and the address below is a stand-in.
        nameStr = re.sub(r'\w+@\w+\.\w+', 'me@example.com', line)
        lines.append(nameStr)
# Read everything before reopening for writing: opening with 'w+' truncates.
with open('Redata.txt', 'w+') as w:
    for line in lines:
        w.write(line)
with open('Redata.txt', 'r') as r:
    for line in r:
        print(line)
#15-27: Extract the month, day, and year from the timestamp and display them as 'Month day, year' (scanning each line only once).
# Exercise 15-27: print "<Month> <day>, <year>" using one match per line.
print("#15-27: Extracts the day of the month from the timestamp and displays it as 'Month day, year' (each line is traversed only once). ")
with open('Redata.txt', 'r') as f:
    for line in f:
        # Group 1 is "Mon dd" (\s{1,2} allows for a space-padded day),
        # group 2 is the year that precedes the first "::".
        datestr = re.match(
            r'\w{3}\s(\w{3}\s{1,2}\d{1,2})\s\S+\s(\d+)::', line)
        if datestr:
            print(datestr.group(1) + ', ' + datestr.group(2))
#15-28: Extract the phone number 3-3-4, which requires that the area code is optional (800-555-1212 and 555-1212 can match).
# Exercise 15-28: 3-3-4 phone numbers whose area code is optional.
print("#15-28: Extract phone number 3-3-4, require area code is optional. (both 800-555-1212 and 555-1212 can be matched). ")
numlist = ['800-555-1212', '555-1212']
for nums in numlist:
    number = re.match(r'(\d{3}-)?\d{3}-\d{4}', nums)
    print(number.group())
#15-29: Extract phone Numbers 3-3-4, require that the area code can contain either a bracket or a hyphen, they are optional (800-555-1212, 555-1212, or (800) 555-1212 can match).
# Exercise 15-29: 3-3-4 phone numbers whose optional area code is written
# either as "800-" or as "(800)".
print("#15-29: Extract phone number 3-3-4, require the area code can contain either a bracket or a hyphen, they are optional (800-555-1212, 555-1212 or (800) 555-1212 can match). ")
numlist = ['800-555-1212', '555-1212', '(800) 555-1212']
for nums in numlist:
    # \s? after the closing bracket accepts "(800) 555-1212" as well as
    # "(800)555-1212".
    number = re.match(r'(\(\d{3}\)\s?|\d{3}-)?\d{3}-\d{4}', nums)
    print(number.group())
Python advanced Programming – Regular expressions (exercise)