The script below contains some redundant code left over from earlier regular-expression experiments; it has not been removed, but it does not affect normal use.
# -*- coding: utf-8 -*-
"""Small crawler helpers: list the links on a page and bulk-download its JPEGs.

Run as a script with the page URL as the first command-line argument; the
script entry point calls ``Download_pic``.
"""
import os
import re
import sys
import time
import urllib.parse
import urllib.request

# href value of every <a href="..."> tag.
LINK_PATTERN = re.compile(r'<a href="(.+?)".+?>')
# Used with .match() to keep only links that start with "http:".
WEB_SITE_PATTERN = re.compile(r'(http:.+?)')
# NOTE(review): the original image pattern was lost (the source shows a blank
# pattern). This reconstruction captures the src of <img> tags that end in
# ".jpg" -- confirm against the pages actually being crawled.
IMAGE_PATTERN = re.compile(r'<img[^>]+?src="(.+?\.jpg)"')
# File name (word characters + ".jpg") following a slash in an image URL.
NAME_PATTERN = re.compile(r'/(\w+?\.jpg)')

DEFAULT_DIRECTORY = "C:/tools/source"
DEFAULT_SINGLE_PATH = "C:/tools/a.jpg"


def _resolve_url(url):
    """Return *url*, falling back to the first command-line argument."""
    if url:
        return url
    if len(sys.argv) < 2:
        raise SystemExit("usage: %s <page-url>" % sys.argv[0])
    return sys.argv[1]


def _read_url(url):
    """Return the raw bytes served at *url*; the connection is always closed."""
    with urllib.request.urlopen(url) as response:
        return response.read()


def _read_page(url):
    """Return the document at *url* decoded to text.

    The charset announced by the response is used when present, UTF-8
    otherwise; undecodable bytes are replaced rather than raising.
    """
    with urllib.request.urlopen(url) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset, errors="replace")


def getallurl(entry=None):
    """Print every ``http:`` hyperlink found on a page, then ``Done``.

    Args:
        entry: URL of the page to scan. Defaults to ``sys.argv[1]``.
    """
    page = _read_page(_resolve_url(entry))
    for link in LINK_PATTERN.findall(page):
        if WEB_SITE_PATTERN.match(link):
            print(link)
    print("Done")


def Download_pic(url=None, directory=DEFAULT_DIRECTORY, page_delay=3,
                 image_delay=1):
    """Download every ``.jpg`` image referenced by a page into *directory*.

    Args:
        url: URL of the page to scan. Defaults to ``sys.argv[1]``.
        directory: Target folder; created (with parents) when missing.
        page_delay: Seconds to pause after the page has been fetched.
        image_delay: Seconds to pause after each image has been fetched.

    Images whose URL has no ``<word>.jpg`` file name, or whose download
    fails, are reported and skipped so one bad link does not abort the batch.
    """
    url = _resolve_url(url)
    page = _read_page(url)
    print("Waiting to get Data")
    time.sleep(page_delay)

    os.makedirs(directory, exist_ok=True)
    for src in IMAGE_PATTERN.findall(page):
        # Resolve relative src values; absolute URLs pass through unchanged.
        image_url = urllib.parse.urljoin(url, src)
        print(image_url)
        names = NAME_PATTERN.findall(image_url)
        if not names:
            print("Skipping (no .jpg file name): %s" % image_url)
            continue
        print(names[0])
        try:
            jpeg = _read_url(image_url)
        except OSError as err:  # urllib.error.URLError is an OSError subclass
            print("Failed to download %s: %s" % (image_url, err))
            continue
        time.sleep(image_delay)
        print("Waiting")
        with open(os.path.join(directory, names[0]), "wb") as out:
            out.write(jpeg)
    print("Done")


def download_pic_2(url=None, local_path=DEFAULT_SINGLE_PATH):
    """Download the single resource at *url* straight to *local_path*.

    Args:
        url: URL to download. Defaults to ``sys.argv[1]``.
        local_path: Destination file path.
    """
    urllib.request.urlretrieve(_resolve_url(url), local_path)
    print("Done")


def Regulation():
    """Print the matches of a few sample regular expressions (regex practice)."""
    str1 = "abc123*gbk1024abc*defb1kc12*addd"
    samples = (
        r'abc',
        r'a.c',
        r'abc\*',
        r'[abc]12',
        r'\d\*',
        r'a[^\d]',
        r'a[^\d]*',
        r'[a-zA-Z]+(\d+)',
    )
    for sample in samples:
        print(re.compile(sample).findall(str1))

    # NOTE(review): both this sample string and its pattern were truncated in
    # the source (the pattern survived only as ")", which is not a valid
    # regex). The image pattern is used as a stand-in; with the truncated
    # sample string it finds nothing.
    str2 = "Dadfae ef Asdfe aa<ima"
    urls = IMAGE_PATTERN.findall(str2)
    print(urls)
    for url in urls:
        print(url)


if __name__ == "__main__":
    # Other entry points kept for manual experiments:
    # download_pic_2()
    # Regulation()
    # getallurl()
    Download_pic()
Python: a small crawler application for bulk-downloading Mowgli and Hummingbird pictures.