Python image-crawler script: batch-downloads pictures from a web page, plus small demo utilities.
Some leftover code from earlier regular-expression experiments has not been removed, but it does not affect normal use.
# -*- coding:utf-8 -*-
"""Image-crawler script: scrape image URLs from a page given on the
command line and batch-download them, plus a small regex demo helper.

Ported from Python 2 (``urllib.urlopen``, print statements, ``file()``)
to Python 3.

NOTE(review): the original source contained empty regex patterns
(``re.compile(r'')``) -- almost certainly the real HTML-matching
patterns were stripped when the file was extracted.  The patterns used
below are plausible reconstructions (href / img-src) and MUST be
confirmed against the target site's actual HTML.
"""
import os
import re
import sys
import time
import urllib.request


def getAllUrl():
    """Print every http:// link found in the page named by sys.argv[1],
    then print "done"."""
    entry = sys.argv[1]
    page = urllib.request.urlopen(entry).read().decode("utf-8", "replace")
    # NOTE(review): original pattern was empty; assuming anchor hrefs -- TODO confirm
    pattern = re.compile(r'href="(.+?)"')
    web_site_pattern = re.compile(r'(http:.+?)')
    for url in pattern.findall(page):
        if web_site_pattern.match(url):
            print(url)
    print("done")


def download_pic():
    """Download every .jpg referenced by the page named by sys.argv[1]
    into C:/Tools/source, keeping each image's original basename."""
    url = sys.argv[1]
    connection = urllib.request.urlopen(url)
    data = connection.read().decode("utf-8", "replace")
    print("Waiting to get data")
    time.sleep(3)  # original code paused here; presumably to be polite to the server
    connection.close()
    # NOTE(review): original pattern was empty; assuming <img src="...jpg"> -- TODO confirm
    download_pic_pattern = re.compile(r'src="(.+?\.jpg)"')
    all_url = download_pic_pattern.findall(data)
    directory = "C:/Tools/source"
    # Extract the bare file name (e.g. "photo.jpg") from a full image URL.
    name_pattern = re.compile(r'/(\w+?\.jpg)')
    if not os.path.exists(directory):
        os.mkdir(directory)
    for urls in all_url:
        print(urls)
        name = name_pattern.findall(urls)
        if not name:
            # Robustness fix: the original indexed name[0] unconditionally
            # and would raise IndexError on an unexpected URL shape.
            continue
        print(name[0])
        local_path = "C:/Tools/source/%s" % name[0]
        jpeg_connection = urllib.request.urlopen(urls)
        jpeg = jpeg_connection.read()
        time.sleep(1)  # throttle between downloads, as in the original
        print("waiting")
        # Context manager replaces the deprecated file() call and guarantees close.
        with open(local_path, "wb") as f:
            f.write(jpeg)
        jpeg_connection.close()
    print("Done")


def download_pic_2():
    """Save the single image at sys.argv[1] to C:/Tools/a.jpg."""
    url = sys.argv[1]
    local_path = "C:/Tools/a.jpg"
    urllib.request.urlretrieve(url, local_path)
    print("Done")


def regulation():
    """Regex demo: print the matches of several sample patterns against a
    fixed test string.  Returns None; output goes to stdout."""
    str1 = "abc123*GBK1024abc*defb1kc12*addd"
    demo_patterns = (
        r'abc',            # literal match
        r'a.c',            # any middle character
        r'abc\*',          # escaped asterisk
        r'[abc]12',        # character class
        r'\d\*',           # digit before asterisk
        r'a[^\d]',         # 'a' followed by a non-digit
        r'a[^\d]*',        # greedy non-digit run
        r'[a-zA-Z]+(\d+)', # trailing digit group after letters
    )
    for pat in demo_patterns:
        print(re.compile(pat).findall(str1))
    # NOTE(review): the original function continued with a second test
    # string ("dadfae ef asdfe aa...") that is truncated in this chunk;
    # that tail is omitted rather than guessed at.