#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Download image-search results from image.baidu.com's ``avatarjson`` endpoint.

Results are fetched one page (60 entries) at a time and every image is saved
into a directory named after the search word, relative to the current
working directory.
"""
import json
import os
import re
import socket
import time
import urllib
import urllib.error
import urllib.parse
import urllib.request

# Set timeout: applied process-wide so a stalled connection cannot hang a crawl.
timeout = 5
socket.setdefaulttimeout(timeout)

# Entries requested per result page; matches the ``rn=60`` query parameter.
_PAGE_SIZE = 60

# Keys read from the JSON response.
# NOTE(review): the pasted source had 'IMGs' / 'Objurl'. That looks like the
# same auto-capitalisation that damaged the keywords, so the casing was
# restored to 'imgs' / 'objURL' -- confirm against a live response.
_IMAGE_LIST_KEY = 'imgs'
_IMAGE_URL_KEY = 'objURL'


class Crawler:
    """Page through search results for a word and download each image."""

    # Name-mangled bookkeeping; these class-level values are only defaults
    # that start() / __saveimage() overwrite on the instance.
    __amount = 0
    __start_amount = 0
    __counter = 0

    def __init__(self, t=0.1):
        """Create a crawler.

        Args:
            t: seconds to sleep before every HTTP request (page or image).
        """
        self.time_sleep = t

    def __getimages(self, word='Beauty'):
        """Fetch every result page in the configured window and save its images.

        A page that cannot be fetched, decoded or parsed is reported and
        skipped, so one bad page can neither crash nor stall the crawl.
        """
        search = urllib.parse.quote(word)
        # Set a browser-like header so the request is less likely to be banned.
        headers = {
            'user-agent': 'mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) gecko/20100101 firefox/23.0',
        }
        # pn is the offset of the first result on the page being requested.
        for pn in range(self.__start_amount, self.__amount, _PAGE_SIZE):
            url = (
                'http://image.baidu.com/search/avatarjson?tn=resultjsonavatarnew&ie=utf-8&word='
                + search + '&cg=girl&pn=' + str(pn)
                + '&rn=60&itg=0&z=0&fr=&width=&height=&lm=-1&ic=0&s=0&st=-1&gsm=1e0000001e'
            )
            try:
                time.sleep(self.time_sleep)
                req = urllib.request.Request(url=url, headers=headers)
                # The context manager closes the response only if it was
                # actually opened (a bare ``finally: page.close()`` fails
                # with an unbound name when urlopen itself raises).
                with urllib.request.urlopen(req) as page:
                    data = page.read().decode('UTF8')
            except UnicodeDecodeError:
                print('-----Unicodedecodeerrorurl:', url)
            except urllib.error.URLError:
                print("-----Urlerrorurl:", url)
            except socket.timeout:
                print("-----Socket Timout:", url)
            else:
                try:
                    json_data = json.loads(data)
                except ValueError:
                    # json.JSONDecodeError subclasses ValueError.
                    print("-----JSON decode error url:", url)
                    continue
                self.__saveimage(json_data, word)
                print("Download Next page")
        print("Download Task End")

    def __saveimage(self, payload, word):
        """Download every image listed in one parsed result page.

        Files are numbered continuing from the number of entries already in
        ``./<word>`` so existing files are not overwritten. The number only
        advances after a successful download, so failures leave no gaps.
        """
        target_dir = './' + word
        os.makedirs(target_dir, exist_ok=True)
        self.__counter = len(os.listdir(target_dir)) + 1

        entries = payload.get(_IMAGE_LIST_KEY, []) if isinstance(payload, dict) else []
        for info in entries:
            try:
                self.__downloadimage(info, word)
            except urllib.error.HTTPError as urllib_err:
                print(urllib_err)
            except Exception as err:  # best effort: one bad image must not stop the page
                time.sleep(1)
                print(err)
                print("Generate unknown error, discard save")
            else:
                print("little Yellow figure +1, already available"
                      + str(self.__counter) + "Zhang Xiao Huang image")
                self.__counter += 1

    def __downloadimage(self, info, word):
        """Download one image to ``./<word>/<counter><suffix>``; raises on failure."""
        time.sleep(self.time_sleep)
        image_url = info[_IMAGE_URL_KEY]
        fix = self.__getfix(image_url)
        urllib.request.urlretrieve(image_url, './' + word + '/' + str(self.__counter) + str(fix))

    def __getfix(self, name):
        """Return the file suffix of *name* (dot included), or ``'.jpeg'``.

        The fallback is used when there is no dot, when the suffix is longer
        than five characters, or when it contains ``/`` (the last dot then
        belongs to the host or a directory, not to a file name).
        """
        match = re.search(r'\.[^\.]*$', name)
        if match:
            suffix = match.group(0)
            if len(suffix) <= 5 and '/' not in suffix:
                return suffix
        return '.jpeg'

    def __getprefix(self, name):
        """Return the part of *name* before its first dot (all of it if none)."""
        return name.partition('.')[0]

    def start(self, word, spider_page_num=1, start_page=1):
        """Crawl *spider_page_num* result pages for *word*.

        Args:
            word: search term; also the name of the output directory.
            spider_page_num: number of pages to fetch; each page holds 60
                results, so up to ``spider_page_num * 60`` images.
            start_page: 1-based number of the first page to fetch.
        """
        self.__start_amount = (start_page - 1) * _PAGE_SIZE
        self.__amount = spider_page_num * _PAGE_SIZE + self.__start_amount
        self.__getimages(word)


def main():
    """Run the example crawl shipped with the script."""
    crawler = Crawler(0.05)
    crawler.start('Sci Fi', 1, 5)
    # Other example invocations from the original script:
    #   crawler.start(' Two-dimensional sexy ', 3, 3)
    #   crawler.start(' Handsome ', 5)


# Guarded so importing this module does not start a network crawl.
if __name__ == '__main__':
    main()
# Log in to the mail.10086.cn web mail with Firefox and send a short message.
import getpass
from time import sleep  # kept from the original imports; unused below

from selenium import webdriver
from selenium.webdriver.common.keys import Keys  # kept from the original imports; unused below
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

MAIL_URL = 'http://mail.10086.cn/'

# Upper bound, in seconds, for each explicit wait. The original used fixed
# sleeps of 5-10 s; an explicit wait returns as soon as the element is ready
# and tolerates slower page loads.
WAIT_SECONDS = 30

# Element ids, copied verbatim from the original script.
# NOTE(review): the capitalisation of these ids may have been altered by the
# same auto-formatting that damaged the rest of the paste (ids are
# case-sensitive) -- verify each against the live page.
LOGIN_USER_ID = 'Txtuser'
LOGIN_PASSWORD_ID = 'Txtpass'
COMPOSE_BUTTON_ID = 'Btn_compose'
RECIPIENT_ID = 'Tocontaine'
SUBJECT_ID = 'Txtsubject'
SEND_BUTTON_ID = 'Topsend'

# XPath of the iframe that hosts the compose form (it can be found with the
# browser's page inspector). The padding spaces that the paste had inside the
# quoted id fragment were removed, since they could never match a real id.
# NOTE(review): the casing of 'Compose_' is kept as pasted -- verify.
COMPOSE_IFRAME_XPATH = "//iframe[contains(@id, 'Compose_')]"


def main():
    """Prompt for credentials and a recipient, then send the mail."""
    userid = input('username')
    # getpass keeps the password from being echoed to the terminal.
    passid = getpass.getpass('Password')
    sendtxt = "Hello"  # message content (typed into the subject field)
    sendto = input('SendTo')

    # Open Firefox on the login page. The browser is left open afterwards,
    # as in the original script, which never quits the driver.
    driver = webdriver.Firefox()
    driver.get(MAIL_URL)
    wait = WebDriverWait(driver, WAIT_SECONDS)

    # Fill in the user name and password, then submit the login form.
    emailelem = wait.until(EC.presence_of_element_located((By.ID, LOGIN_USER_ID)))
    emailelem.send_keys(userid)
    passwordelem = driver.find_element(By.ID, LOGIN_PASSWORD_ID)
    passwordelem.send_keys(passid)
    passwordelem.submit()

    # Open the compose view once the mailbox has loaded.
    wait.until(EC.element_to_be_clickable((By.ID, COMPOSE_BUTTON_ID))).click()

    # The compose form lives inside an iframe; switch into it before
    # looking up its fields.
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, COMPOSE_IFRAME_XPATH)))
    wait.until(EC.presence_of_element_located((By.ID, RECIPIENT_ID))).send_keys(sendto)
    driver.find_element(By.ID, SUBJECT_ID).send_keys(sendtxt)
    driver.find_element(By.ID, SEND_BUTTON_ID).click()


# Guarded so importing this module neither prompts nor launches a browser.
if __name__ == '__main__':
    main()
# Python Programming Quick Start, Chapter 10 practice project: reference answer (11.11.2)