The code is as follows:
"""Fetch shared Thunderbolt member accounts from xlfans.com and save them to a text file.

Steps: download the entry page, locate the link to the account-sharing post,
download that post, cut out the paragraphs that follow a day-dependent marker
string, and write the pieces (one per line) to ``save_path``.

NOTE(review): the marker strings and the text inside the link regex look
machine-translated in this listing; the live page text is presumably different
(likely Chinese) -- confirm against the actual page before relying on them.
"""
import re
import urllib.request
import urllib
import time

from collections import deque

# Request headers sent with every fetch (values reproduced as listed).
head = {
    'Connection': 'keep-alive',
    'Accept': 'text/html, Application/xhtml+xml, */*',
    'Accept-language': 'en-us,en;q=0.8,zh-hans-cn;q=0.5,zh-hans;q=0.3',
    'user-agent': 'mozilla/5.0 (Windows NT 6.3; WOW64; trident/7.0; rv:11.0) Like Gecko'
}

url = 'http://xlfans.com'  # The entry page; can be replaced by another
save_path = 'D:\\Program Files\\python\\test.txt'

WEEKEND_MARKER = "Thunder Powder Weekend Thunder member account"
DEFAULT_MARKER = "Thunderbolt Powder Exclusive Thunderbolt member account"

# Captures the 4-character archive id of the account-sharing post.
POST_LINK_RE = re.compile(
    r'href=\"http://xlfans.com/archives/(.{4})\" class=\"thumbnail\">(.*)'
    r'alt=\"Thunderbolt powder(.*)Thunderbolt member account sharing',
    re.M | re.I,
)


def fetch(page_url):
    """Download ``page_url`` with the ``head`` headers and return it decoded as UTF-8."""
    request = urllib.request.Request(page_url, None, head)
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('Utf-8')


def find_post_url(home_html, base_url):
    """Return the URL of the account-sharing post linked from the entry page.

    Raises:
        ValueError: if the entry page contains no matching link.
    """
    match = POST_LINK_RE.search(home_html)
    if match is None:
        raise ValueError("account-sharing post link not found on entry page")
    return base_url + "/archives/" + match.group(1)


def pick_marker(cur_day):
    """Choose the marker string for a ``time.strftime("%w")`` day value.

    ``%w`` numbers Sunday as '0', so '5' and '6' are Friday and Saturday.
    NOTE(review): Sunday ('0') therefore gets the default marker -- confirm
    that this is the intended meaning of "weekend".
    """
    if cur_day in ('5', '6'):
        return WEEKEND_MARKER
    return DEFAULT_MARKER


def extract_account_lines(page_html, marker, paragraphs=3):
    """Return the text pieces found in the first paragraphs after ``marker``.

    The page is cut at the first occurrence of ``marker`` and split on
    ``</p>``; each of the first ``paragraphs`` chunks is split on ``<br/>``.
    Every piece of the first chunk is kept; for later chunks the first piece
    is skipped, as in the original listing.

    Raises:
        ValueError: if ``marker`` does not occur in ``page_html``.
    """
    start = page_html.find(marker)
    if start == -1:
        # Without this check a -1 result would silently slice the last character.
        raise ValueError("marker text not found in page: %r" % (marker,))
    lines = []
    # Slicing tolerates pages with fewer than ``paragraphs`` chunks.
    for index, chunk in enumerate(page_html[start:].split("</p>")[:paragraphs]):
        pieces = chunk.split("<br/>")
        if index != 0:
            pieces = pieces[1:]
        lines.extend(pieces)
    return lines


def main():
    """Run the crawl and write the extracted lines to ``save_path``."""
    home_html = fetch(url)
    # Fetch the post itself; the listing computed this URL but re-fetched the entry page.
    post_html = fetch(find_post_url(home_html, url))

    # Get the system day-of-week to decide which marker to look for.
    cur_day = time.strftime("%w")
    if cur_day == '6':
        print(cur_day)  # debug output kept from the original (Saturday only)

    lines = extract_account_lines(post_html, pick_marker(cur_day))
    # Explicit encoding so non-ASCII page text does not depend on the locale default.
    with open(save_path, 'w', encoding='utf-8') as f_obj:
        for line in lines:
            f_obj.write(line)
            f_obj.write("\n")


if __name__ == '__main__':
    main()
Python crawler for fetching Thunderbolt member accounts