import requests


class CFDA:
    """Fetch licence-list pages from the CFDA portal and save company names.

    Each call to :meth:`getData` POSTs one page query to the portal;
    :meth:`extractData` then appends the ``EPS_NAME`` field of every record in
    that response to a text file, one name per line.

    The instance owns an open file handle. Use it as a context manager
    (``with CFDA() as cfda: ...``) or call :meth:`fclose` when done.

    NOTE(review): the source this was recovered from had its string literals
    mangled (extra spaces, altered letter case). The URL, header names, form
    field names and JSON keys below were restored to their most plausible
    form -- confirm them against the live endpoint.
    """

    # Raw string: the path contains backslashes that must not be read as
    # escape sequences.
    DEFAULT_OUTPUT_PATH = r'F:\BaiduYunDownload\python\cfda.txt'

    # Seconds to wait for the server before giving up on a request.
    DEFAULT_TIMEOUT = 30

    def __init__(self, output_path=DEFAULT_OUTPUT_PATH, timeout=DEFAULT_TIMEOUT):
        """Set up the request parameters and open the output file for appending.

        Args:
            output_path: File that company names are appended to.
            timeout: Per-request timeout in seconds passed to ``requests.post``.
        """
        # Endpoint the form data is submitted to.
        self.url = (
            'http://125.35.6.84:81/xk/itownet/portalAction.do'
            '?method=getXkzsList'
        )
        # Browser-like request headers (copied from the browser's network
        # panel) so the request looks like the portal's own AJAX call.
        self.header = {
            'Accept': '*/*',
            'Content-Type': 'application/x-www-form-urlencoded;utf-8',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'http://125.35.6.84:81/xk/',
            'Accept-Language': 'zh-CN',
            'Accept-Encoding': 'gzip,deflate',
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) '
                'like Gecko'
            ),
            'Host': '125.35.6.84:81',
        }
        self.timeout = timeout
        # Last response received by getData(); None until the first request.
        self.html = None
        # Last company name written by extractData().
        self.cfda_data = None
        # Explicit encoding so non-ASCII names are written the same way on
        # every platform instead of depending on the locale default.
        self.f = open(output_path, 'a', encoding='utf-8')

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the output file on leaving the ``with`` block."""
        self.fclose()
        return False

    def getData(self, data):
        """POST ``data`` to the portal and keep the response in ``self.html``.

        Args:
            data: Mapping of form fields for one page query.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-success HTTP status.
        """
        self.html = requests.post(
            self.url,
            data=data,
            headers=self.header,
            timeout=self.timeout,
        )
        # Fail here with a clear HTTP error rather than later while trying to
        # decode an error page as JSON.
        self.html.raise_for_status()

    def extractData(self):
        """Append the company name of every record in the last response.

        Reads the JSON body of ``self.html``, walks its ``'list'`` array and
        writes each record's ``'EPS_NAME'`` followed by a newline. A response
        without a ``'list'`` entry (or with an empty one) writes nothing.

        Raises:
            RuntimeError: If :meth:`getData` has not been called yet.
        """
        if self.html is None:
            raise RuntimeError('getData() must be called before extractData()')
        # Decode the body once; the page size is whatever the server returns.
        records = self.html.json().get('list') or []
        for record in records:
            self.cfda_data = record['EPS_NAME']
            self.f.write(self.cfda_data + '\n')

    def fclose(self):
        """Close the output file."""
        self.f.close()


if __name__ == '__main__':
    # The context manager guarantees the file is closed even if a request
    # fails part-way through the crawl.
    with CFDA() as cfda:
        for page in range(1, 20):
            data = {
                'on': 'true',
                'page': page,
                # NOTE(review): this value was unreadable in the recovered
                # source; '15' is a guess at the page size -- confirm.
                'pageSize': '15',
                'productName': '',
                'conditionType': '1',
                'applyname': '',
                'applysn': '',
            }
            cfda.getData(data)
            cfda.extractData()
# Python topic 5: crawling CFDA data