"""Use Python to crawl the Sina Weibo marketing case library and download it locally.

The script works in two phases:

1. Request each listing page of the case library (a JSON endpoint) and
   collect the URL of the first attachment of every entry.
2. Download every collected attachment into ``folder_path``.

NOTE(review): this file was recovered from a copy whose letter case was
mangled.  String literals that could not be verified (the listing URL, the
JSON keys ``'Attachmentarr'`` / ``'URL'``, the cookie value) are kept exactly
as they appeared -- confirm them against a live response before relying on
the script.
"""

import json
import os
import time
import urllib.error
import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup  # noqa: F401  (imported by the original script, unused here)

# Request headers sent with every listing-page request.
# NOTE(review): the cookie is a hard-coded browser session captured by the
# original author; it is almost certainly expired -- replace it with your own.
# The header name must be "Cookie" (not "Cookies"), and the value must not
# start with a space: recent Requests versions reject such header values.
headers = {
    'user-agent': 'mozilla/5.0 (IPhone; CPU iPhone os 9_1 like Mac os X applewebkit/601.1.46 (khtml, like Gecko) version/9.0 mobile/13b143 safari/601.1',
    'Cookie': (
        "sinaglobal=115.171.224.117_1478818430.840585; "
        "Uor=www.baidu.com,www.sina.com.cn,; "
        "sguid=1479602257214_22629370; "
        "u_trs1=000000ca.e4817e03.5830f3d9.0954d478; "
        "Vjuids=8b9ebf053.1588e9bbe9b.0.a7d3c9f0da2d8; "
        "Lxlrtst=1480138279_o; "
        "vjlast=1479861321.1480207111.11; "
        "lxlrttp=1480138279; "
        "SCF=AVQGHEYBOZJIT9ZUITL3EGB1W7DGNLFZQC_FT1HI_O6VRMHL4NJAJ8QKEGO6QZ5961-UNIGKEJJ59-0W1IOAMQC.; "
        "apache=115.171.186.136_1481426939.303674; "
        "SUB=_2A251SKFNDERXGEVM6LIU8IZEWJYIHXVWP5WFRDV_PUNBM9ANLXJ4KW-ZXH1EJQZVQCFCS2TJHJUWL2NPFA.; "
        "subp=0033wrsxqpxfm725ws9jqgmf55529p9d9whqmfym94ynlsl9jbzenks15jpx5kzhugl.foeeek5feozr1k52djloi7d8mjloiefekx4g; "
        "alf=1512965277; "
        "u_trs2=00000088.95c945f5.584cd14e.d3ef2984; "
        "Web2_apache2_yf=53ce2a867ebeada0edd63e211478fed5; "
        "WEB2_APACHE2_JA=4E81A2DFE1AFDCEDFB634BA45827A3FB; "
        "ulv=1481429361019:7:1:1:115.171.186.136_1481426939.303674:1480134833882; "
        "Appclose=true; "
        "NTKF_T2D_CLIENTID=GUESTAE2E8836-1881-93C9-A9BE-EC1265A9B9B5; "
        "NTALK_CACHE_DATA={UID:KF_9378_ISME9754_3210522890,TID:1481429378473190}"
    ),
}

# Attachment URLs collected from the listing pages (filled in by ``main``).
downloadlinks = []

# Directory the attachments are written to.
folder_path = 'd:/'

# Listing endpoint; ``{}`` is replaced by the page number.
PAGE_URL_TEMPLATE = 'Http://all.vic.sina.com.cn/weibo_alk/hiddphp.php?page={}&act=jplist_ajax'

# Number of listing pages requested (page numbers 0 .. PAGE_COUNT - 1).
PAGE_COUNT = 46

# Pause, in seconds, after each HTTP request so the server is not hammered.
REQUEST_DELAY = 1

# Characters left untouched when percent-encoding an attachment URL, so that
# URL delimiters survive and only other characters (e.g. non-ASCII file
# names) are escaped.
# NOTE(review): the recovered source had an e-mail-obfuscation placeholder in
# the middle of this literal; the set below keeps its visible start ('/:')
# and end ('$&') -- confirm it matches the intended value.
URL_SAFE_CHARS = '/:?=@$&'


def _extract_file_urls(payload):
    """Return the first attachment URL of every entry in a decoded listing page.

    The endpoint's payload is handled in two shapes: a mapping whose values
    are the entries, or a plain sequence of entries.

    Raises:
        KeyError, IndexError, TypeError: if an entry lacks the expected
            ``['Attachmentarr'][0]['URL']`` structure.
    """
    entries = payload.values() if isinstance(payload, dict) else payload
    # NOTE(review): key spelling/case taken verbatim from the recovered source.
    return [entry['Attachmentarr'][0]['URL'] for entry in entries]


def collect_download_links(page_count=PAGE_COUNT, delay=REQUEST_DELAY):
    """Fetch ``page_count`` listing pages and return all attachment URLs found.

    Args:
        page_count: number of pages to request, starting at page 0.
        delay: seconds to sleep after each request.

    Returns:
        A list of attachment URLs in page order (items may be ``None`` when
        the server returns a null URL).
    """
    links = []
    for page in range(page_count):
        response = requests.get(PAGE_URL_TEMPLATE.format(page), headers=headers)
        time.sleep(delay)
        # Force the decoding used for ``response.text`` instead of letting
        # Requests guess it from the response headers.
        response.encoding = 'Utf-8'
        links.extend(_extract_file_urls(json.loads(response.text)))
    return links


def download_files(links, destination=folder_path, delay=REQUEST_DELAY):
    """Download every non-``None`` URL in ``links`` into ``destination``.

    The local file name is the percent-decoded last path segment of the URL.
    A failed download is reported on stdout and does not stop the loop.

    Args:
        links: iterable of attachment URLs; ``None`` items are skipped.
        destination: directory the files are saved in.
        delay: seconds to sleep before each download.
    """
    for link in links:
        if link is None:
            continue
        quoted_url = urllib.parse.quote(link, safe=URL_SAFE_CHARS)
        time.sleep(delay)
        # The name comes from remote data: keep only the final path component
        # so a crafted segment (e.g. an encoded '/') cannot escape
        # ``destination``.
        name = os.path.basename(urllib.parse.unquote(link.split('/')[-1]))
        try:
            urllib.request.urlretrieve(quoted_url, os.path.join(destination, name))
            # Kept inside the ``try`` on purpose: printing a name the console
            # cannot encode raises UnicodeEncodeError (a ValueError), which
            # the handler below reports instead of aborting the run.
            print(name + 'Download Successful')
        except urllib.error.HTTPError:
            print('page does not exist')
        except ValueError:
            print('Unknown')


def main():
    """Collect all attachment links, then download them to ``folder_path``."""
    downloadlinks.extend(collect_download_links())
    download_files(downloadlinks, folder_path)


if __name__ == '__main__':
    main()