標籤:爬蟲 Python 百科段子
直接上代碼
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Fetch listing pages from qiushibaike.com and print each post found.

Each page is downloaded, the author names and post bodies are extracted
with regular expressions, and every (author, body) pair is printed.
"""
import re
import urllib.request

# Browser User-Agent string sent with every request.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
)

# Compiled once at import time; re.S lets "." span line breaks inside a tag.
_USER_PATTERN = re.compile(r'<h2>(.*?)</h2>', re.S)
_TEXT_PATTERN = re.compile(r'<div class="content">(.*?)</div>', re.S)


def _clean_text(raw):
    """Return *raw* with markup removed and ``<br/>`` turned into newlines."""
    # Drop the source line breaks first so that only <br/> produces newlines.
    text = raw.replace("\n", "")
    text = text.replace("<span>", "")
    text = text.replace("</span>", "")
    return text.replace("<br/>", "\n")


def _parse_posts(html):
    """Return a list of ``(user, text)`` pairs extracted from *html*.

    Pairs are matched by position: the n-th ``<h2>`` goes with the n-th
    ``<div class="content">``. If the two counts differ, the surplus
    entries are dropped.
    """
    users = _USER_PATTERN.findall(html)
    texts = _TEXT_PATTERN.findall(html)
    # Keep the pairs as a list rather than a dict keyed by user name: a
    # dict would silently discard all but one post of any repeated name.
    return [(user, _clean_text(text)) for user, text in zip(users, texts)]


def gettext(url, page):
    """Download *url* and print every post found on it.

    Args:
        url: Address of the page to fetch; the body is decoded as UTF-8.
        page: Page number, used only to label the printed output.

    Returns:
        None. The posts are written to standard output.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Set the header on this request only, instead of installing a
    # process-wide opener on every call.
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(request) as response:
        html = response.read().decode("utf-8")

    for index, (user, text) in enumerate(_parse_posts(html), start=1):
        print("第{}頁{}使用者{}".format(page, index, user))
        print("內容:" + text)
        print('\n')
        print("-----------------------------")


def main():
    """Print the posts of listing pages 1 and 2."""
    for page in range(1, 3):
        gettext("https://www.qiushibaike.com/8hr/page/" + str(page), page)


if __name__ == "__main__":
    main()
執行結果
Python 爬取糗事百科段子