import re

import requests
# HTTP request headers that present a desktop-browser User-Agent.
# The header name and value carry no surrounding whitespace: padded names or
# values are not well-formed HTTP header fields.
headers = {
    'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0)',
}

# Accumulates one dict per scraped post; get_info() appends to it and the
# script entry point reads it back when writing the output file.
Info_lists = []
def judgment_sex(class_name):
    """Translate a gender-icon CSS class name into a gender label.

    Args:
        class_name: The class-name fragment captured from a post's gender
            badge (the value tested against the "women icon" class).

    Returns:
        ``'female'`` when ``class_name`` is the women-icon class, otherwise
        ``'male'``.
    """
    # Compare case-insensitively and ignore surrounding whitespace so the
    # check does not depend on the exact capitalisation of the captured
    # class name.
    if class_name.strip().lower() == 'womenicon':
        return 'female'
    return 'male'
def get_info(URL):
    """Fetch one listing page and append its posts to ``Info_lists``.

    Downloads ``URL``, pulls six parallel lists out of the HTML with regular
    expressions (author id, level, gender class, content, laugh count and
    comment count) and appends one dict per post to the module-level
    ``Info_lists``.

    Args:
        URL: Address of the listing page to scrape.

    Returns:
        None. Results are accumulated in ``Info_lists``.
    """
    # Strip header names/values defensively so a padded entry in ``headers``
    # cannot make the request fail validation.
    request_headers = {key.strip(): value.strip() for key, value in headers.items()}
    # A timeout keeps one unresponsive page from hanging the whole run.
    res = requests.get(URL, headers=request_headers, timeout=10)
    html = res.text
    # re.S lets ``.`` span newlines; re.I tolerates class-name case differences.
    flags = re.S | re.I

    # NOTE(review): the author-id and level patterns were not recoverable from
    # the original text; these two are reconstructions that assume the author
    # name sits in an <h2> and the level inside the gender badge <div>.
    # Confirm against the live page markup.
    ids = re.findall(r'<h2>(.*?)</h2>', html, flags)
    levels = re.findall(r'<div class="articleGender \D+Icon">(.*?)</div>', html, flags)
    sexs = re.findall(r'<div class="articleGender (.*?)">', html, flags)
    contents = re.findall(r'<div class="content">.*?<span>(.*?)</span>', html, flags)
    laughs = re.findall(r'<span class="stats-vote"><i class="number">(\d+)</i>', html, flags)
    # NOTE(review): the label following the comment count is presumably the
    # Chinese word for "comments" on the real page; the English literal from
    # the original is accepted as well. Verify against the page.
    comments = re.findall(r'<i class="number">(\d+)</i>\s*(?:评论|comments)', html, flags)

    # zip() stops at the shortest list, so a post missing any one field
    # shortens the output and can misalign the remaining records.
    for author, level, sex, content, laugh, comment in zip(
            ids, levels, sexs, contents, laughs, comments):
        Info_lists.append({
            'id': author,
            'level': level,
            'sex': judgment_sex(sex),
            'content': content,
            'laugh': laugh,
            'comment': comment,
        })
if __name__ == '__main__':
    # Script entry point: scrape listing pages 1-11, then append every
    # collected record to the output text file, one field per line.
    urls = ['https://www.qiushibaike.com/text/page/{}/'.format(i) for i in range(1, 12)]
    for url in urls:
        get_info(url)

    fields = ('id', 'level', 'sex', 'content', 'laugh', 'comment')
    # Open the file once and let ``with`` close it even if a write fails.
    # Explicit UTF-8 with errors='replace' keeps the run best-effort without
    # silently dropping records the platform default codec cannot encode.
    with open('E:/qiushi.text', 'a+', encoding='utf-8', errors='replace') as f:
        for info_list in Info_lists:
            f.write(''.join(info_list[field] + '\n' for field in fields))
# Problem: the script runs in the debugger without reporting any error, yet the output file is never generated.
# (Python) Scraping Qiushibaike text posts: author, rating (level), likes, and comments.