# -*- coding: utf-8 -*-
"""Save the hidden answer bodies of Zhihu's "explore" page to a text file.

The page is downloaded, the contents of every
``<textarea class="content hidden">`` element are extracted, ASCII word
characters and the ``&;`` husks they leave behind (from entities such as
``&lt;``) are stripped, and the remaining text is written to disk.
"""
import re
import urllib.request
from io import open  # public name of the builtin ``open`` (was imported from private ``_io``)

_DEFAULT_URL = "https://www.zhihu.com/explore"

# NOTE(review): path kept as it appears in the original script.  On a stock
# Linux desktop the folder is usually spelled "Desktop" -- confirm, or pass
# ``output_path`` explicitly.
_DEFAULT_OUTPUT_PATH = "/root/desktop/zhihu.txt"

# Browser-like headers sent with the request.  Spacing and letter case were
# restored from the garbled original; the header names are unchanged.
_REQUEST_HEADERS = {
    "Connection": "Keep-Alive",
    "Accept": "text/html, application/xhtml+xml, */*",
    "Accept-Language": "en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko"
    ),
}

# Everything between '<textarea class="content hidden">' + newline and the
# next '</textarea>'.  re.S lets '.' span the line breaks inside an answer.
_ANSWER_RE = re.compile(
    r'(?<=<textarea class="content hidden">\n).*?(?=</textarea>)', re.S
)

# The original used ``re.L`` here, which raises ValueError for ``str``
# patterns on Python 3.6+.  ``re.ASCII`` is the closest supported flag: it
# limits ``\w`` to [a-zA-Z0-9_], so non-ASCII text is left untouched.
_ASCII_WORD_RE = re.compile(r"\w+", re.ASCII)

# What is left of an HTML entity such as "&lt;" once its name is removed.
_EMPTY_ENTITY_RE = re.compile(r"(?:&;)+")


def _extract_answers(html):
    """Return the raw text of every hidden answer ``<textarea>`` in *html*."""
    return _ANSWER_RE.findall(html)


def _strip_ascii_markup(text):
    """Remove ASCII word characters from *text*, then any leftover ``&;``."""
    return _EMPTY_ENTITY_RE.sub("", _ASCII_WORD_RE.sub("", text))


def yunpan_search(url=_DEFAULT_URL, output_path=_DEFAULT_OUTPUT_PATH,
                  timeout=30.0):
    """Download *url*, extract the hidden answers and save them cleaned.

    Args:
        url: Page to fetch.  Any scheme ``urllib.request`` understands is
            accepted.  Defaults to the Zhihu explore page.
        output_path: File that receives the cleaned text, one answer after
            another, each followed by a newline.  Overwritten if it exists.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        None.  The result is the file at *output_path*; two progress
        messages are printed to stdout.

    Raises:
        urllib.error.URLError: The page could not be fetched.
        UnicodeDecodeError: The response body is not valid UTF-8.
        OSError: *output_path* could not be written.
    """
    request = urllib.request.Request(url, headers=_REQUEST_HEADERS)
    # ``with`` closes the response even if reading or decoding fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        html = response.read().decode("utf-8")

    answers = _extract_answers(html)
    print("Crawl success!")

    # Clean in memory and write once; the original wrote the raw text, read
    # it back and rewrote it, which yields the same file content.
    # An explicit encoding keeps non-ASCII text safe regardless of locale.
    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.writelines(
            _strip_ascii_markup(answer) + "\n" for answer in answers
        )
    print("Processing succeeded!")


if __name__ == "__main__":
    yunpan_search()