This article describes a method for collecting (scraping) Baidu Encyclopedia (Baidu Baike) pages with Python, shared here for your reference. The details are as follows:
?
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 The |
#!/usr/bin/python
# -*- coding: utf-8 -*-
# encoding=utf-8
# Filename: get_baike.py
import re
import sys

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen to urllib.request; bind it under the old name
    # so the call sites below stay unchanged.
    import urllib.request as urllib2


def gethtml(url, time=10):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to request.
        time: Timeout in seconds, passed to ``urlopen`` as ``timeout``.

    Returns:
        Whatever ``response.read()`` yields (the undecoded page content).

    Errors raised by ``urlopen`` or ``read`` are not caught here and
    propagate to the caller.
    """
    response = urllib2.urlopen(url, timeout=time)
    try:
        return response.read()
    finally:
        # Close the connection even when read() fails.
        response.close()


def clearblank(html):
    """Strip line breaks/tabs from *html* and collapse repeated blanks.

    Args:
        html: Text to clean up.

    Returns:
        ``''`` for empty input; otherwise the text with CR, LF and TAB
        characters removed, ``&nbsp;`` entities turned into spaces and
        every run of spaces reduced to a single space.
    """
    if not html:
        return ''
    # NOTE(review): the extracted listing lost the backslashes here
    # ('r|n|t'); '\r|\n|\t' is the presumed original -- confirm.
    html = re.sub('\r|\n|\t', '', html)
    # NOTE(review): both search literals were blanked out by extraction
    # (an empty "" would never terminate the loop); a double space and
    # '&nbsp;' are the presumed originals -- confirm against the script.
    while '  ' in html or '&nbsp;' in html:
        html = html.replace('&nbsp;', ' ').replace('  ', ' ')
    return html


if __name__ == '__main__':
    html = gethtml('http://baike.baidu.com/view/4617031.htm', 10)
    # Transcode: decode the bytes as GB2312 (undecodable bytes are
    # replaced) and re-encode the result as UTF-8.
    html = html.decode('gb2312', 'replace').encode('utf-8')
    # title_reg = r'
    # NOTE(review): the listing is cut off at this point in the source;
    # the title pattern and the remaining extraction steps are missing.
I hope this article will help you with your Python programming.