Python: Brute-Force Click Brushing

Function
Automatically fetch the list of articles on a CSDN blog and add clicks (page views) to each post.
Source Code
import urllib.request
import urllib.error
import re
import time
import random
from bs4 import BeautifulSoup

# Regex matching links to this blog's article pages (article IDs are digits)
p = re.compile(r'/a359680405/article/details/\d+')
url = "http://blog.csdn.net/a359680405"  # your own blog home page

# build_opener() is used so the Python program can mimic a browser's access
opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]

html = opener.open(url).read().decode('utf-8')

allfinds = p.findall(html)
print(allfinds)

urlBase = "http://blog.csdn.net"  # part to be joined onto each relative URL

# The URLs on the page are repeated, so use a set to deduplicate them
mypages = list(set(allfinds))
for i in range(len(mypages)):
    mypages[i] = urlBase + mypages[i]

print('The pages to be brushed are:')
for index, page in enumerate(mypages):
    print(str(index), page)

# Maximum number of times each page will be brushed
brushMax = 200

# Brush all of the pages
print('Starting to brush now:')
for index, page in enumerate(mypages):
    brushNum = random.randint(0, brushMax)
    for j in range(brushNum):
        try:
            pageContent = opener.open(page).read().decode('utf-8')
            # Use BeautifulSoup to parse each blog post's title
            soup = BeautifulSoup(pageContent, 'html.parser')
            blogTitle = str(soup.title.string)
            blogTitle = blogTitle[0:blogTitle.find('-')]
            print(str(j), blogTitle)
        except urllib.error.HTTPError:
            print('urllib.error.HTTPError')
            time.sleep(1)  # on error, pause a few seconds first
        except urllib.error.URLError:
            print('urllib.error.URLError')
            time.sleep(1)  # on error, pause a few seconds first
        time.sleep(0.1)  # normal pause, so the server doesn't refuse requests
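As a minimal sketch of the link-extraction step in isolation, the snippet below runs the same regex-plus-set deduplication against a small hand-written HTML fragment (the fragment and the article IDs in it are made up for illustration), so you can check the matching logic without touching the network:

import re

# Hypothetical HTML fragment standing in for the blog home page
html = '''
<a href="/a359680405/article/details/12345678">Post A</a>
<a href="/a359680405/article/details/12345678">Post A (repeated link)</a>
<a href="/a359680405/article/details/87654321">Post B</a>
'''

p = re.compile(r'/a359680405/article/details/\d+')
urlBase = "http://blog.csdn.net"

# findall() returns every match, including duplicates;
# set() removes the repeats before the URLs are joined
mypages = [urlBase + path for path in set(p.findall(html))]
for index, page in enumerate(mypages):
    print(index, page)

The list comprehension does the same join as the index loop in the full script; either form works, this one is just more compact.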