Function
Automatically fetch the list of articles on a CSDN blog and repeatedly request each one to increase its click count.
Source Code
import urllib.request
import urllib.error
import re
import time
import random
from bs4 import BeautifulSoup

# Pattern for links to this blog's articles (the numeric part is the article ID)
p = re.compile(r'/a359680405/article/details/\d+')
url = "http://blog.csdn.net/a359680405"   # your own blog home page

# build_opener() lets the Python program mimic a browser when visiting the site
opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
html = opener.open(url).read().decode('utf-8')

allfinds = p.findall(html)
print(allfinds)

urlbase = "http://blog.csdn.net"   # prefix to prepend to each relative link

# Links on the page appear more than once, so use a set to de-duplicate them
mypages = list(set(allfinds))
for i in range(len(mypages)):
    mypages[i] = urlbase + mypages[i]

print('Pages to be brushed:')
for index, page in enumerate(mypages):
    print(str(index), page)

# Maximum number of hits per page
brushMax = 200

# Brush every page
print('Start brushing')
for index, page in enumerate(mypages):
    brushNum = random.randint(0, brushMax)
    for j in range(brushNum):
        try:
            pageContent = opener.open(page).read().decode('utf-8')
            # Use BeautifulSoup to parse the title of each article
            soup = BeautifulSoup(pageContent, 'html.parser')
            blogTitle = str(soup.title.string)
            blogTitle = blogTitle[0:blogTitle.find('-')]
            print(str(j), blogTitle)
        except urllib.error.HTTPError:
            print('urllib.error.HTTPError')
            time.sleep(1)   # pause for a few seconds after an error
        except urllib.error.URLError:
            print('urllib.error.URLError')
            time.sleep(1)   # pause for a few seconds after an error
        time.sleep(0.1)     # normal pause to keep the server from refusing access
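If you want to reuse the script for a different CSDN account, one small refactor (a sketch, not part of the original post) is to derive the home-page URL and the article-link regex from a single username variable; make_targets below is a hypothetical helper introduced only for illustration:

import re

# Hypothetical helper (not in the original script): build the home-page URL
# and the article-link pattern from one CSDN username instead of hard-coding it.
def make_targets(username):
    url = "http://blog.csdn.net/" + username                              # blog home page
    pattern = re.compile(r'/%s/article/details/\d+' % re.escape(username))
    return url, pattern

url, p = make_targets("a359680405")   # then use url and p exactly as in the script above

Everything after these two assignments would stay the same as in the original script.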
Python -- Crazily Brushing Click Counts