# -*- coding: utf-8 -*-
"""Crawl the article links of a CSDN blog author, one list page at a time.

@author: Jiangfuqiang
"""

import contextlib
import re
import time

try:  # Python 2 module names, as used by the original script
    import cookielib
    import urllib2
except ImportError:  # Python 3 renamed both modules
    import http.cookiejar as cookielib
    import urllib.request as urllib2

# Matches an anchor tag that points at an article detail page, together with
# its link text.  Compiled once here instead of on every loop iteration.
ARTICLE_LINK_RE = re.compile(
    r'<a href="/\w+/article/details/\d+">'
    r'\s*\t*\n*\s*\t*\s*.*?\t*\n*\t*\s*</a>',
    flags=re.MULTILINE)

# URL template of an author's article list page: (author, page number).
LIST_URL = "http://blog.csdn.net/%s/article/list/%d"

# Request headers sent with every list-page request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) '
                  'Gecko/20100101 Firefox/14.0.1',
    'Referer': 'http://my.csdn.net/my/favorite',
}

# A list page yielding fewer matches than this is treated as the last page.
PAGE_SIZE = 20


def _make_fetcher():
    """Return a ``fetch(url)`` callable backed by a cookie-aware opener."""
    cookie_jar = cookielib.LWPCookieJar()
    opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(cookie_jar), urllib2.HTTPHandler)
    # Kept from the original script: the opener also becomes the default
    # one used by urlopen() for the rest of the process.
    urllib2.install_opener(opener)

    def fetch(url):
        request = urllib2.Request(url, headers=HEADERS)
        # closing() guarantees the connection is released even if read()
        # raises; the original never closed the response.
        with contextlib.closing(opener.open(request)) as response:
            body = response.read()
        # On Python 3 read() returns bytes, which a str pattern cannot
        # search; on Python 2 the body is already ``str`` and is left as-is.
        if not isinstance(body, str):
            body = body.decode('utf-8', 'replace')
        return body

    return fetch


def startparser(author, page=1, fetch=None, delay=2):
    """Print and collect the article links of *author*, page by page.

    Starting at list page *page*, each page is fetched, every article
    anchor found on it is printed, and a progress line is printed.  The
    crawl stops after the first page that yields fewer than ``PAGE_SIZE``
    links.

    Args:
        author: Blog name used in the list-page URL.
        page: Number of the first list page to fetch.
        fetch: Optional callable ``fetch(url) -> str`` returning a page's
            HTML.  Defaults to an HTTP fetcher with cookie support.
        delay: Seconds to sleep before each request; a falsy value
            disables the pause.

    Returns:
        A list of every matched anchor tag, in the order encountered.

    Raises:
        Whatever *fetch* raises; errors from the default fetcher are not
        caught here.
    """
    if fetch is None:
        fetch = _make_fetcher()

    links = []
    while True:
        if delay:
            time.sleep(delay)
        html = fetch(LIST_URL % (author, page))
        found = ARTICLE_LINK_RE.findall(html)
        for link in found:
            print(link)
        links.extend(found)
        # As in the original, the progress line is printed after the
        # increment, so it shows the number of the next page.
        page += 1
        print('success............page:%d' % page)
        if len(found) < PAGE_SIZE:
            break
    return links


if __name__ == '__main__':
    startparser('yiyaaixuexi', 1)
This Python script crawls article links and titles. Combined with Python code for sending e-mail, the program can be slightly modified to send the article links to a mailbox for later review.
Use Python to crawl all articles published by a CSDN blogger you follow