1. Principle:
This program fetches, in bulk, a CSDN blog's profile information and its article directory together with the corresponding links, and saves them to a file named mulu.txt in the current directory.
2. Specific code:
# Fetch the article-list pages of a CSDN blog and write the blog's info lines
# plus a numbered index of article titles and links to ./mulu.txt.
import re
import urllib.request

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) "
        "Gecko/20091201 Firefox/3.5.6"
    ),
}
LIST_URL = "http://blog.csdn.net/wangquannetwork/article/list/"
PAGE_COUNT = 2  # list pages 1..PAGE_COUNT are fetched
OUTPUT_PATH = "./mulu.txt"
INFO_LINE_COUNT = 5  # at most this many <li> entries are copied to the report
# Each matched title has its first 8 characters dropped -- presumably layout
# whitespace in the page markup; verify against the live HTML.
TITLE_PREFIX_LEN = 8

ARTICLE_URL_RE = re.compile(r'link_title"><a href="(.*?)"')
ARTICLE_TITLE_RE = re.compile(r'[0-9]{5}">(.*?)</a></span>')
INFO_ITEM_RE = re.compile(r"<li>(.+?)</li>")
# 'k">' is presumably the tail of the blog's own link (the user name ends in
# "k"); the text up to the next </a> is taken as the blog title.
BLOG_TITLE_RE = re.compile(r'k">(.*?)</a>')


def fetch_page(url):
    """Download *url* with the configured headers and return it decoded as UTF-8."""
    request = urllib.request.Request(url=url, headers=HEADERS)
    with urllib.request.urlopen(request) as response:
        return response.read().decode("utf-8")


def extract_articles(html):
    """Return ``(urls, titles)`` found in one article-list page.

    CRLF pairs are removed first so that a title split across lines can be
    matched by ``.``. Every URL is prefixed with ``blog.csdn.net``.
    """
    flat = html.replace("\r\n", "")
    urls = ["blog.csdn.net" + href for href in ARTICLE_URL_RE.findall(flat)]
    titles = [raw[TITLE_PREFIX_LEN:] for raw in ARTICLE_TITLE_RE.findall(flat)]
    return urls, titles


def extract_blog_title(html):
    """Return the first blog-title match in *html*, or ``""`` if there is none."""
    match = BLOG_TITLE_RE.search(html)
    return match.group(1) if match else ""


def extract_info_items(html):
    """Return the text of every single-line ``<li>...</li>`` element in *html*."""
    return INFO_ITEM_RE.findall(html)


def build_report(blog_title, info_items, urls, titles):
    """Return the full text written to the output file.

    Only the first ``INFO_LINE_COUNT`` info items are included. Articles are
    numbered from 1; a URL with no corresponding title gets an empty title
    rather than raising ``IndexError``.
    """
    lines = [
        "This is " + blog_title + " blog",
        "blog info below",
    ]
    lines.extend(info_items[:INFO_LINE_COUNT])
    lines.append("")
    lines.append("A total of " + str(len(urls)) + " article")
    lines.append("")
    for index, url in enumerate(urls):
        title = titles[index] if index < len(titles) else ""
        lines.append(str(index + 1) + ". " + title)
        lines.append(url)
    return "\n".join(lines) + "\n"


def main():
    """Fetch every list page, collect the articles and write the report."""
    urls = []
    titles = []
    html = ""
    for page in range(1, PAGE_COUNT + 1):
        html = fetch_page(LIST_URL + str(page))
        page_urls, page_titles = extract_articles(html)
        urls.extend(page_urls)
        titles.extend(page_titles)

    # The blog title and info lines are read from the last page fetched, so
    # no additional request is needed.
    report = build_report(
        extract_blog_title(html), extract_info_items(html), urls, titles
    )
    # Explicit encoding: titles may contain non-ASCII text that the platform
    # default codec cannot represent.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as out:
        out.write(report)


if __name__ == "__main__":
    main()
3. Results of running the Python code:
Note: The above is an original work; if you reproduce it, please cite the source: http://blog.csdn.net/wangquannetwork/article/details/45832109
Python crawler: crawling a CSDN blog's information and article directory with Python