Python crawler that downloads beautiful pictures
# coding=utf-8
"""Crawl a gallery front page and download every linked .jpg picture.

The script fetches the start page, collects the ``.html`` pages it links
to, splits those pages across a fixed number of worker threads, and each
worker saves the ``rel="...jpg"`` images it finds into the ``pic/``
directory.  It runs on both Python 2 and Python 3.
"""
import os
import re
import threading
import time
import urllib

try:  # Python 3
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlopen, urlretrieve

# Directory (relative to the working directory) that receives the images.
PIC_DIR = 'pic'
# Number of worker threads the page list is divided among.
THREAD_COUNT = 7


def gethtml(url):
    """Return the body of *url* as text.

    The connection is always closed, and bytes (Python 3) are decoded so
    the str regex patterns used elsewhere in this file can be applied.
    """
    page = urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()
    if not isinstance(html, str):
        # Python 3 returns bytes; undecodable bytes are replaced, not fatal.
        html = html.decode('utf-8', 'replace')
    return html


def getimgurl(html, src):
    """Return every substring of *html* matching the regex *src*."""
    return re.compile(src).findall(html)


def getimgpage(html):
    """Return the ``http://....html`` links found in *html*.

    NOTE: the pattern is greedy, so several links on one physical line
    are returned as a single match.
    """
    return re.compile(r'http://.*\.html').findall(html)


def _extract_image_urls(html):
    """Return the quoted URL of each ``rel=...jpg`` attribute in *html*."""
    urls = []
    for match in getimgurl(html, r'rel=.*\.jpg'):
        # Normalise single quotes so one split handles both quote styles.
        parts = match.replace("'", '"').split('"')
        if len(parts) > 1:  # unquoted attribute: no URL to take
            urls.append(parts[1])
    return urls


def _ensure_pic_dir():
    """Create PIC_DIR if it is missing, tolerating a concurrent creator."""
    if os.path.isdir(PIC_DIR):
        return
    try:
        os.makedirs(PIC_DIR)
    except OSError:
        # Another worker thread may have created it in the meantime.
        if not os.path.isdir(PIC_DIR):
            raise


def _split_chunks(items, parts):
    """Split *items* into *parts* contiguous slices covering every item.

    The first ``len(items) % parts`` slices get one extra element, so no
    trailing items are dropped; slices may be empty for short inputs.
    """
    size, extra = divmod(len(items), parts)
    chunks = []
    start = 0
    for index in range(parts):
        end = start + size + (1 if index < extra else 0)
        chunks.append(items[start:end])
        start = end
    return chunks


def downloadimg(url):
    """Download every image referenced by the page at *url* into PIC_DIR.

    Files that already exist are skipped.  A failure on one image is
    reported and does not stop the remaining downloads.  Errors while
    fetching the page itself propagate to the caller.
    """
    html = gethtml(url)
    _ensure_pic_dir()
    for srcurl in _extract_image_urls(html):
        # Flatten the URL into a single file name.
        imgname = srcurl.replace(':', '_').replace('/', '_')
        print('download pic %s ...' % srcurl)
        target = 'pic/%s' % imgname
        if os.path.isfile(target):
            continue
        try:
            urlretrieve(srcurl, target)
        except (IOError, OSError) as err:
            print('failed to download %s: %s' % (srcurl, err))


class MyThread(threading.Thread):
    """Worker thread that downloads the images of a list of page URLs."""

    def __init__(self, urllist):
        threading.Thread.__init__(self)
        self.urllist = urllist

    def run(self):
        for u in self.urllist:
            try:
                downloadimg(u)
            except (IOError, OSError) as err:
                # Keep going: one unreachable page must not end the worker.
                print('failed to crawl %s: %s' % (u, err))


def main():
    """Crawl the start page and fan its sub-pages out to worker threads."""
    url = 'http://www.6188.net/'
    html = gethtml(url)
    urllist = getimgpage(html)
    for chunk in _split_chunks(urllist, THREAD_COUNT):
        if chunk:  # no point starting a thread with nothing to do
            MyThread(chunk).start()


if __name__ == '__main__':
    main()
Python crawler that downloads beautiful pictures