package cn.searchphoto.util;

import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPInputStream;

/**
 * Downloads a picture from a remote website that uses Referer-based
 * anti-hotlinking, by sending the image URL itself as the Referer header.
 *
 * @author java Century web (java2000.net, laozizhu.com)
 */
public class ImageDownloader {

    /** Size in bytes of the buffer used to copy the response to disk. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Downloads the resource at {@code imgUrl} into the file {@code f}.
     *
     * <p>The request carries a {@code Referer} header equal to {@code imgUrl}
     * and a {@code Host} header built from the URL's host and port. A response
     * whose content encoding is {@code gzip} is decompressed before it is
     * written.
     *
     * @param imgUrl the URL to download
     * @param f the destination file
     * @return {@code f} when the download succeeded; {@code null} when any
     *     exception occurred (its stack trace is printed to standard error)
     */
    public static File download(String imgUrl, File f) {
        InputStream in = null;
        OutputStream out = null;
        try {
            URL url = new URL(imgUrl);
            URLConnection con = url.openConnection();

            // Derive Host from the parsed URL rather than from fixed string
            // offsets, so schemes other than "http://" are handled correctly.
            // NOTE(review): some HTTP handlers may ignore a caller-supplied
            // Host header -- confirm if the header is required by the server.
            String host = hostHeader(url);
            if (host.length() > 0) {
                con.setRequestProperty("Host", host);
            }
            con.setRequestProperty("Referer", imgUrl);

            in = con.getInputStream();
            if ("gzip".equalsIgnoreCase(con.getContentEncoding())) {
                // Wrap the stream we already hold instead of asking the
                // connection for it a second time.
                in = new GZIPInputStream(in);
            }

            // Opened after the input stream; the finally block below closes
            // the input even when the destination cannot be opened.
            out = new FileOutputStream(f);
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = in.read(buffer)) != -1) {
                out.write(buffer, 0, len);
            }
            return f;
        } catch (Exception ex) {
            // Callers rely on a null return for every kind of failure.
            ex.printStackTrace();
            return null;
        } finally {
            closeQuietly(out);
            closeQuietly(in);
        }
    }

    /** Builds the Host header value ("host" or "host:port") for a URL. */
    private static String hostHeader(URL url) {
        String host = url.getHost();
        if (host == null || host.length() == 0) {
            return "";
        }
        int port = url.getPort();
        return port == -1 ? host : host + ":" + port;
    }

    /** Closes a resource, ignoring failures; a null argument is a no-op. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: a close failure must not mask the outcome
            // of the download itself.
        }
    }
}
# NOTE: these snippets target Python 2 (they use urllib2 and cookielib).

# 1. Processing of cookies
import urllib2, cookielib

cookie_support = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
urllib2.install_opener(opener)
content = urllib2.urlopen('Http://XXXX').read()

# 2. Using a proxy together with cookies
# Placeholder proxy address -- replace with a real proxy before running.
proxy_support = urllib2.ProxyHandler({'http': 'http://XX.XX.XX.XX:XXXX'})
opener = urllib2.build_opener(proxy_support, cookie_support, urllib2.HTTPHandler)

# 3. Processing of the form
import urllib

# Placeholder value -- presumably a token taken from the sign-in page;
# TODO confirm where it comes from and fill in the real value.
fk = 'XXXXX'
# NOTE(review): the field names below must match the target form exactly
# (including letter case) -- verify against the page before use.
postdata = urllib.urlencode({
    'username': 'XXXXX',
    'Password': 'XXXXX',
    'Continueuri': 'http://www.verycd.com/',
    'FK': fk,
    'Login_submit': 'Login',
})
req = urllib2.Request(
    url='http://secure.verycd.com/signin/*/http://www.verycd.com/',
    data=postdata,
)
result = urllib2.urlopen(req).read()

# 4. Masquerading as a browser
headers = {
    'user-agent': 'mozilla/5.0 (Windows; U Windows NT 6.1; En-us; rv:1.9.1.6) gecko/20091201 firefox/3.5.6'
}
req = urllib2.Request(
    url='http://secure.verycd.com/signin/*/http://www.verycd.com/',
    data=postdata,
    headers=headers,
)

# 5. Defeating "anti-hotlinking": send a Referer the server accepts
headers = {
    'Referer': 'Http://www.cnbeta.com/articles'
}
# 6. Multithreaded concurrent fetching (Python 2: the module is named Queue)
from threading import Thread
from Queue import Queue
from time import sleep

# q is the task queue
# NUM is the total number of concurrent threads
# JOBS is how many tasks there are
q = Queue()
NUM = 2
JOBS = 10


# The concrete handler, responsible for processing a single task
def do_somthing_using(arguments):
    print(arguments)


# Worker: keeps taking items from the queue and processing them
def working():
    while True:
        arguments = q.get()
        do_somthing_using(arguments)
        sleep(1)
        # Mark the item as finished so q.join() can return.
        q.task_done()


# Start NUM threads that wait on the queue
for i in range(NUM):
    t = Thread(target=working)
    # Daemon threads do not keep the process alive once the main thread ends.
    t.setDaemon(True)
    t.start()

# Put JOBS tasks into the queue
for i in range(JOBS):
    q.put(i)

# Wait for all JOBS to complete
q.join()
Defeating "anti-hotlinking" by setting the Referer request header