Java File Download Crawler: A Timeout Solution

Source: Internet
Author: User
// Reconstructed from a whitespace- and case-mangled listing. The original showed three
// source files back to back; they are merged here into one compilable unit, so the two
// worker classes are package-private. Move them to their own files to make them public.
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Crawler helpers that bound each network operation with a wall-clock timeout.
 *
 * <p>Each operation runs in its own worker thread. The caller waits at most
 * {@link #timeoutMs} milliseconds for it; when the worker is still running after that,
 * it is abandoned and a fresh attempt is started, up to {@link #retry} retries.
 */
public class Main {
    /** Delay, in milliseconds, that {@link GetHtmlThread} sleeps before opening a connection. */
    public static final int sleepMsPerConnection = 1000;
    /** Maximum time, in milliseconds, the caller waits for one attempt. */
    public static final int timeoutMs = 20000;
    /** Number of retries after the first timed-out attempt. */
    public static final int retry = 2;

    /**
     * Downloads {@code urlStr} into the file {@code filePath}, retrying on timeout.
     *
     * @param urlStr   URL to fetch
     * @param filePath destination file path
     * @throws RuntimeException if the download fails, if every attempt times out, or if the
     *                          calling thread is interrupted while waiting
     */
    private static void download(String urlStr, String filePath) {
        int retryCount = 0;
        while (true) {
            DownloadThread worker = new DownloadThread(urlStr, filePath);
            worker.start();
            awaitWorker(worker, urlStr);
            if (!worker.isAlive()) {
                // Errors are not retried (only timeouts are); surface them to the caller
                // instead of terminating the JVM from inside the worker.
                if (worker.getFailure() != null) {
                    throw new RuntimeException("download failed: " + urlStr, worker.getFailure());
                }
                return;
            }
            // interrupt() cannot unblock a socket read, so the worker additionally relies on
            // its connect/read timeouts and on the interrupt check in its copy loop.
            worker.interrupt();
            retryCount++;
            if (retryCount > retry) {
                // At this point exactly `retry` retries have been made (retry + 1 attempts).
                throw new RuntimeException("still timeout after retry " + retry + " times");
            }
            System.out.println("retry");
        }
    }

    /**
     * Fetches the content of {@code urlStr} as text, retrying on timeout.
     *
     * @param urlStr URL to fetch
     * @return the page text, with every line terminated by {@code '\n'}
     * @throws RuntimeException if the fetch fails, if every attempt times out, or if the
     *                          calling thread is interrupted while waiting
     */
    private static String getHtml(String urlStr) {
        int retryCount = 0;
        while (true) {
            GetHtmlThread worker = new GetHtmlThread(urlStr);
            worker.start();
            awaitWorker(worker, urlStr);
            if (!worker.isAlive()) {
                if (worker.getFailure() != null) {
                    throw new RuntimeException("fetch failed: " + urlStr, worker.getFailure());
                }
                return worker.html;
            }
            worker.interrupt();
            retryCount++;
            if (retryCount > retry) {
                throw new RuntimeException("still timeout after retry " + retry + " times");
            }
            System.out.println("retry");
        }
    }

    /**
     * Waits up to {@link #timeoutMs} milliseconds for {@code worker} to finish.
     *
     * <p>If the calling thread is interrupted, the worker is cancelled, the caller's
     * interrupt flag is restored and the interruption is propagated rather than swallowed.
     */
    private static void awaitWorker(Thread worker, String urlStr) {
        try {
            worker.join(timeoutMs);
        } catch (InterruptedException e) {
            worker.interrupt();
            Thread.currentThread().interrupt();
            throw new RuntimeException("interrupted while waiting for " + urlStr, e);
        }
    }
}

/**
 * Worker thread that reads a URL line by line into {@link #html}.
 *
 * <p>Read {@link #html} only after the thread has terminated. It stays {@code null} when the
 * thread was interrupted or failed; in the latter case {@link #getFailure()} returns the cause.
 */
class GetHtmlThread extends Thread {
    /** Fetched text; assigned once, after the whole stream has been read. */
    public String html;
    private final String urlStr;
    private volatile Exception failure;

    public GetHtmlThread(String urlStr) {
        this.urlStr = urlStr;
    }

    @Override
    public void run() {
        try {
            // Pause before every connection.
            Thread.sleep(Main.sleepMsPerConnection);
            URLConnection connection = new URL(urlStr).openConnection();
            // Without these, a stalled server blocks this thread forever, because
            // Thread.interrupt() does not unblock socket I/O.
            connection.setConnectTimeout(Main.timeoutMs);
            connection.setReadTimeout(Main.timeoutMs);
            StringBuilder sb = new StringBuilder();
            // NOTE(review): decodes with the platform default charset, as the original did.
            try (BufferedReader br =
                    new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                String line;
                while ((line = br.readLine()) != null) {
                    if (Thread.currentThread().isInterrupted()) {
                        // The caller gave up on this attempt; stop instead of reading on.
                        throw new InterruptedIOException("fetch abandoned: " + urlStr);
                    }
                    sb.append(line);
                    sb.append('\n');
                }
            }
            this.html = sb.toString();
        } catch (InterruptedException ignored) {
            // Interrupted during the initial sleep: the caller has abandoned this attempt,
            // so there is nothing to report.
        } catch (Exception e) {
            failure = e;
        }
    }

    /** Returns the exception that ended the fetch, or {@code null} if none was recorded. */
    public Exception getFailure() {
        return failure;
    }
}

/**
 * Worker thread that copies the content of a URL into a local file.
 *
 * <p>After the thread has terminated, {@link #getFailure()} tells whether the copy failed.
 */
class DownloadThread extends Thread {
    private final String urlStr;
    private final String filePath;
    private volatile Exception failure;

    public DownloadThread(String urlStr, String filePath) {
        this.urlStr = urlStr;
        this.filePath = filePath;
    }

    @Override
    public void run() {
        try {
            URLConnection connection = new URL(urlStr).openConnection();
            // Bound blocking I/O so an abandoned worker eventually ends on its own.
            connection.setConnectTimeout(Main.timeoutMs);
            connection.setReadTimeout(Main.timeoutMs);
            // The input is opened first, so a failing URL does not create or truncate the
            // destination file. Both streams are closed even when the copy throws.
            try (InputStream is = connection.getInputStream();
                    OutputStream os = new FileOutputStream(new File(filePath))) {
                copyStream(is, os);
            }
        } catch (Exception e) {
            failure = e;
        }
    }

    /** Returns the exception that ended the download, or {@code null} if none was recorded. */
    public Exception getFailure() {
        return failure;
    }

    /**
     * Copies all bytes from {@code inputStream} to {@code outputStream} and flushes the output.
     * The caller still has to close both streams afterwards.
     *
     * @param inputStream  source of the bytes
     * @param outputStream destination of the bytes
     * @throws IOException if reading or writing fails, or if this thread is interrupted
     *                     between two chunks
     */
    private void copyStream(InputStream inputStream, OutputStream outputStream)
            throws IOException {
        byte[] buffer = new byte[1024];
        int len;
        while ((len = inputStream.read(buffer)) != -1) {
            if (Thread.currentThread().isInterrupted()) {
                // Checked before writing so an abandoned attempt stops touching the file
                // that a retry may already be rewriting.
                throw new InterruptedIOException("download abandoned: " + urlStr);
            }
            outputStream.write(buffer, 0, len);
        }
        outputStream.flush();
    }
}

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support, 6 Free Tickets per Quarter, Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.