Java下載檔案 爬蟲 逾時處理解決方案

來源:互聯網
上載者:User
import java.util.List;import java.io.BufferedReader;import java.io.BufferedWriter;import java.io.File;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.FileReader;import java.io.FileWriter;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.OutputStream;import java.net.HttpURLConnection;import java.net.MalformedURLException;import java.net.SocketTimeoutException;import java.net.URL;import java.util.ArrayList;import java.util.logging.Logger;import java.util.regex.Matcher;import java.util.regex.Pattern;public class Main { public static final int sleepMsPerConnection = 1000; public static final int timeOutMs = 20000; public static final int retry = 2; private static void download(String urlStr, String filePath) {  int retryCount = 0;  while(true){   try {    DownloadThread thread = new DownloadThread(urlStr, filePath);    thread.start();    thread.join(timeOutMs);    if(!thread.isAlive()){     return;    }else{     thread.interrupt();//實測並不能結束線程,請參考如何中斷JAVA線程一文    }   } catch (InterruptedException e) {    e.printStackTrace();   }   retryCount++;   if(retryCount > retry){    throw new RuntimeException("still timeout after retry " + (retry - 1) + " times");   }   System.out.println("retry");  } } private static String getHtml(String urlStr) {  int retryCount = 0;  while(true){   try {    GetHtmlThread thread = new GetHtmlThread(urlStr);    thread.start();    thread.join(timeOutMs);    if(!thread.isAlive()){     return thread.html;    }else{     thread.interrupt();    }   } catch (InterruptedException e) {    e.printStackTrace();   }   retryCount++;   if(retryCount > retry){    throw new RuntimeException("still timeout after retry " + (retry - 1) + " times");   }   System.out.println("retry");  } }}import java.io.BufferedReader;import java.io.InputStreamReader;import java.net.URL;public class GetHtmlThread extends Thread { public String html; private String urlStr; public 
GetHtmlThread(String urlStr) {  this.urlStr = urlStr; } public void run() {  try {   Thread.sleep(Main.sleepMsPerConnection);   URL url = new URL(urlStr);   StringBuilder sb = new StringBuilder();   BufferedReader br = new BufferedReader(new InputStreamReader(url     .openStream()));   String line = null;   while ((line = br.readLine()) != null) {    sb.append(line);    sb.append('\n');   }   br.close();   this.html = sb.toString();  } catch (InterruptedException e) {   // do nothing?  } catch (Exception e) {   e.printStackTrace();   System.exit(1);  } }}import java.io.File;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStream;import java.net.URL;public class DownloadThread extends Thread { private String urlStr; private String filePath; public DownloadThread(String urlStr, String filePath) {  this.urlStr = urlStr;  this.filePath = filePath; } public void run() {  try {   URL url = new URL(urlStr);   InputStream is = url.openStream();   File pdfFile = new File(filePath);   FileOutputStream os = new FileOutputStream(pdfFile);   copyStream(is, os);   is.close();   os.close();  } catch (Exception e) {   e.printStackTrace();   System.exit(1);  } }  /**  * still need to close inputstream and outputstream after call this method  * @param inputStream  * @param outputStream  * @throws IOException  */ private void copyStream(InputStream inputStream, OutputStream outputStream)   throws IOException {  byte[] b = new byte[1024];  int len;  while ((len = inputStream.read(b)) > 0) {   outputStream.write(b, 0, len);  }  outputStream.flush(); }}
相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.