標籤:
程式碼範例
package demo0806;import java.io.BufferedReader;import java.io.File;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.net.URL;import java.net.URLConnection;import java.util.HashMap;import java.util.Iterator;import java.util.Map;import java.util.Map.Entry;import java.util.Set;public class ScanTitleFromWebPage { private String website; private Map<String,String> recentShareCode=new HashMap<String,String>(); private Map<String,String> hotShareCode=new HashMap<String,String>(); public ScanTitleFromWebPage(String website) { this.website=website; } public String ScanWebForTitle() { InputStream inputStream=null; String title=null; try { //建立URL對象,例如:百度搜尋中國好聲音 //wd關鍵詞的值即為"中國好聲音"的UTF-8編碼, //可以使用URLEncoder對字元進行編 URL url = new URL(website); //建立URLConnection對象 URLConnection openConnection = url.openConnection(); //有些網站不允許java作為用戶端訪問 openConnection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)"); //擷取網頁資訊編碼類別型 String headerField = openConnection.getHeaderField("Content-Type"); int indexOf = headerField.indexOf("charset="); String encoding = headerField.substring(indexOf+8); //擷取URLConnection對象的輸入資料流 inputStream=openConnection.getInputStream(); //通過IO來讀取流,寫入檔案 String line=null; InputStreamReader inputStreamReader; inputStreamReader = new InputStreamReader(inputStream,encoding); BufferedReader bufferedReader = new BufferedReader(inputStreamReader); int flagOfRecentShareCode=0; int flagOfHotShareCode=0; String recentCode=null; String recentHref=null; String hotCode=null; String hotHref=null; while((line=bufferedReader.readLine())!=null) { int start=-1; int end=-1; if((start=line.indexOf("<title>"))!=-1) { end=line.indexOf("</title>"); title = line.substring(start+7, end); } else if(line.indexOf("NewCodeList")!=-1) { flagOfRecentShareCode=1; } else if(line.indexOf("HotCodeList")!=-1) { flagOfRecentShareCode=0; flagOfHotShareCode=1; } else if(line.indexOf( 
"</div>")!=-1) { flagOfHotShareCode=0; } else if((start= line.indexOf("href="))!=-1&&flagOfRecentShareCode==1) { end=line.indexOf(" target"); recentHref=line.substring(start+6, end-1); } else if((start= line.indexOf("href="))!=-1&&flagOfHotShareCode==1) { end=line.indexOf(" target"); hotHref=line.substring(start+6, end-1); } else if((start= line.indexOf("title="))!=-1&&flagOfRecentShareCode==1) { end=line.indexOf(">"); recentCode=line.substring(start+7, end-1); recentShareCode.put(recentCode, recentHref); } else if((start= line.indexOf("title="))!=-1&&flagOfHotShareCode==1) { end=line.indexOf(">"); hotCode=line.substring(start+7, end-1); hotShareCode.put(hotCode, hotHref); } } inputStreamReader.close(); } catch (IOException e) { System.err.println("無法下載"); e.printStackTrace(); } finally { if(inputStream!=null) { try { inputStream.close(); } catch(Exception ex) { //不處理 } } } return title; } public static void main(String[] args) throws InterruptedException, IOException { String website="http://www.oschina.net"; ScanTitleFromWebPage scanTitleFromWebPage; scanTitleFromWebPage=new ScanTitleFromWebPage(website); String title = scanTitleFromWebPage.ScanWebForTitle(); File file=new File("OSChomepage.html"); FileOutputStream fileOutputStream = new FileOutputStream(file); if(title!=null) { String str="網站標題為:"+title; byte[] bytes = str.getBytes(); fileOutputStream.write(bytes); fileOutputStream.write(‘\n‘); System.out.println(str); } Map<String,String> recentShareCode=scanTitleFromWebPage.recentShareCode; Map<String,String> hotShareCode=scanTitleFromWebPage.hotShareCode; Set<Entry<String, String>> entrySet; Iterator<Entry<String, String>> iterator; String key=null; String value=null; Entry<String, String> next=null; fileOutputStream.write("----------------最新分享代碼有如下----------------".getBytes()); fileOutputStream.write(‘\n‘); System.out.println("----------------最新分享代碼有如下----------------"); entrySet= recentShareCode.entrySet(); iterator= entrySet.iterator(); 
while(iterator.hasNext()) { next = iterator.next(); key=next.getKey(); value=next.getValue(); String str=key+"\t"+"("+value+")"; byte[] bytes = str.getBytes(); fileOutputStream.write(bytes); fileOutputStream.write(‘\n‘); System.out.println(key+"\t"+"("+value+")"); } fileOutputStream.write("----------------本周最熱門代碼有如下----------------".getBytes()); fileOutputStream.write(‘\n‘); System.out.println("------------------本周最熱門代碼有如下-----------------"); entrySet= hotShareCode.entrySet(); iterator= entrySet.iterator(); while(iterator.hasNext()) { next = iterator.next(); key=next.getKey(); value=next.getValue(); String str=key+"\t"+"("+value+")"; byte[] bytes = str.getBytes(); fileOutputStream.write(bytes); fileOutputStream.write(‘\n‘); System.out.println(key+"\t"+"("+value+")"); } fileOutputStream.close(); }} 運行結果
網站標題為:開源中國 - 找到您想要的開源項目,分享和交流
----------------最新分享代碼有如下----------------
iOS 一個函數同時返回多個參數的策略(http://www.oschina.net/code/snippet_865986_50059)
jquery外掛程式--ajaxfileupload.js(http://www.oschina.net/code/snippet_105637_50057)
計蒜客-挑戰難題-6(http://www.oschina.net/code/snippet_587996_50055)
圖片消極式載入簡單原理(http://www.oschina.net/code/snippet_1590754_50058)
我該如何書寫一段能實現早睡早起的代碼?(http://www.oschina.net/code/snippet_1168184_50061)
shell擷取當前指令碼執行絕對路徑(http://www.oschina.net/code/snippet_1988965_50056)
冒泡排序演算法java實現(http://www.oschina.net/code/snippet_587996_50052)
js指令碼控制翻頁控制項概述。這個控制項主要用來翻頁的一個效果,如果有喜歡的可以那去參考,呵呵(http://www.oschina.net/code/snippet_1862064_50060)
選擇排序算方法java實現(http://www.oschina.net/code/snippet_587996_50053)
計蒜客-挑戰難題-5(http://www.oschina.net/code/snippet_587996_50054)
------------------本周最熱門代碼有如下-----------------
python實現爬圖,不要再爬妹子圖了,太沒品了(http://www.oschina.net/code/snippet_2371155_49889)
通過銀行卡號取得銀行名字(http://www.oschina.net/code/snippet_1252640_49997)
Java反射基礎,構建架構(重要)(http://www.oschina.net/code/snippet_2345495_49988)
spring boot + mybatis+ spring mvc整合(http://www.oschina.net/code/snippet_2325859_49871)
全醫通 - HTML5開發,單頁整合版(http://www.oschina.net/code/snippet_2287693_50012)
公眾號支付(http://www.oschina.net/code/snippet_1754599_49966)
我的Eclipse代碼格式化風格(http://www.oschina.net/code/snippet_1584959_49953)
python簡單爬蟲(http://www.oschina.net/code/snippet_2391943_49998)
擷取情敵電腦內照片神器(http://www.oschina.net/code/snippet_2425035_49995)
12306火車票API介面QQ 89914505(http://www.oschina.net/code/snippet_811693_49880)
Java實現簡單地抓取開源中國首頁的相關資料