標籤:html csdn部落格 jsoup
最近想鼓搗一下CSDN用戶端,這篇部落格要介紹如何使用Jsoup解析html頁面、通過標籤擷取所需內容,並下載指定的圖片資源。
一、匯入Jsoup JAR包
JAR包:jsoup 1.6.1
注意匯入包到項目時,直接將解壓後的jar檔案全部複製到libs檔案目錄下即可,否則運行時會報錯。
二、下載html頁面並解析
代碼:
package com.example.testcsdn;import java.io.ByteArrayOutputStream;import java.io.IOException;import java.io.InputStream;import java.net.HttpURLConnection;import java.net.URL;import java.util.ArrayList;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import android.util.Log;/** * 通過給定連結地址,解析擷取的html資源,返回封裝好的ArrayList<Blog>對象 */public class BlogsFetchr {private static final String TAG = "BlogsFetchr";/** * 下載URL指定的資源 * * @return 返回為類型byte[] * */public byte[] getUrlBytes(String urlSpec) throws IOException {URL url = new URL(urlSpec);HttpURLConnection conn = (HttpURLConnection) url.openConnection();// 這裡強制轉換,是因為下面要用到HttpURLConnection.getInputStreamtry {ByteArrayOutputStream out = new ByteArrayOutputStream();InputStream in = conn.getInputStream();if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {// 串連不成功Log.i(TAG, "串連不成功");return null;}byte[] buffer = new byte[1024];int len = 0;while ((len = in.read(buffer)) > 0) {out.write(buffer, 0, len);}out.close();return out.toByteArray();} finally {conn.disconnect();}}/** * 下載URL指定的資源(即將getUrlBytes方法的傳回值byte[]轉換成String類型) * * @return 傳回型別為String */private String getUrl(String urlSpec) {String result = null;try {result = new String(getUrlBytes(urlSpec));} catch (IOException e) {e.printStackTrace();}return result;}public ArrayList<Blog> downloadBlogItems(String urlSpec) {ArrayList<Blog> blogs = new ArrayList<>();String htmlString = getUrl(urlSpec);// 解析htmlStringparserItems(blogs, htmlString);return blogs;}private void parserItems(ArrayList<Blog> blogs, String htmlString) {Document doc = Jsoup.parse(htmlString);Elements units = doc.getElementsByClass("blog_list");for (int i = 0; i < units.size(); i++) {Blog blog = new Blog();Element unit_ele = units.get(i);Element dl_ele = unit_ele.getElementsByTag("dl").get(0);Element dl_dt_ele = dl_ele.getElementsByTag("dt").get(0);Element dt_a_ele = dl_dt_ele.child(0);String iconUrl = dt_a_ele.child(0).attr("src"); // 
博主頭像連結Log.i(TAG, "文章" + i + "的博主頭像連結:" + iconUrl);Elements fls = unit_ele.getElementsByClass("fl");Element fl_ele = fls.get(0);Element fl_a1_ele = fl_ele.child(0);String bloggerId = fl_a1_ele.text(); // 博主IdLog.i(TAG, "文章" + i + "的" + bloggerId);blog.setBloggerIconUrl(iconUrl);blog.setBloggerId(bloggerId);blogs.add(blog);}}}
如代碼所示,使用Jsoup解析html十分簡單。
可以使用瀏覽器,右鍵審查元素,得到所示的工具框,可以很快的找到頁面中元素所對應的標籤,再使用Jsoup API擷取標籤的值。
三、下載指定圖片
如果想要下載部落格列表中各子項的博主頭像,可以先通過解析html擷取圖片的url,然後再使用HttpURLConnection直接下載。
下面建立一個ThumbnailDownloader<Token>類,繼承HandlerThread,用於等待並處理圖片下載請求,同時更新UI:
package com.example.testcsdn;import java.io.IOException;import java.util.Collections;import java.util.HashMap;import java.util.Map;import android.graphics.Bitmap;import android.graphics.BitmapFactory;import android.os.Handler;import android.os.HandlerThread;import android.os.Message;import android.support.v4.util.LruCache;import android.util.Log;import android.widget.ImageView;public class ThumbnailDownloader<Token> extends HandlerThread {// Token表示泛型,"類名<泛型>"以保證在類內可以使用Token,就像Token已經是定義好的類一樣private static final String TAG = "ThumbnailDownloader";private static final int MESSAGE_DOWNLOAD = 0;private Handler mHandler; // 發送下載圖片的指令,和處理下載圖片的指令的使者private Handler mResponseHandler; // 來自主線程的Handler,更新UIprivate Listener<Token> mListener;private Map<Token, String> requestMap = Collections.synchronizedMap(new HashMap<Token, String>());// 儲存ImageView和URL的索引值對,並是安全執行緒的private LruCache<String, Bitmap> mMemoryCache;// 緩衝圖片的類,當儲存圖片的大小大於LruCache設定的值,系統自動釋放記憶體public ThumbnailDownloader(Handler handler) {super(TAG);mResponseHandler = handler;// 建立一個名為TAG的HandlerThread,是擁有自己Looper的獨立線程// super(TAG) 相當於new HandlerThread(TAG)int maxMemory = (int) Runtime.getRuntime().maxMemory(); // 系統最大運行記憶體int mCacheSize = maxMemory / 8; // 分配給緩衝的記憶體大小mMemoryCache = new LruCache<String, Bitmap>(mCacheSize) {// 必須重寫此方法,來測量Bitmap的大小@Overrideprotected int sizeOf(String key, Bitmap value) {return value.getRowBytes() * value.getHeight();}};}public interface Listener<Token> { // 回調方法,在主線程中實現void onThumbnailDownloaded(Token token, Bitmap thumbnail);}public void setListener(Listener<Token> listener) {mListener = listener;}@Overridepublic void onLooperPrepared() {// 在此線程的Looper啟動迴圈準備時段啟動並執行方法mHandler = new Handler() { // 在當前線程建立的Handler,只會在當前線程運行@Overridepublic void handleMessage(Message message) {// 處理髮送過來的圖片下載訊息,下載圖片並更新UIif (message.what == MESSAGE_DOWNLOAD) {Token token = (Token) message.obj;try {handleRequest(token);// 處理訊息} catch (IOException e) {e.printStackTrace();}}}};}private void 
handleRequest(final Token token) throws IOException {final String url = requestMap.get(token);if (url == null)return;byte[] bitmapBytes = new BlogsFetchr().getUrlBytes(url);// 下載圖片final Bitmap bitmap = BitmapFactory.decodeByteArray(bitmapBytes, 0,bitmapBytes.length);String key = (String) ((ImageView) token).getTag();Log.i(TAG, "imageView的TAG是:" + key);mMemoryCache.put(key, bitmap); // 存入緩衝mResponseHandler.post(new Runnable() {@Overridepublic void run() {// 更新UIif (requestMap.get(token) != url)return;requestMap.remove(token);mListener.onThumbnailDownloaded(token, bitmap);// 更新UI}});}public void clearQueue() {mHandler.removeMessages(MESSAGE_DOWNLOAD);requestMap.clear();}public void queueThumbnail(Token token, String url) {// 將下載圖片命令加入"ThumbnailDownloader"訊息佇列,// 在PhotoGalleryFragment中被調用requestMap.put(token, url);Message message = mHandler.obtainMessage(MESSAGE_DOWNLOAD, token);// 擷取Message,並且自動與mHandler綁定在一起// 參數一: what,int型,用於描述訊息// 參數二: obj,隨訊息發送的指定對象// 參數三: target,處理訊息的Handler,這裡由於使用自動和mHandler綁定,故預設message.sendToTarget(); // 發送訊息給目標Handler}public Bitmap getCacheImage(String key) {// 擷取緩衝中的圖片Bitmap bitmap = mMemoryCache.get(key);return bitmap;}}
MainActivity:
package com.example.testcsdn;import java.util.ArrayList;import android.app.Activity;import android.graphics.Bitmap;import android.os.AsyncTask;import android.os.Bundle;import android.os.Handler;import android.util.Log;import android.view.View;import android.view.ViewGroup;import android.widget.ArrayAdapter;import android.widget.ImageView;import android.widget.ListView;import android.widget.TextView;public class MainActivity extends Activity {private static final String TAG = "MainActivity";private ListView mListView;private ArrayList<Blog> mBlogs; // 部落格列表private String testUrl = "http://blog.csdn.net/column.html"; // 訪問的連結,這裡測試的CSDN部落格專欄的首頁private BlogsFetchr fetchr; // 下載html頁面和解析它的工具對象private MyAdapter adapter;private ThumbnailDownloader<ImageView> mThumbnailDownloader; // 圖片下載器@Overrideprotected void onCreate(Bundle savedInstanceState) {super.onCreate(savedInstanceState);setContentView(R.layout.activity_main);fetchr = new BlogsFetchr();mBlogs = new ArrayList<Blog>();Log.i(TAG, "mBlogs.size:" + mBlogs.size());Blog blog = new Blog();blog.setBloggerId("hello");mBlogs.add(blog);update(testUrl);// 開啟響應下載圖片訊息的線程mThumbnailDownloader = new ThumbnailDownloader<ImageView>(new Handler());mThumbnailDownloader.setListener(new ThumbnailDownloader.Listener<ImageView>() {@Overridepublic void onThumbnailDownloaded(ImageView imageView,Bitmap thumbnail) {// 更新UI,imageView.setImageBitmap(thumbnail);}});mThumbnailDownloader.start();mThumbnailDownloader.getLooper(); // 必須要在start之後}private void update(final String testUrl) {new AsyncTask<Void, Void, Void>() {@Overrideprotected Void doInBackground(Void... 
params) {mBlogs = fetchr.downloadBlogItems(testUrl); // 下載部落格列表return null;};@Overrideprotected void onPostExecute(Void result) {// 更新ListViewmListView = (ListView) findViewById(R.id.listview_blogcolumn);adapter = new MyAdapter(mBlogs);mListView.setAdapter(adapter);}}.execute();}private class MyAdapter extends ArrayAdapter<Blog> {public MyAdapter(ArrayList<Blog> blogs) {super(MainActivity.this, 0, blogs);}@Overridepublic View getView(int position, View convertView, ViewGroup parent) {if (convertView == null) {convertView = getLayoutInflater().inflate(R.layout.listview_item, null);}ImageView imageView = (ImageView) convertView.findViewById(R.id.imageView);TextView textView = (TextView) convertView.findViewById(R.id.textView);textView.setText(getItem(position).getBloggerId());String imageUrl = getItem(position).getBloggerIconUrl();String imageTag = imageUrl.replaceAll("[^\\w]", "");imageView.setTag(imageTag);// 去掉字串中非(字母、數字、底線)// 給imageView設定一個標籤,用於存取於CacheBitmap bitmap = null;if ((bitmap = mThumbnailDownloader.getCacheImage(imageTag)) != null) {// 如果在緩衝中存在imageView.setImageBitmap(bitmap);} else {// 發送下載圖片訊息mThumbnailDownloader.queueThumbnail(imageView, imageUrl);}return convertView;}}}
運行效果:
源碼下載
Android 使用Jsoup解析html+下載圖片