標籤:class load text blank cti rda rar course 課程
代碼:
// Small crawler: downloads an imooc course page over HTTP, extracts its
// chapter / video outline with cheerio, and prints the result.
const http = require("http");
const cheerio = require("cheerio");

// Course page to crawl.
const url = 'http://www.imooc.com/learn/348';

http.get(url, function (res) {
  // Anything other than 200 (redirect, error page) carries no course markup;
  // report it and drain the response so the socket is released.
  if (res.statusCode !== 200) {
    console.error("擷取課程資料出錯!", 'HTTP ' + res.statusCode);
    res.resume();
    return;
  }

  // Decode as UTF-8 at the stream level so a multi-byte character that is
  // split across two chunks is not corrupted by per-chunk stringification.
  res.setEncoding('utf8');

  let html = '';

  res.on('data', function (chunk) {
    html += chunk;
  });

  res.on('end', function () {
    const courseData = filterChapters(html);

    printCourseInfo(courseData);
    console.log(courseData);
  });
}).on('error', function (err) {
  console.error("擷取課程資料出錯!", err.message);
});

/**
 * Extracts the chapter / video outline from the course page markup.
 *
 * @param {string} html - HTML source of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *   One entry per `.chapter` element, in document order.
 */
function filterChapters(html) {
  const $ = cheerio.load(html);
  const courseData = [];

  // All chapters
  $('.chapter').each(function (chapterIndex, chapterElement) {
    const chapter = $(chapterElement);
    const chapterData = {
      chapterTitle: chapter.find('h3 strong').text().replace(/\r|\n/g, "").trim(),
      videos: []
    };

    chapter.find(".video").children('li').each(function (videoIndex, videoElement) {
      const video = $(videoElement).find('.J-media-item');
      const href = video.attr('href');

      // A list item without a media link has neither a title nor an id;
      // skip it instead of crashing on `undefined.split(...)`.
      if (typeof href !== 'string') {
        return;
      }

      chapterData.videos.push({
        title: video.text().replace(/\r|\n/g, "").trim(),
        // The id is whatever follows "video/" in the link
        // (undefined if the link does not contain that segment).
        id: href.split('video/')[1]
      });
    });

    courseData.push(chapterData);
  });

  return courseData;
}

/**
 * Prints each chapter title followed by its videos as "[id]title" lines.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>} courseData
 *   Outline as returned by filterChapters.
 */
function printCourseInfo(courseData) {
  courseData.forEach(function (item) {
    console.log(item.chapterTitle + '\n');

    item.videos.forEach(function (video) {
      console.log(' [' + video.id + ']' + video.title);
    });
  });
}
運行:
----------------------------------------------------------------------
參考連結:
- http小爬蟲
- Node.js的學習--使用cheerio抓取網頁資料
使用 Node.js 的 http 模組與 cheerio 模組實現小爬蟲。