Code:
// Small crawler: downloads a course page over HTTP, extracts its chapters and
// videos with cheerio, and prints the result to the console.
const http = require("http");
const cheerio = require("cheerio");

const url = 'http://www.imooc.com/learn/348';

http.get(url, function (res) {
    // Anything other than 200 (redirect, error page) is not the course page;
    // drain the response so the socket is released and stop.
    if (res.statusCode !== 200) {
        console.error("Error getting course data! HTTP status: " + res.statusCode);
        res.resume();
        return;
    }

    // Decode as UTF-8 at the stream level so a multi-byte character that is
    // split across two chunks is not corrupted by string concatenation.
    res.setEncoding('utf8');

    let html = '';

    res.on('data', function (data) {
        html += data;
    });

    res.on('end', function () {
        const courseData = filterChapters(html);

        printCourseInfo(courseData);
        console.log(courseData);
    });
}).on('error', function (err) {
    console.error("Error getting course data! " + err.message);
});

/**
 * Extracts the chapter/video structure from the course page markup.
 *
 * @param {string} html - Full HTML source of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *   One entry per `.chapter` element, in document order. A video's `id` is
 *   the part of its link's `href` after `video/`, or `undefined` when the
 *   link or that path segment is missing.
 */
function filterChapters(html) {
    const $ = cheerio.load(html);

    // All chapters
    const chapters = $('.chapter');

    const courseData = [];

    chapters.each(function (chapterIndex, chapterElement) {
        const chapter = $(chapterElement);
        const chapterTitle = chapter.find('h3 strong').text().replace(/\r|\n/ig, "").trim();
        const videos = chapter.find(".video").children('li');

        const chapterData = {
            chapterTitle: chapterTitle,
            videos: []
        };

        videos.each(function (videoIndex, videoElement) {
            const video = $(videoElement).find('.J-media-item');
            const videoTitle = video.text().replace(/\r|\n/ig, "").trim();
            // attr() returns undefined when the <li> has no matching link;
            // guard so one malformed entry does not abort the whole crawl.
            const href = video.attr('href');
            const id = href ? href.split('video/')[1] : undefined;

            chapterData.videos.push({
                title: videoTitle,
                id: id
            });
        });

        courseData.push(chapterData);
    });

    return courseData;
}

/**
 * Prints each chapter title followed by one line per video.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>} courseData
 *   Structure produced by filterChapters.
 */
function printCourseInfo(courseData) {
    courseData.forEach(function (item) {
        const chapterTitle = item.chapterTitle;
        console.log(chapterTitle + '\n');

        item.videos.forEach(function (video) {
            console.log(' [' + video.id + '] ' + video.title);
        });
    });
}
Run:
----------------------------------------------------------------------
Reference Links:
- A small HTTP crawler
- Node.js learning: crawling web data using Cheerio
Node.js: a small crawler implemented with the HTTP module and the Cheerio module.