// Node's built-in HTTP client, used to download the course page.
const http = require('http');
// cheerio: jQuery-like HTML parser used to extract data from the page.
const cheerio = require('cheerio');

// Course page to scrape.
const url = 'http://www.jcpeixun.com/lesson/1512/';

/**
 * Extracts the chapter/video structure from the course page markup.
 *
 * NOTE(review): the selectors and attribute names below were reconstructed
 * in lowercase from a case-mangled copy of this script — confirm them
 * against the live page markup.
 *
 * @param {string} html - Raw HTML of the course page.
 * @returns {Array<{chaptersTitle: string, videosData: Array<{title: (string|undefined), id: (string|undefined), price: string}>}>}
 *   One entry per `.list-collapse` element, each holding one record per
 *   `<li>` directly under its `.listview5` element.
 */
function filterData(html) {
  const $ = cheerio.load(html);
  const courseData = [];

  $('.list-collapse').each(function (chapterIndex, chapterElement) {
    const chapter = $(chapterElement);
    const chaptersData = {
      chaptersTitle: chapter.find('.collapse-head').find('label').text(),
      videosData: [],
    };

    chapter
      .find('.listview5')
      .children('li')
      .each(function (videoIndex, videoElement) {
        const video = $(videoElement);
        // The lesson name and id are both data attributes on the same node.
        const lesson = video.find('.ml10');

        chaptersData.videosData.push({
          title: lesson.attr('data-lesson-name'),
          id: lesson.attr('data-lesson-id'),
          price: video.find('.colblue').text(),
        });
      });

    courseData.push(chaptersData);
  });

  return courseData;
}

/**
 * Prints every chapter title followed by one line per video to stdout.
 *
 * @param {Array<{chaptersTitle: string, videosData: Array<{title: string, id: string, price: string}>}>} courseData
 *   Structure produced by `filterData`.
 */
function printCourseInfo(courseData) {
  courseData.forEach(function (chapter) {
    console.log(chapter.chaptersTitle + '\n');

    chapter.videosData.forEach(function (video) {
      // NOTE(review): the original separators around the id were lost in the
      // garbled source; square brackets are used here — confirm if it matters.
      console.log(video.title + ' [' + video.id + '] ' + video.price + '\n');
    });
  });
}

// Download the page, buffer the whole body, then parse and print it.
http
  .get(url, function (res) {
    if (res.statusCode !== 200) {
      console.error('Request failed with status code ' + res.statusCode);
      // Drain the response so the socket is released.
      res.resume();
      return;
    }

    // Decode chunks as text so multi-byte characters that straddle a chunk
    // boundary are not corrupted by Buffer-to-string concatenation.
    // NOTE(review): assumes the page is served as UTF-8 — confirm.
    res.setEncoding('utf8');

    let html = '';

    res.on('data', function (chunk) {
      html += chunk;
    });

    res.on('end', function () {
      const courseData = filterData(html);
      printCourseInfo(courseData);
    });
  })
  .on('error', function (err) {
    console.error('Failed to fetch the course page: ' + err.message);
  });
Open a command prompt with Node.js installed and run the script with `node`; the scraped course data is printed to the console.
Fetching data with a Node.js crawler