Node.js for beginners: writing an HTTP crawler. A crawler downloads the source code of a web page.
Code:
// Minimal HTTP crawler: downloads one page and prints its HTML source.
var http = require('http'); // load the built-in HTTP module
var url = 'http://www.imooc.com/learn/713'; // example page on the imooc site

// Shared handler so response-level and request-level failures are reported
// the same way.
function logFetchError() {
  console.log('Error getting course data!');
}

http.get(url, function (res) {
  var html = '';

  // Decode at the stream level so a multi-byte character that is split
  // across two chunks is not corrupted when the chunks are concatenated.
  res.setEncoding('utf8');

  res.on('data', function (data) {
    html += data; // accumulate the response body
  });

  res.on('end', function () {
    console.log(html); // print the complete HTML
  });

  res.on('error', logFetchError);
}).on('error', logFetchError); // request-level failures are emitted on the request returned by http.get
Save the file, then run it with Node.js using the command: node <file name>
Tested personally; it works.
Case two:
First, install the cheerio module. cheerio lets you use jQuery-style selectors on the server side.
Installation method:
npm install cheerio
First, open a local server and test it.
JS Code
// Minimal local test server: answers every request on port 8080 with the
// plain-text body "started".
var http = require('http');

http
  .createServer(function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/plain' });
    res.write('started');
    res.end();
  })
  .listen(8080);
Paste the following code:
'use strict';

var http = require('http'); // built-in HTTP client
var cheerio = require('cheerio'); // third-party module: npm install cheerio
var url = 'http://www.imooc.com/learn/348'; // course page to crawl
/**
 * Extract the chapter/video outline from the HTML of a course page.
 *
 * Result shape:
 *   [{ chapterTitle: '', videos: [{ title: '', id: '' }] }]
 *
 * @param {string} html - HTML source of the course page.
 * @returns {Array} One entry per `.chapter` element found in the page.
 */
function filterChapters(html) {
  var $ = cheerio.load(html);
  var chapters = $('.chapter');
  var courseData = [];

  chapters.each(function () {
    var chapter = $(this);
    var chapterTitle = chapter.find('strong').text();
    var videos = chapter.find('.video').children('li');
    var chapterData = {
      chapterTitle: chapterTitle,
      videos: []
    };

    videos.each(function () {
      var video = $(this).find('.J-media-item');
      var videoTitle = video.text();
      // attr() yields undefined when the item has no link; fall back to an
      // empty string so one malformed entry does not abort the whole parse.
      var href = video.attr('href') || '';
      var id = href.split('video/')[1];

      chapterData.videos.push({
        title: videoTitle,
        id: id
      });
    });

    courseData.push(chapterData);
  });

  return courseData;
}
/**
 * Print the course outline to the console.
 *
 * Logs a start marker, then for each chapter its title, one line per video
 * in the form `"<id>" <title>`, and a marker after the chapter's videos.
 *
 * @param {Array} courseData - Entries shaped like
 *   { chapterTitle: '', videos: [{ title: '', id: '' }] }.
 * @returns {undefined}
 */
function printCourseInfo(courseData) {
  console.log('Get Course data 1!');

  courseData.forEach(function (item) {
    var chapterTitle = item.chapterTitle;
    console.log(chapterTitle + '\n');

    item.videos.forEach(function (video) {
      console.log('"' + video.id + '" ' + video.title + '\n');
    });

    console.log('Get Course data 2!');
  });
}
// Shared handler so response-level and request-level failures are reported
// the same way.
function logFetchError() {
  console.log('Error getting course data!');
}

// Fetch the course page, then parse and print its chapter outline.
http.get(url, function (res) {
  var html = '';

  // Decode at the stream level so a multi-byte character that is split
  // across two chunks is not corrupted when the chunks are concatenated.
  res.setEncoding('utf8');

  res.on('data', function (data) {
    html += data;
  });

  res.on('end', function () {
    var courseData = filterChapters(html);
    printCourseInfo(courseData);
  });

  res.on('error', logFetchError);

  console.log('Get Course data 3!');
}).on('error', logFetchError); // request-level failures are emitted on the request returned by http.get
Tested personally; it works.
A small HTTP crawler