Node.js (13) -- Refactoring the Crawler Code with Promises

Source: Internet
Author: User
Tags: SSL certificate

Before refactoring the code, you need to know what HTTPS is.

The HTTPS protocol is the HTTP protocol carried over SSL/TLS: all data is transferred inside the SSL/TLS protocol.

In other words, HTTPS is the HTTP protocol with the SSL/TLS handshake and encrypted data transfer added on top.

That added layer is the biggest difference between the two.

The https module specifically handles encrypted access; it is used like the http module, the only difference being that an SSL certificate is required to build an HTTPS server.

Simulating HTTPS Server Setup

// Minimal HTTPS server example. It is set up like an HTTP server, except that
// createServer() must be given an SSL key/certificate pair.
var https = require('https');
var fs = require('fs'); // file system module

var options = {
  // Read the SSL key and certificate synchronously so that both are available
  // before the server is created. The article calls these "virtual"
  // (placeholder) files; they must exist in the working directory.
  key: fs.readFileSync('ssh_key.pem'),
  cert: fs.readFileSync('ssh_cert.pem')
};

// With the certificate loaded, the HTTPS server can be started.
https.createServer(options, function (req, res) {
  res.writeHead(200);
  res.end('Hello Https');
}).listen(8090);


Refactoring Crawler Code:

var http = require('http');
var cheerio = require('cheerio');
// Promise is already built in to newer versions of Node; bluebird is kept
// here because the original example used it.
var Promise = require('bluebird');

var baseUrl = 'http://www.imooc.com/learn/';
var videoIds = [348, 259, 197, 134, 751];

/**
 * Extract the course title, learner count and chapter/video list from the
 * HTML of one course page.
 *
 * @param {string} html - Raw HTML of a course page.
 * @returns {{title: string, number: number, videos: Array}} Course data;
 *   `videos` holds one `{chapterTitle, videos: [{title, id}]}` entry per
 *   chapter.
 */
function filterChapters(html) {
  var $ = cheerio.load(html);
  var chapters = $('.mod-chapters');
  // Course title
  var title = $('#main .path span').text();
  // Number of learners.
  // NOTE(review): the selector assumes one element carrying both classes
  // `meta-value` and `js-learn-num` -- confirm against the live page.
  var number = parseInt($('.meta-value.js-learn-num').text().trim(), 10);
  if (isNaN(number)) {
    // Fall back to 0 so the numeric sort below stays well defined.
    number = 0;
  }

  var courseData = {
    title: title,
    number: number,
    videos: []
  };

  // Walk every chapter and collect its videos.
  chapters.each(function () {
    var chapter = $(this);
    // Chapter title
    var chapterTitle = chapter.find('strong').text();
    console.log(chapterTitle);
    var videos = chapter.find('.video').children('li');
    var chapterData = {
      chapterTitle: chapterTitle,
      videos: []
    };

    // Walk every video of this chapter.
    videos.each(function () {
      var video = $(this).find('.J-media-item');
      var videoTitle = video.text();
      var href = video.attr('href');
      if (!href) {
        // No link on this item: skip it instead of throwing on undefined.
        return;
      }
      var id = href.split('video/')[1];
      chapterData.videos.push({
        title: videoTitle,
        id: id
      });
    });

    courseData.videos.push(chapterData);
  });

  return courseData;
}

/**
 * Print a summary line per course, followed by each course's video list.
 *
 * @param {Array} coursesData - Array of objects returned by filterChapters.
 */
function printCourseInfo(coursesData) {
  coursesData.forEach(function (courseData) {
    console.log(courseData.number + ' people learned ' + courseData.title + '\n');
  });

  // Walk the array again and print the videos of every course.
  coursesData.forEach(function (courseData) {
    console.log('### ' + courseData.title + '\n');
    courseData.videos.forEach(function (item) {
      item.videos.forEach(function (video) {
        console.log(' "' + video.id + '" ' + video.title);
      });
    });
  });
}

/**
 * Fetch a page over HTTP.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<string>} Resolves with the response body once the
 *   response ends; rejects if the request or the response stream errors.
 */
function getPageAsync(url) {
  return new Promise(function (resolve, reject) {
    console.log('fetching ' + url);

    function onError(e) {
      // On error, reject so the caller's promise chain sees the failure.
      reject(e);
      console.log('get page error');
    }

    http.get(url, function (res) {
      var html = '';
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted by string concatenation.
      res.setEncoding('utf8');
      res.on('data', function (data) {
        html += data;
      });
      res.on('end', function () {
        // The request has completed: hand the page over through resolve.
        resolve(html);
      });
      res.on('error', onError);
    }).on('error', onError); // connection-level errors are emitted on the request
  });
}

// One pending fetch per course id.
var fetchCourseArray = videoIds.map(function (id) {
  return getPageAsync(baseUrl + id);
});

// Crawl all pages concurrently and process them once every fetch is done.
Promise.all(fetchCourseArray)
  .then(function (pages) {
    // Parse every downloaded page.
    var coursesData = pages.map(function (html) {
      return filterChapters(html);
    });

    // Most-learned course first. The comparator must return a number;
    // returning a boolean gives an unspecified order.
    coursesData.sort(function (a, b) {
      return b.number - a.number;
    });

    printCourseInfo(coursesData);
  })
  .catch(function (err) {
    // Report a failed crawl instead of leaving the rejection unhandled.
    console.error('crawl failed: ' + (err && err.message ? err.message : err));
  });

The results of the operation are as follows:

<img src="https://s2.51cto.com/wyfs02/M01/8F/50/wKiom1jahlPyvW7uAABnQMXU45E114.jpg" title="36020170315204858576.jpg" alt="wKiom1jahlPyvW7uAABnQMXU45E114.jpg" />

This article is from the "IT Rookie" blog; please keep this source attribution when reposting: http://mazongfei.blog.51cto.com/3174958/1911261

Node.js (13) -- Refactoring the Crawler Code with Promises

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.