The difference between this crawler and the previous one is that the page addresses do not follow a regular pattern: on each page you need to find the address of the next page, and then crawl that page to continue the search.
//======================================================
// abaike image bulk download - Node.js crawler 1.00
// November 9, 2017
//======================================================

"use strict";

// Built-in HTTP module.
var http = require("http");

// Built-in file-system module, used to create the directory and picture files.
var fs = require("fs");

// cheerio provides jQuery-like selectors, used to find picture addresses and
// the next-page link in the downloaded HTML.
var cheerio = require("cheerio");

// Image array: every picture address found while crawling is collected here.
var pictures = [];

// Socket inactivity timeout applied to every request, in milliseconds.
var REQUEST_TIMEOUT_MS = 10000;

// Directory (relative to the working directory) the pictures are saved into.
var PICTURE_FOLDER = "pictures";

// Selector for the pagination links.
// NOTE(review): the casing of this selector was reconstructed from a mangled
// paste and class selectors are case-sensitive - confirm against the site's
// markup.
var PAGE_LINK_SELECTOR = ".PageList a";

// Text that identifies the "next page" pagination link.
// NOTE(review): this literal looks machine-translated; the target site may use
// localized link text - confirm against the site's markup.
var NEXT_PAGE_LABEL = "next page";

//--------------------------------------
// Split an absolute http URL into the pieces http.request() needs.
// rawUrl sample: http://www.avbaike.net/27812.html
// Returns { hostname, path }; path defaults to "/" when the URL has none, so
// the hostname is never truncated by a failed indexOf().
//--------------------------------------
function splitUrl(rawUrl) {
  var withoutScheme = rawUrl.replace(/^https?:\/\//i, "");
  var slashPos = withoutScheme.indexOf("/");
  if (slashPos === -1) {
    return { hostname: withoutScheme, path: "/" };
  }
  return {
    hostname: withoutScheme.slice(0, slashPos),
    path: withoutScheme.slice(slashPos)
  };
}

//--------------------------------------
// Crawl a web page, collect the image addresses on it, then follow the
// next-page link; when there is no next page, start the downloads.
// pageurl sample: http://www.avbaike.net/27812.html
// pageurl sample: http://www.avbaike.net/27812.html/2
//--------------------------------------
function crawl(pageurl) {
  console.log("Current Page=" + pageurl);

  var target = splitUrl(pageurl);
  var options = {
    hostname: target.hostname,
    port: 80,
    path: target.path,
    method: "GET"
  };

  // The request object is local so the timeout handler below always destroys
  // this request and never one created by a later call.
  var req = http.request(options, function (resp) {
    resp.setEncoding("utf8");

    var body = "";
    resp.on("data", function (chunk) {
      body += chunk;
    });

    resp.on("end", function () {
      var $ = cheerio.load(body);

      // Collect the picture links on this page.
      $("#post_content p a").each(function (index, element) {
        var picUrl = $(element).attr("href");
        // An anchor without href would crash downloadPic() later, so skip it.
        if (picUrl) {
          pictures.push(picUrl);
        }
      });

      // Find the next page.
      var nextPageUrl = null;
      $(PAGE_LINK_SELECTOR).each(function (index, element) {
        var text = $(element).text();
        if (text.indexOf(NEXT_PAGE_LABEL) !== -1) {
          nextPageUrl = $(element).attr("href");
        }
      });

      if (!nextPageUrl) {
        console.log(pageurl + " It's the last page.");
        download(pictures);
      } else {
        crawl(nextPageUrl);
      }
    });
  });

  // Timeout processing: destroying the request surfaces as an 'error' event
  // on the request, which is reported by the handler below.
  req.setTimeout(REQUEST_TIMEOUT_MS, function () {
    req.destroy();
  });

  // Error handling.
  req.on("error", function (err) {
    if (err.code === "ECONNRESET") {
      console.log("[Crawl]socket port connection timed out.");
      console.log(err);
    } else {
      console.log("Request error occurred, err.code: " + err.code);
    }
  });

  // End of request.
  req.end();
}

//--------------------------------------
// Download every image in the given array into PICTURE_FOLDER.
// pictures: array of absolute image URLs.
//--------------------------------------
function download(pictures) {
  // Downloads start only after mkdir has finished, so no file is written
  // before the directory exists.
  fs.mkdir("./" + PICTURE_FOLDER, function (err) {
    if (err) {
      if (err.code !== "EEXIST") {
        console.log("Directory " + PICTURE_FOLDER + " could not be created");
        console.log(err);
        return;
      }
      console.log("Directory " + PICTURE_FOLDER + " already exists");
    }

    for (var i = 0; i < pictures.length; i++) {
      downloadPic(pictures[i]);
    }
  });

  console.log("A total of " + pictures.length + " pictures will be downloaded.");
}

//--------------------------------------
// Download a single photo.
// picurl sample: http://www.avbaike.net/wp-content/uploads/2016/08/108.jpg
//--------------------------------------
function downloadPic(picurl) {
  console.log("Picture:" + picurl + " Download Start");

  var target = splitUrl(picurl);
  // File name is the last path segment, including its leading "/".
  var picName = target.path.slice(target.path.lastIndexOf("/"));
  var fileName = "./" + PICTURE_FOLDER + picName;

  var options = {
    hostname: target.hostname,
    port: 80,
    path: target.path,
    method: "GET"
  };

  // Local request object: downloads run in parallel, and a shared variable
  // would make every timeout handler destroy the most recent request.
  var req = http.request(options, function (resp) {
    if (resp.statusCode !== 200) {
      // Do not save an error page as if it were the image.
      console.log("File " + fileName + " Download failed. HTTP status: " + resp.statusCode);
      resp.resume(); // drain the body so the socket is released
      return;
    }

    // Collect raw Buffers; no round trip through a binary string.
    var chunks = [];
    resp.on("data", function (chunk) {
      chunks.push(chunk);
    });

    resp.on("end", function () {
      // Create the file.
      fs.writeFile(fileName, Buffer.concat(chunks), function (err) {
        if (err) {
          console.log("File " + fileName + " Download failed.");
          console.log(err);
        } else {
          console.log("File " + fileName + " Download succeeded");
        }
      });
    });
  });

  // Timeout processing.
  req.setTimeout(REQUEST_TIMEOUT_MS, function () {
    req.destroy();
  });

  // Error handling.
  req.on("error", function (err) {
    if (err.code === "ECONNRESET") {
      console.log("[Downloadpic]socket port connection timed out.");
      console.log(err);
    } else {
      console.log("[Downloadpic] Request error occurred, err.code: " + err.code);
      console.log(err);
    }
  });

  // End of request.
  req.end();
}

//--------------------------------------
// Program entry: prompt for the first page URL on stdin and start crawling.
//--------------------------------------
function getInput() {
  // \x1b[35m switches the prompt to purple; \x1b[039m restores the default colour.
  process.stdout.write("\x1b[35m Please enter the first page url:\x1b[039m");
  process.stdin.resume();
  process.stdin.setEncoding("utf8");

  // Only the first chunk of input is used as the URL.
  process.stdin.once("data", function (text) {
    process.stdin.pause(); // exit input status
    crawl(text.trim()); // trim() is a must: it removes the trailing newline
  });
}

// Call the getInput function and the program starts.
getInput();
Node.js: abaike image bulk download, Node.js crawler, version 1.00