Node.js abaike image bulk download: Node.js crawler, version 1.00

Source: Internet
Author: User

The difference between this crawler and the previous one is that the page addresses do not follow a regular pattern: you have to find the address of the next page inside each page you fetch, and then crawl that page in turn.

//======================================================
// abaike image bulk download - Node.js crawler 1.00
// November 9, 2017
//======================================================
'use strict';

// Built-in HTTP / HTTPS clients used to fetch pages and pictures.
const http = require('http');
const https = require('https');
// Built-in file-system module for creating the directory and picture files.
const fs = require('fs');
// Built-in path helper, used to derive the local file name of a picture.
const path = require('path');
// WHATWG URL parser, used instead of hand-rolled string slicing.
const { URL } = require('url');
// cheerio provides jQuery-like selectors for finding picture addresses and
// the next-page link in the fetched HTML.
const cheerio = require('cheerio');

// Directory (relative to the working directory) that receives the pictures.
const PICTURE_DIR = 'pictures';
// A request that stays idle this long is destroyed and reported as timed out.
const REQUEST_TIMEOUT_MS = 10000;
// Links whose href is collected as a picture address.
const PICTURE_LINK_SELECTOR = '#post_content p a';
// NOTE(review): this selector and the label below are reproduced exactly as
// they appear in the published listing. The listing was machine-translated, so
// the real class name may be lower-case and the real link text may be in the
// site's own language - confirm against the live page markup.
const PAGER_LINK_SELECTOR = '.PageList a';
const NEXT_PAGE_LABEL = 'next page';

// Picture addresses found so far; filled by crawl(), consumed by download().
const foundPictures = [];
// Page addresses already requested, so a pager that links back cannot loop forever.
const visitedPages = new Set();

/**
 * Resolve a possibly relative href against the page it was found on.
 *
 * @param {string} href - Value of an href attribute.
 * @param {string} baseUrl - Absolute URL of the page containing the link.
 * @returns {string|null} Absolute URL, or null when it cannot be parsed.
 */
function resolveUrl(href, baseUrl) {
  try {
    return new URL(href, baseUrl).href;
  } catch (err) {
    return null;
  }
}

/**
 * Log a failed request in the format used throughout this script.
 *
 * @param {string} tag - Prefix naming the caller, e.g. '[crawl]'.
 * @param {Error} err - Error reported for the request.
 */
function logRequestError(tag, err) {
  if (err.code === 'ETIMEDOUT' || err.code === 'ECONNRESET') {
    console.log(tag + 'socket port connection timed out.');
  } else {
    console.log(tag + ' Request error occurred, err.code: ' + err.code);
  }
  console.log(err);
}

/**
 * Issue a GET request and collect the whole response body.
 *
 * Each call owns its request object, so the timeout handler can only ever
 * destroy the request it was registered on.
 *
 * @param {string} targetUrl - Absolute http:// or https:// URL.
 * @param {function(?Error, Buffer=, Object=)} done - Called at most once with
 *     (err) on failure or (null, body, response) on completion.
 */
function fetchBuffer(targetUrl, done) {
  let target;
  try {
    target = new URL(targetUrl);
  } catch (err) {
    done(err);
    return;
  }
  if (target.protocol !== 'http:' && target.protocol !== 'https:') {
    done(new Error('Unsupported protocol: ' + target.protocol));
    return;
  }
  const isHttps = target.protocol === 'https:';
  const client = isHttps ? https : http;

  // Several events below can report the same failure; deliver only the first.
  let finished = false;
  const finish = (err, body, resp) => {
    if (finished) {
      return;
    }
    finished = true;
    done(err, body, resp);
  };

  const requestOptions = {
    hostname: target.hostname,
    port: target.port ? Number(target.port) : (isHttps ? 443 : 80),
    path: target.pathname + target.search,
    method: 'GET',
  };

  const req = client.request(requestOptions, (resp) => {
    const chunks = [];
    resp.on('data', (chunk) => {
      chunks.push(chunk);
    });
    resp.on('end', () => {
      finish(null, Buffer.concat(chunks), resp);
    });
    // A response cut off mid-body may never emit 'end'; report it instead of
    // leaving the caller waiting.
    resp.on('aborted', () => {
      const abortedError = new Error('Response aborted before completion');
      abortedError.code = 'ECONNRESET';
      finish(abortedError);
    });
    resp.on('error', finish);
  });

  // Timeout processing
  req.setTimeout(REQUEST_TIMEOUT_MS, () => {
    const timeoutError = new Error('Request timed out after ' + REQUEST_TIMEOUT_MS + ' ms');
    timeoutError.code = 'ETIMEDOUT';
    req.destroy(timeoutError);
  });

  // Error handling
  req.on('error', finish);

  // End of request
  req.end();
}

//--------------------------------------
// Crawl web pages, find image addresses, and then crawl the next page
// pageUrl sample: http://www.avbaike.net/27812.html
// pageUrl sample: http://www.avbaike.net/27812.html/2
//--------------------------------------
/**
 * Fetch one page, collect its picture links, then follow the next-page link.
 * When no further page is found, the collected pictures are downloaded.
 *
 * @param {string} pageUrl - Absolute URL of the page to crawl.
 */
function crawl(pageUrl) {
  console.log('Current Page=' + pageUrl);
  visitedPages.add(pageUrl);

  fetchBuffer(pageUrl, (err, body, resp) => {
    if (err) {
      logRequestError('[crawl]', err);
      return;
    }
    if (resp.statusCode !== 200) {
      console.log('[crawl] Unexpected HTTP status ' + resp.statusCode + ' for ' + pageUrl);
    }

    const $ = cheerio.load(body.toString('utf8'));

    // Find pictures and put them into the array. Links without a usable href
    // are skipped so downloadPic() never receives undefined.
    $(PICTURE_LINK_SELECTOR).each((index, element) => {
      const href = $(element).attr('href');
      const picUrl = href ? resolveUrl(href, pageUrl) : null;
      if (picUrl !== null) {
        foundPictures.push(picUrl);
      }
    });

    // Find the next page. As in the original, the last matching link wins.
    let nextPageUrl = null;
    $(PAGER_LINK_SELECTOR).each((index, element) => {
      const link = $(element);
      const href = link.attr('href');
      if (href && link.text().includes(NEXT_PAGE_LABEL)) {
        nextPageUrl = resolveUrl(href, pageUrl);
      }
    });

    if (nextPageUrl === null || visitedPages.has(nextPageUrl)) {
      console.log(pageUrl + " It's the last page.");
      download(foundPictures);
    } else {
      crawl(nextPageUrl);
    }
  });
}

//--------------------------------------
// Download images
//--------------------------------------
/**
 * Create the target directory, then start a download for every picture.
 *
 * @param {string[]} pictures - Absolute picture URLs.
 */
function download(pictures) {
  // Create the directory. Downloads start only after mkdir has finished so no
  // file is written before the directory exists.
  fs.mkdir('./' + PICTURE_DIR, (err) => {
    if (err && err.code !== 'EEXIST') {
      console.log('Directory ' + PICTURE_DIR + ' could not be created');
      console.log(err);
      return;
    }
    if (err) {
      console.log('Directory ' + PICTURE_DIR + ' already exists');
    }

    console.log('A total of ' + pictures.length + ' pictures will be downloaded.');
    for (const picUrl of pictures) {
      downloadPic(picUrl);
    }
  });
}

/**
 * Work out the local file path for a picture URL.
 *
 * @param {string} picUrl - Absolute picture URL.
 * @returns {string|null} Path inside PICTURE_DIR, or null when the URL has no
 *     usable file name.
 */
function localFileNameFor(picUrl) {
  let pathname;
  try {
    pathname = new URL(picUrl).pathname;
  } catch (err) {
    return null;
  }
  const baseName = path.basename(pathname);
  if (baseName === '' || baseName === '.' || baseName === '..') {
    return null;
  }
  return './' + PICTURE_DIR + '/' + baseName;
}

//--------------------------------------
// Download a single photo
// picUrl sample: http://www.avbaike.net/wp-content/uploads/2016/08/108.jpg
//--------------------------------------
/**
 * Download one picture and write it into PICTURE_DIR.
 *
 * @param {string} picUrl - Absolute URL of the picture.
 */
function downloadPic(picUrl) {
  console.log('Picture: ' + picUrl + ' Download Start');

  const fileName = localFileNameFor(picUrl);
  if (fileName === null) {
    console.log('[downloadPic] Cannot derive a file name from ' + picUrl);
    return;
  }

  fetchBuffer(picUrl, (err, imgData, resp) => {
    if (err) {
      logRequestError('[downloadPic]', err);
      return;
    }
    // Do not save an error page under a picture's name.
    if (resp.statusCode !== 200) {
      console.log('File ' + fileName + ' Download failed. HTTP status: ' + resp.statusCode);
      return;
    }

    // Create the file
    fs.writeFile(fileName, imgData, (writeErr) => {
      if (writeErr) {
        console.log('File ' + fileName + ' Download failed.');
        console.log(writeErr);
      } else {
        console.log('File ' + fileName + ' Download succeeded');
      }
    });
  });
}

//--------------------------------------
// Program entry
//--------------------------------------
/**
 * Prompt for the first page URL on stdin and start crawling from it.
 */
function getInput() {
  // \x1b[35m switches to purple, \x1b[39m restores the default colour.
  process.stdout.write('\x1b[35m Please enter the first page url:\x1b[39m');
  process.stdin.resume();
  process.stdin.setEncoding('utf8');

  process.stdin.once('data', (text) => {
    process.stdin.pause(); // Exit input status
    crawl(text.trim()); // trim() is a must: the input ends with a newline
  });
}

// Call the getInput function and the program starts
getInput();

Node.js abaike image bulk download: Node.js crawler, version 1.00

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.