//======================================================
// https://www.meitulu.com image bulk-download Node.js crawler, version 1.03
// 1.01 pages and directory entered manually
// 1.02 page URL entered manually, then parsed automatically
// 1.03 page URL read from standard input, then parsed automatically
// November 6, 2017
//======================================================

// Built-in HTTPS module: fetches the gallery page.
var https = require('https');

// Built-in HTTP module: fetches the image files.
var http = require('http');

// Built-in zlib module: decompresses a gzip-encoded gallery page.
var zlib = require('zlib');

// Built-in URL parser.
var URL = require('url').URL;

// cheerio module: jQuery-like querying of the downloaded HTML.
var cheerio = require('cheerio');

// Built-in file system module.
var fs = require('fs');

// Host serving the image files. Do not prefix it with "http://",
// otherwise the request fails with ENOTFOUND.
var IMAGE_HOST = 'mtl.ttsqgs.com';

// Idle time (ms) after which a request is aborted.
var REQUEST_TIMEOUT_MS = 5000;

// The folder name is taken from the remote page and used in a local path,
// so only a plain single-segment name is accepted.
var SAFE_FOLDER = /^[A-Za-z0-9_-]+$/;

//--------------------------------------
// Program entry.
// Prompts for a page URL and starts a crawl for every non-empty
// chunk of text received on standard input.
//--------------------------------------
function getInput() {
    console.log('Please enter the page URL:');
    process.stdin.resume();
    process.stdin.setEncoding('utf8');
    process.stdin.on('data', function (text) {
        // trim() is required (the typed text presumably still carries
        // its trailing line break).
        var pageUrl = text.trim();
        if (pageUrl === '') {
            return;
        }
        start(pageUrl);
    });
}

//--------------------------------------
// Attach the shared timeout and error handling to a request.
// request: the http/https ClientRequest to guard
// The callbacks close over their own request, so a timeout aborts only
// the request it belongs to.
//--------------------------------------
function guardRequest(request) {
    // Timeout handling
    request.setTimeout(REQUEST_TIMEOUT_MS, function () {
        request.abort();
    });

    // Error handling
    request.on('error', function (err) {
        if (err.code === 'ECONNRESET') {
            console.log('Socket port connection timed out.');
        } else {
            console.log('Request error occurred, err.code: ' + err.code);
        }
    });
}

//--------------------------------------
// Turn a raw response body into text.
// buffer: the complete response body
// callback(err, text): receives the decoded text or the gunzip error
// gzip is detected from its magic bytes (0x1f 0x8b), so compressed and
// plain bodies are both accepted.
//--------------------------------------
function decodeBody(buffer, callback) {
    var isGzip = buffer.length >= 2 && buffer[0] === 0x1f && buffer[1] === 0x8b;
    if (!isGzip) {
        callback(null, buffer.toString());
        return;
    }
    zlib.gunzip(buffer, function (err, decoded) {
        if (err) {
            callback(err);
            return;
        }
        callback(null, decoded.toString());
    });
}

//--------------------------------------
// Extract the picture count from the label text, e.g.
// "number of pictures: 112" -> 112.
// Returns 0 when the text contains no digits.
//--------------------------------------
function parsePictureCount(text) {
    var match = /\d+/.exec(text);
    return match ? parseInt(match[0], 10) : 0;
}

//--------------------------------------
// Extract the folder segment from an image URL, e.g.
// "http://mtl.ttsqgs.com/images/img/44/1.jpg" -> "44".
// Returns "" when src is missing or has no such segment.
//--------------------------------------
function folderFromImageSrc(src) {
    if (typeof src !== 'string') {
        return '';
    }
    var parts = src.split('/');
    return parts.length >= 2 ? parts[parts.length - 2] : '';
}

//--------------------------------------
// Parse the gallery page, create the local directory and download
// pictures 1..count into it.
// html: the decoded HTML text of the gallery page
//--------------------------------------
function downloadGallery(html) {
    var $ = cheerio.load(html);

    // The third <p> below the node of class c_l holds the picture count.
    var endIndex = parsePictureCount($('.c_l p').eq(2).text());

    // The first <img> below ".content center" gives the image folder.
    var folder = folderFromImageSrc($('.content center img').first().attr('src'));

    console.log(endIndex);
    console.log(folder);

    if (endIndex < 1 || !SAFE_FOLDER.test(folder)) {
        console.log('Could not find the picture count or image folder on the page');
        return;
    }

    fs.mkdir('./' + folder, function (err) {
        if (err && err.code !== 'EEXIST') {
            console.log('Could not create directory ' + folder + ': ' + err.code);
            return;
        }
        if (err) {
            console.log('Directory ' + folder + ' already exists');
        }

        // Download the images only once the directory is known to exist.
        for (var i = 1; i <= endIndex; i++) {
            downloadPic(folder, i);
        }
    });
}

//--------------------------------------
// Start download.
// pageUrl: URL of the gallery page; it is always requested over HTTPS
//          on port 443
//--------------------------------------
function start(pageUrl) {
    console.log('Page url=' + pageUrl);

    var page;
    try {
        page = new URL(pageUrl);
    } catch (err) {
        console.log('Invalid page URL: ' + pageUrl);
        return;
    }

    // Initialize options
    var options = {
        hostname: page.hostname,
        port: 443,
        path: page.pathname + page.search, // Sub-path
        method: 'GET',
        agent: false,
        headers: { 'Accept-Encoding': 'gzip' }
    };

    var req = https.request(options, function (resp) {
        if (resp.statusCode !== 200) {
            console.log('Get Failed (HTTP ' + resp.statusCode + ')');
            resp.resume(); // discard the body so the socket is released
            return;
        }

        var chunks = [];
        resp.on('data', function (chunk) {
            chunks.push(chunk);
        });
        resp.on('end', function () {
            decodeBody(Buffer.concat(chunks), function (err, body) {
                if (err) {
                    console.log('Get Failed');
                    return;
                }
                downloadGallery(body);
            });
        });
        resp.on('error', function () {
            console.log('Get Failed');
        });
    });

    guardRequest(req);

    // End of request
    req.end();
}

//--------------------------------------
// Download image.
// folder: the directory of the URL where the picture is located
// pictureIndex: picture number
//--------------------------------------
function downloadPic(folder, pictureIndex) {
    console.log('Start Download ' + pictureIndex);

    var fileName = './' + folder + '/' + pictureIndex + '.jpg';

    // Initialize options
    var options = {
        hostname: IMAGE_HOST,
        port: 80,
        path: '/images/img/' + folder + '/' + pictureIndex + '.jpg', // Sub-path
        method: 'GET'
    };

    var req = http.request(options, function (resp) {
        if (resp.statusCode !== 200) {
            console.log('File ' + fileName + ' Download failed. (HTTP ' + resp.statusCode + ')');
            resp.resume(); // discard the body so the socket is released
            return;
        }

        // Collect raw Buffers; the image is binary data.
        var chunks = [];
        resp.on('data', function (chunk) {
            chunks.push(chunk);
        });
        resp.on('end', function () {
            // Do not save a body that was cut short (e.g. by the timeout abort).
            if (!resp.complete) {
                console.log('File ' + fileName + ' Download failed.');
                return;
            }
            fs.writeFile(fileName, Buffer.concat(chunks), function (err) {
                if (err) {
                    console.log('File ' + fileName + ' Download failed.');
                    return;
                }
                console.log(fileName + ' Download succeeded');
            });
        });
        resp.on('error', function () {
            console.log('File ' + fileName + ' Download failed.');
        });
    });

    guardRequest(req);

    // End of request
    req.end();
}

// Call the getInput function and the program starts
getInput();
// Node.js meitulu image bulk-download crawler, version 1.03