Image sites are often cluttered with ads. Writing a crawler in Node.js to download the pictures takes only a little code and saves a great deal of effort; compared with manually saving the images one at a time, the difference is like night and day.
The following code is a program that downloads images from mtl.ttsqgs.com. The image address was found by viewing the source of the web page; the total number of images can also be found in that source, and the program then iterates over them.
// Bulk-downloads the numbered JPEG images
// http://mtl.ttsqgs.com/images/img/7957/<page>.jpg into ./imgs/<page>.jpg.
// Downloading starts as soon as the script is loaded.

// Built-in HTTP module that provides HTTP server and client functionality.
var http = require("http");
// cheerio module with jQuery-like functionality (not used by the code below).
var cheerio = require("cheerio");
// Built-in file processing module.
var fs = require("fs");

// Number of the last image to fetch.
var TOTAL_PAGES = 49;
// Directory the images are written to.
var OUTPUT_DIR = "./imgs/";
// Milliseconds of socket inactivity after which a request is aborted.
var REQUEST_TIMEOUT_MS = 5000;

// Page number of the most recently started download.
var index = 1;

// writeFile does not create missing directories, so make sure the target exists.
if (!fs.existsSync(OUTPUT_DIR)) {
  fs.mkdirSync(OUTPUT_DIR);
}

/**
 * Requests image number `pagenumber` and saves it as ./imgs/<pagenumber>.jpg,
 * then immediately starts the next page (without waiting for this one to
 * finish) until page 49 has been started.
 *
 * Failures (non-200 status, truncated body, write error, timeout, socket
 * error) are logged to the console; nothing is thrown to the caller.
 *
 * @param {number} pagenumber - Number of the image to download.
 */
function downloadimg(pagenumber) {
  console.log("Start reading page " + pagenumber);

  // Actual picture address: http://mtl.ttsqgs.com/images/img/7957/4.jpg
  // Request parameters. These are local to each call: every request is
  // started before the previous ones finish, so sharing them at module level
  // would make the callbacks below act on the wrong request.
  var options = {
    // Do not add "http://" here, otherwise there will be ENOTFOUND errors.
    hostname: "mtl.ttsqgs.com",
    port: 80,
    // Sub-path
    path: "/images/img/7957/" + pagenumber + ".jpg",
    method: "GET"
  };

  var req = http.request(options, function (resp) {
    // Anything other than 200 is not the image; do not store it as a .jpg.
    if (resp.statusCode !== 200) {
      console.log("File download failed. HTTP status: " + resp.statusCode);
      resp.resume(); // discard the body so the socket is released
      return;
    }

    // Collect raw Buffers instead of building a "binary"-encoded string.
    var chunks = [];
    resp.on("data", function (chunk) {
      chunks.push(chunk);
    });

    // An aborted or broken response must not crash the process.
    resp.on("error", function (err) {
      console.log("Request error occurred, err.code: " + err.code);
    });

    resp.on("end", function () {
      // `complete` is false when the connection ended before the whole
      // message arrived; writing such data would produce a corrupt image.
      if (!resp.complete) {
        console.log("File download failed.");
        return;
      }
      fs.writeFile(OUTPUT_DIR + pagenumber + ".jpg", Buffer.concat(chunks), function (err) {
        if (err) {
          console.log("File download failed.");
          return; // do not report success after a failed write
        }
        console.log("Download Succeeded");
      });
    });
  });

  // Timeout processing: abort this request (not a later one) when it stalls.
  req.setTimeout(REQUEST_TIMEOUT_MS, function () {
    req.abort();
  });

  // Error handling: aborting the request surfaces here as ECONNRESET.
  req.on("error", function (err) {
    if (err.code === "ECONNRESET") {
      console.log("Socket port connection timed out.");
    } else {
      console.log("Request error occurred, err.code: " + err.code);
    }
  });

  // End of request: nothing is sent until end() is called.
  req.end();

  // Finish after page 49; otherwise start the next page right away.
  if (index < TOTAL_PAGES) {
    index++;
    console.log("Continue to page " + index);
    Start(index);
  }
}

/**
 * Thin wrapper around downloadimg.
 *
 * @param {number} i - Page number to download.
 */
function Start(i) {
  downloadimg(i);
}

// Start traversal.
Start(index);
September 30, 2017 10:43:27
Using a Node.js crawler to bulk-download web images to local storage