/**
 * Third-party and core modules used by the downloader, grouped under one
 * namespace so that local variables such as `path` or `url` do not shadow them.
 */
var node = {
  async: require('async'),
  cheerio: require('cheerio'),
  fs: require('fs'),
  mkdirp: require('mkdirp'),
  path: require('path'),
  request: require('request'),
  url: require('url'),
  xml2js: require('xml2js'),
};

/**
 * Site image downloader: reads the site's sitemap.xml, visits every listed
 * page, and saves the images found on each page into a per-page directory.
 */
var Me2SexImages = {
  /**
   * Configuration options.
   */
  options: {
    // Address of the site's sitemap.
    sitemap: 'http://sexy.faceks.com/sitemap.xml',
    // Root folder that receives one sub-directory per post.
    saveTo: '/Users/Fay/Pictures/me2sex',
    // Maximum number of images downloaded in parallel for a single post.
    downLimit: 5,
  },

  /** Sitemap entries (each with a `loc` URL) filled in by `sitemapJSON`. */
  posts: [],

  /**
   * Start the download (program entry point). Runs the three top-level
   * stages in sequence and prints the final outcome.
   */
  start: function () {
    var self = this;

    node.async.waterfall([
      self.wrapTask(self.sitemapXML),
      self.wrapTask(self.sitemapJSON),
      self.wrapTask(self.downAllImages),
    ], function (err) {
      if (err) {
        console.log('error: %s', err.message);
      } else {
        console.log('success: Download succeeded ');
      }
    });
  },

  /**
   * Wrap a task so that it always runs with a specific `this`, optionally
   * with extra leading arguments.
   *
   * @param {Function} task    Task function following the async.js calling convention.
   * @param {*}        context Object to use as `this`; defaults to this downloader.
   * @param {Array}    exArgs  Extra arguments placed before the ones async.js supplies.
   * @return {Function} A task function following the async.js calling convention.
   */
  wrapTask: function (task, context, exArgs) {
    var self = this;

    return function () {
      var args = [].slice.call(arguments);
      args = exArgs ? exArgs.concat(args) : args;
      // Logical OR (not bitwise `|`): fall back to the downloader itself
      // when no explicit context was given.
      task.apply(context || self, args);
    };
  },

  /**
   * Fetch the site's sitemap.xml.
   *
   * @param {Function} callback Called with (err, body).
   */
  sitemapXML: function (callback) {
    console.log('start downloading sitemap. xml ');

    node.request(this.options.sitemap, function (err, res, body) {
      if (!err) console.log('Download sitemap. xml successfully ');
      callback(err, body);
    });
  },

  /**
   * Convert the sitemap XML text to JSON and store its entries in `this.posts`.
   *
   * @param {String}   sitemapXML Raw XML text of the sitemap.
   * @param {Function} callback   Called with (err, posts).
   */
  sitemapJSON: function (sitemapXML, callback) {
    var self = this;

    console.log('start parsing sitemap. xml ');

    node.xml2js.parseString(sitemapXML, { explicitArray: false }, function (err, json) {
      if (!err) {
        var urls = json && json.urlset && json.urlset.url;

        // With `explicitArray: false` a sitemap containing a single <url>
        // yields a plain object and an empty one yields nothing at all;
        // normalise both to an array so the array methods below are safe.
        if (!urls) {
          urls = [];
        } else if (!Array.isArray(urls)) {
          urls = [urls];
        }

        // The first entry is discarded — presumably it is the site's index
        // page rather than a post (TODO confirm against the live sitemap).
        urls.shift();

        self.posts = urls;
        console.log('Resolution of sitemap. xml succeeded, %d pages in total ', self.posts.length);
      }

      callback(err, self.posts);
    });
  },

  /**
   * Download the images of every post, one post at a time.
   *
   * `async.waterfall` forwards the result of `sitemapJSON`, so this task is
   * normally invoked as (posts, callback). Calling it with only a callback
   * is still supported and falls back to `this.posts`.
   *
   * @param {Array}    posts    Sitemap entries to process (optional).
   * @param {Function} callback Called with (err) once every post is done.
   */
  downAllImages: function (posts, callback) {
    if (typeof posts === 'function') {
      callback = posts;
      posts = this.posts;
    }

    console.log('start batch download ');

    node.async.eachSeries(posts, this.wrapTask(this.downPostImages), callback);
  },

  /**
   * Download the images of a single post: create its directory, fetch the
   * page, extract the image list, then download the images.
   *
   * @param {Object}   post     Sitemap entry; `post.loc` is the page URL.
   * @param {Function} callback Called with (err) when the post is finished.
   */
  downPostImages: function (post, callback) {
    var self = this;

    node.async.waterfall([
      // The first waterfall task receives only a callback, so the post is
      // injected as an extra leading argument.
      self.wrapTask(self.mkdir, self, [post]),
      self.wrapTask(self.getPost),
      self.wrapTask(self.parsePost),
      self.wrapTask(self.downImages),
    ], callback);
  },

  /**
   * Create the directory that will hold a post's images and record it on
   * `post.dir`. The directory is named after the last segment of the post URL.
   *
   * @param {Object}   post     Sitemap entry; `post.loc` is the page URL.
   * @param {Function} callback Called with (err, post).
   */
  mkdir: function (post, callback) {
    var path = node.path;
    var url = node.url.parse(post.loc);

    post.dir = path.join(this.options.saveTo, path.basename(url.pathname));

    console.log('Prepare to create directory: %s', post.dir);

    if (node.fs.existsSync(post.dir)) {
      console.log('Directory: %s already exists ', post.dir);
      callback(null, post);
      return;
    }

    // NOTE(review): relies on the callback form of mkdirp (0.x API) — confirm
    // the installed version still accepts a callback.
    node.mkdirp(post.dir, function (err) {
      // Only report success when the directory was actually created.
      if (!err) console.log('Directory: %s created successfully ', post.dir);
      callback(err, post);
    });
  },

  /**
   * Fetch the HTML of a post and store it on `post.html`.
   *
   * @param {Object}   post     Sitemap entry; `post.loc` is the page URL.
   * @param {Function} callback Called with (err, post).
   */
  getPost: function (post, callback) {
    console.log('start request page: %s', post.loc);

    node.request(post.loc, function (err, res, body) {
      if (!err) {
        post.html = body;
        console.log('request page succeeded: %s', post.loc);
      }
      callback(err, post);
    });
  },

  /**
   * Parse a post's HTML and collect the image URLs into `post.images`.
   * The URLs are read from the `bigimgsrc` attribute of `.img` elements.
   *
   * @param {Object}   post     Post whose `html` has been fetched.
   * @param {Function} callback Called with (null, post).
   */
  parsePost: function (post, callback) {
    var $ = post.$ = node.cheerio.load(post.html);

    post.images = $('.img')
      .map(function () { return $(this).attr('bigimgsrc'); })
      .toArray();

    callback(null, post);
  },

  /**
   * Download every image in `post.images`, at most `options.downLimit`
   * at a time.
   *
   * @param {Object}   post     Post whose `images` and `dir` are set.
   * @param {Function} callback Called with (err) when all images are done.
   */
  downImages: function (post, callback) {
    console.log('found %d sister pictures, ready to download... ', post.images.length);

    node.async.eachLimit(
      post.images,
      this.options.downLimit,
      this.wrapTask(this.downImage, this, [post]),
      callback
    );
  },

  /**
   * Download a single image into the post's directory, keeping the file
   * name from the image URL.
   *
   * @param {Object}   post     Post the image belongs to; `post.dir` is the target directory.
   * @param {String}   imgsrc   URL of the image.
   * @param {Function} callback Called exactly once with (err).
   */
  downImage: function (post, imgsrc, callback) {
    var url = node.url.parse(imgsrc);
    var fileName = node.path.basename(url.pathname);
    var toPath = node.path.join(post.dir, fileName);
    var finished = false;

    // async.js throws if a callback fires twice, and 'close' can follow
    // 'error', so every outcome is funnelled through this one-shot guard.
    function done(err) {
      if (finished) return;
      finished = true;
      if (!err) console.log('image download succeeded: %s', imgsrc);
      callback(err);
    }

    console.log('start downloading image: %s, save to: %s, file name: %s', imgsrc, post.dir, fileName);

    var download = node.request(imgsrc);
    var file = node.fs.createWriteStream(toPath);

    // `pipe` does not forward errors from the source to the destination, so
    // the request needs its own handler; otherwise a network failure would
    // be an unhandled 'error' event.
    download.on('error', function (err) {
      done(err);
      file.destroy();
    });
    file.on('error', done);
    file.on('close', function () { done(); });

    download.pipe(file);
  },
};

Me2SexImages.start();