This script imitates the Python program published at http://www.oschina.net/code/snippet_226106_23414.
I simply rewrote it in Node.js as a practice exercise.
// Crawls one Baidu Tieba forum listing page and, for every post linked from
// it, downloads the images found in that post into a directory of its own
// below `saveroot`.
//
// NOTE(review): this script was reconstructed from text whose letter casing
// and spacing had been garbled. The CSS selectors ('a.j_th_tit',
// 'img.BDE_Image'), the '/p/' link prefix and the query string below were
// restored to their most plausible form -- confirm them against the live
// pages before relying on the output.

var baidu_base_url = 'http://tieba.baidu.com';
var http = require('http');
// Parsed address of the forum listing page; `kw` is the URL-encoded forum name.
var url = require('url').parse(baidu_base_url + '/f?ie=utf-8&kw=%E5%A7%90%E8%84%B1');
var iconv = require('iconv-lite');
// Capitalised so that local instances never shadow the constructor.
var BufferHelper = require('bufferhelper');
var $ = require('jquery');
var _ = require('underscore');
var saveroot = "O:\\Baidu\\"; // root directory that downloaded files are saved under
var fs = require('fs');

/**
 * Replaces every occurrence of the literal string `s1` with `s2`.
 *
 * Implemented with split/join so that it terminates even when `s2` contains
 * `s1` (a replace-until-gone loop would spin forever in that case).
 *
 * @param {string} s1 - Literal text to search for; an empty string is a no-op.
 * @param {string} s2 - Replacement text.
 * @returns {string} A new string with all occurrences replaced.
 */
String.prototype.replaceall = function (s1, s2) {
    var text = String(this);
    if (s1 === '') {
        return text;
    }
    return text.split(s1).join(s2);
};

/**
 * Downloads `url` and hands the complete response body to `callback`.
 *
 * Request and response errors are logged and the callback is not invoked, so
 * a single failed download does not terminate the whole crawl.
 *
 * @param {string|Object} url - Address (or parsed URL object) to fetch.
 * @param {function(Buffer)} callback - Receives the raw response body.
 */
function htmlbufferget(url, callback) {
    var label = typeof url === 'string' ? url : (url.href || String(url));

    function reportFailure(err) {
        console.error('request failed for ' + label + ': ' + err.message);
    }

    http.get(url, function (res) {
        var body = new BufferHelper();
        res.on('data', function (chunk) {
            body.concat(chunk);
        });
        res.on('end', function () {
            callback(body.toBuffer());
        });
        res.on('error', reportFailure);
    }).on('error', reportFailure);
}

/**
 * Downloads a page, decodes it as GBK and passes it to `callback` wrapped in
 * a jQuery object.
 *
 * @param {string|Object} url - Address (or parsed URL object) of the page.
 * @param {function(Object)} callback - Receives `$(html)` for the decoded page.
 */
function htmlget(url, callback) {
    htmlbufferget(url, function (buffer) {
        var html = iconv.decode(buffer, 'gbk');
        callback($(html));
    });
}

/**
 * Saves every post image found on the page at `url` into `fold`, naming the
 * files by their position on the page (0.png, 1.png, ...).
 *
 * @param {string} url - Address of the post page.
 * @param {string} fold - Existing directory the images are written to.
 */
function gettieziimg(url, fold) {
    htmlget(url, function (html) {
        html.find('img.BDE_Image').each(function (i, el) {
            var imgurl = $(el).attr('src');
            if (!imgurl) {
                return; // image element without a source: nothing to download
            }
            htmlbufferget(imgurl, function (buffer) {
                var filename = fold + "/" + i + ".png";
                fs.writeFile(filename, buffer, 'binary', function (err) {
                    if (err) {
                        // Throwing inside this async callback would kill the
                        // process and abort every other pending download.
                        console.error('could not save ' + filename + ': ' + err.message);
                        return;
                    }
                    console.log(filename + 'file saved.');
                });
            });
        });
    });
}

// Main: walk the post links on the listing page and fetch each post's images.
htmlget(url, function (html) {
    html.find('a.j_th_tit').each(function (i, el) {
        var $a = $(el);
        var href = $a.attr("href");
        if (!href) {
            return; // link without a target: skip it
        }
        var tieziurl = baidu_base_url + href;
        var foldid = href.toString().replaceall("/p/", "");

        // Use the link text as the directory name, minus every character that
        // is not allowed in a Windows path component (plus commas).
        var title = $a.text().replace(/[\\\/:*?"<>|,]/g, "").trim();
        // Fall back to the post id so that posts with an empty title do not
        // all write into the save root and overwrite each other's files.
        var dirname = title || foldid;

        // Each invocation of this callback already has its own scope, so the
        // delayed function below sees this post's values without an extra
        // wrapper closure.
        _.delay(function () {
            var fold = saveroot + dirname;
            console.log(fold);
            try {
                if (!fs.existsSync(fold)) {
                    console.log("directory does not exist" + fold);
                    fs.mkdirSync(fold);
                }
            } catch (err) {
                console.error('could not create ' + fold + ': ' + err.message);
                return;
            }
            gettieziimg(tieziurl, fold);
        }, 10);
    });
});
1) The node-jquery package needs Visual C++ 2010 in order to build. Before installing the npm package, make sure VC++ 2010 is present (the build uses vcbuild.bat; having only .NET Framework 4.0 installed does not help).
2) Change the save directory in the script to a path that exists on your machine.