Otaku and "old drivers" (veteran netizens) may well find this tool useful in some way. In fact, it was written in response to a request from exactly such users, but it is fairly simple and will probably not be improved or extended in the short term (for example, to bypass hotlink protection).
Full reference command line:

    miniimagecrawler -numcrawlgoroutine=5 -baseinterval=2 -randominterval=5 -tickerinterval=10 -savepath="" -imgwidthmin=500 -imgheightmin=500 http://eladies.sina.com.cn/
Or simply:

    miniimagecrawler http://eladies.sina.com.cn/
You can then just wait while the program downloads all (eligible) images from the specified website into the local save directory.
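For example (the flag names come from the code below; the save directory shown is only an illustration, and the path helpers in the code expect a Windows-style path with a trailing backslash), a run that keeps only images of at least 800x600 pixels and saves them to a custom directory might look like:

    miniimagecrawler -imgwidthmin=800 -imgheightmin=600 -savepath="D:\pics" http://eladies.sina.com.cn/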
The main program (main.go):

    package main

    import (
    	"flag"
    	"fmt"
    	"image"
    	_ "image/jpeg"
    	_ "image/png"
    	"log"
    	"math/rand"
    	"net/url"
    	"os"
    	"runtime"
    	"sync/atomic"
    	"time"

    	"helperutils"
    	"imagecrawler"
    )

    var numCrawlGoroutine int
    var baseInterval, randomInterval int
    var tickerInterval int
    var savePath string
    var imgWidthMin, imgHeightMin int
    var urlHost string

    func init() {
    	rand.Seed(time.Now().UnixNano())
    }

    func parseFlag() {
    	flag.IntVar(&numCrawlGoroutine, "numcrawlgoroutine", 5, "maximum number of crawl goroutines")
    	flag.IntVar(&baseInterval, "baseinterval", 2, "minimum crawl interval")
    	flag.IntVar(&randomInterval, "randominterval", 5, "random crawl interval")
    	flag.IntVar(&tickerInterval, "tickerinterval", 10, "goroutine count reporting interval (unit: s)")
    	flag.StringVar(&savePath, "savepath", "", "image save directory (defaults to the program directory)")
    	flag.IntVar(&imgWidthMin, "imgwidthmin", 0, "minimum image width")
    	flag.IntVar(&imgHeightMin, "imgheightmin", 0, "minimum image height")
    	flag.Parse()

    	if len(flag.Args()) == 0 {
    		panic("Please specify the starting URL to crawl!")
    	}
    	u, err := url.Parse(flag.Args()[0])
    	if err != nil {
    		panic(err)
    	}
    	urlHost = u.Host

    	if numCrawlGoroutine < 1 {
    		panic("The maximum number of crawl goroutines must be at least 1!")
    	}
    	if baseInterval < 1 {
    		panic("The minimum crawl interval must be at least 1!")
    	}
    	if randomInterval < 2 {
    		panic("Please set a valid random crawl interval!")
    	}
    	if tickerInterval < 5 {
    		panic("The reporting interval must be at least 5!")
    	}
    	if savePath == "" {
    		savePath = helperutils.GetAppPath() + urlHost + `\`
    		if !helperutils.DirectoryExists(savePath) {
    			if err := os.Mkdir(savePath, os.ModePerm); err != nil {
    				panic(fmt.Sprintf("Can not make dir: %s", savePath))
    			}
    		}
    	} else {
    		if !helperutils.DirectoryExists(savePath) {
    			panic("Invalid image save directory!")
    		}
    		savePath = helperutils.IncludeTrailingBackslash(savePath)
    	}
    	if imgWidthMin < 0 {
    		panic("The minimum image width must be at least 0!")
    	}
    	if imgHeightMin < 0 {
    		panic("The minimum image height must be at least 0!")
    	}
    }

    func parsePage(url, homePage string, nameChan chan<- string) []string {
    	ret, err := imagecrawler.ParsePage(url, homePage, savePath, nameChan)
    	if err != nil {
    		return nil
    	}
    	time.Sleep(time.Duration(rand.Intn(randomInterval)+baseInterval) * time.Second)
    	return ret
    }

    func checkImageSize(fileName string, minWidth, minHeight int) bool {
    	file, err := os.Open(fileName)
    	if err != nil {
    		return false
    	}
    	img, _, err := image.Decode(file)
    	if err != nil {
    		file.Close()
    		os.Remove(fileName)
    		return false
    	}
    	pt := img.Bounds().Size()
    	if pt.X < minWidth || pt.Y < minHeight {
    		file.Close()
    		os.Remove(fileName)
    		return false
    	}
    	file.Close()
    	return true
    }

    func main() {
    	parseFlag()

    	var imgNum, smallNum int64
    	nameChan := make(chan string)
    	go func() {
    		for s := range nameChan {
    			imgNum++
    			go func(imgName string) {
    				if !checkImageSize(imgName, imgWidthMin, imgHeightMin) {
    					atomic.AddInt64(&smallNum, 1)
    				}
    			}(s)
    		}
    	}()

    	workList := make(chan []string)
    	pendingNum := 1
    	go func() { workList <- []string{flag.Args()[0]} }()

    	ticker := time.NewTicker(time.Duration(tickerInterval) * time.Second)
    	go func() {
    		for range ticker.C {
    			log.Printf("Num of goroutines: %d\n", runtime.NumGoroutine())
    		}
    	}()

    	tokens := make(chan struct{}, numCrawlGoroutine)
    	seenURLs := make(map[string]bool)
    	log.Println("Picture grab started ...")
    	timeBegin := time.Now()
    	for ; pendingNum > 0; pendingNum-- {
    		list := <-workList
    		for _, link := range list {
    			if !seenURLs[link] {
    				seenURLs[link] = true
    				pendingNum++
    				go func(url string) {
    					tokens <- struct{}{}
    					defer func() { <-tokens }()
    					workList <- parsePage(url, urlHost, nameChan)
    				}(link)
    			}
    		}
    	}
    	log.Printf("Picture fetch ends. Duration: %s\n", time.Since(timeBegin).String())
    	log.Println("Finishing stats ...")
    	close(nameChan)
    	ticker.Stop()
    	time.Sleep(time.Millisecond * 500) // the multiplier was lost in the source text; 500 is a placeholder
    	invalidNum := atomic.LoadInt64(&smallNum)
    	log.Printf("Fetch total: total images %d, small images %d, valid images %d\n", imgNum, invalidNum, imgNum-invalidNum)
    	log.Println("The End.")
    }
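The crawl loop in main follows the classic breadth-first crawler pattern: a buffered channel (tokens) acts as a counting semaphore that caps concurrent fetches at numcrawlgoroutine, and pendingNum counts outstanding page fetches so the loop can terminate once every discovered link has been processed. A minimal, self-contained sketch of just that pattern (fetch here is a stub standing in for parsePage; the link names are made up for illustration):

    package main

    import "fmt"

    // fetch is a stand-in for parsePage: it returns the links found on a page.
    func fetch(url string) []string {
    	if url == "root" {
    		return []string{"a", "b", "c"}
    	}
    	return nil
    }

    func main() {
    	workList := make(chan []string)
    	tokens := make(chan struct{}, 2) // counting semaphore: at most 2 concurrent fetches
    	seen := make(map[string]bool)

    	pending := 1
    	go func() { workList <- []string{"root"} }()

    	// Each iteration consumes one result from workList; pending tracks how
    	// many results are still owed, so the loop ends when all work is done.
    	for ; pending > 0; pending-- {
    		for _, link := range <-workList {
    			if !seen[link] {
    				seen[link] = true
    				pending++
    				go func(url string) {
    					tokens <- struct{}{}        // acquire a slot
    					defer func() { <-tokens }() // release it
    					workList <- fetch(url)
    				}(link)
    			}
    		}
    	}
    	fmt.Println("crawled", len(seen), "pages")
    }

Note the design choice: fetching goroutines are spawned eagerly and block on the semaphore, rather than being drawn from a fixed worker pool; this keeps the loop simple at the cost of one parked goroutine per queued link.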
The crawler package (imagecrawler):

    package imagecrawler

    import (
    	"fmt"
    	"io"
    	"net/http"
    	"os"
    	"path/filepath"
    	"strings"

    	"golang.org/x/net/html"
    )

    func ParsePage(url, homePage, savePath string, nameChan chan<- string) ([]string, error) {
    	resp, err := http.Get(url)
    	if err != nil {
    		return nil, err
    	}
    	defer resp.Body.Close()
    	if resp.StatusCode != http.StatusOK {
    		return nil, fmt.Errorf("getting %s: %s", url, resp.Status)
    	}
    	doc, err := html.Parse(resp.Body)
    	if err != nil {
    		return nil, fmt.Errorf("parsing %s as HTML: %v", url, err)
    	}

    	var links []string
    	visitNode := func(n *html.Node) {
    		if n.Type == html.ElementNode && n.Data == "a" {
    			for _, a := range n.Attr {
    				if a.Key != "href" {
    					continue
    				}
    				link, err := resp.Request.URL.Parse(a.Val)
    				if err != nil {
    					continue
    				}
    				addr := link.String()
    				if strings.HasSuffix(addr, ".jpg") || strings.HasSuffix(addr, ".jpeg") || strings.HasSuffix(addr, ".png") {
    					downloadImage(addr, savePath, nameChan)
    				} else {
    					if strings.Contains(addr, homePage) {
    						links = append(links, addr)
    					}
    				}
    			}
    		} else if n.Type == html.ElementNode && n.Data == "img" {
    			for _, a := range n.Attr {
    				if a.Key != "src" {
    					continue
    				}
    				link, err := resp.Request.URL.Parse(a.Val)
    				if err != nil {
    					continue
    				}
    				addr := link.String()
    				if strings.HasSuffix(addr, ".jpg") || strings.HasSuffix(addr, ".jpeg") || strings.HasSuffix(addr, ".png") {
    					downloadImage(addr, savePath, nameChan)
    				}
    			}
    		}
    	}
    	forEachNode(doc, visitNode, nil)
    	return links, nil
    }

    func downloadImage(addr, savePath string, nameChan chan<- string) {
    	resp, err := http.Get(addr)
    	if err != nil {
    		return
    	}
    	defer resp.Body.Close()
    	if resp.StatusCode != http.StatusOK {
    		return
    	}
    	fileName := savePath + filepath.Base(addr)
    	dst, err := os.Create(fileName)
    	if err != nil {
    		return
    	}
    	io.Copy(dst, resp.Body)
    	dst.Close()
    	nameChan <- fileName
    }

    func forEachNode(n *html.Node, pre, post func(n *html.Node)) {
    	if pre != nil {
    		pre(n)
    	}
    	for c := n.FirstChild; c != nil; c = c.NextSibling {
    		forEachNode(c, pre, post)
    	}
    	if post != nil {
    		post(n)
    	}
    }
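As noted at the start, the tool makes no attempt to bypass hotlink protection: downloadImage issues a bare http.Get, so servers that validate the Referer header will reject the request. A hypothetical sketch of how one might extend it (this function and its referer parameter are my own names, not part of the source; it would live in the package above and uses the same imports):

    // downloadImageWithReferer is a hypothetical variant of downloadImage that
    // sends a Referer header, which defeats the most common hotlink checks.
    func downloadImageWithReferer(addr, referer, savePath string, nameChan chan<- string) {
    	req, err := http.NewRequest("GET", addr, nil)
    	if err != nil {
    		return
    	}
    	req.Header.Set("Referer", referer) // e.g. the page the image was found on
    	resp, err := http.DefaultClient.Do(req)
    	if err != nil {
    		return
    	}
    	defer resp.Body.Close()
    	if resp.StatusCode != http.StatusOK {
    		return
    	}
    	fileName := savePath + filepath.Base(addr)
    	dst, err := os.Create(fileName)
    	if err != nil {
    		return
    	}
    	io.Copy(dst, resp.Body)
    	dst.Close()
    	nameChan <- fileName
    }

ParsePage already has the page URL at hand, so it could pass url as the referer argument when it encounters an image link.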
The download link for the executable file is here.