go實現抓妹子圖片

來源:互聯網
上載者:User
這是一篇先前建立的文章,其中的資訊可能已經有所發展或是發生改變。

1:學習練手用的
2:我也是看到 http://www.oschina.net/code/snippet_2362573_48180 這篇文章後,發現妹子圖這個網站結構比較簡單,實現起來很 easy
3:代碼還有很多最佳化的地方,怕老闆看見只能回家搞了
4:歡迎指正

直接上代碼

// Command main crawls the gallery list pages of www.mzitu.com and saves the
// pictures it finds into a local directory.
//
// The work is organised as a pipeline of goroutines connected by channels:
//
//	main -> c1 -> CgetModel -> c2 -> CgetList -> c3 -> Cdownload -> c5 -> main
//
// Every stage closes its output channel once its input is exhausted, so main
// returns normally when the crawl is finished.
package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

const (
	// PATH is the directory pictures are saved to. A leading "~/" is
	// replaced with $HOME at run time (see saveDir).
	PATH string = "~/meizi"

	// STARTURL is the URL of the gallery list page the crawl starts from.
	STARTURL string = "http://www.mzitu.com/model"

	// CONCURRENCY is the number of picture pages downloaded at the same time.
	CONCURRENCY int = 3
)

var (
	// c1 carries the start URL (it could later be fed from a command-line
	// argument instead of STARTURL).
	c1 chan string

	// c2 carries the URL of every list page.
	c2 chan string

	// c3 carries batches of at most CONCURRENCY picture-page URLs.
	c3 chan []string

	// c4 carries one completion signal per downloaded picture page.
	c4 chan int

	// c5 carries the size of every batch that has finished downloading.
	c5 chan int
)

// The patterns are compiled once at start-up instead of on every call.
var (
	// listRe captures the album URL (group 1) of every entry on a list page.
	listRe = regexp.MustCompile(`<h2><a href="(http://www.mzitu.com/\d*)" title="(.*?)" target="_blank">.*?</a></h2>`)

	// pageRe captures the numbers shown in the pagination <span> elements.
	pageRe = regexp.MustCompile(`<span>(\d*)</span>`)

	// imgRe captures the picture URL (group 1) on a picture page.
	imgRe = regexp.MustCompile(`<p><a href="http:\/\/www.mzitu.com/.*?" ><img src="(.*?)" alt="(.*?)" /></a></p>`)

	// imgHostRe captures the path part (group 1) of a pic.dofay.com URL.
	imgHostRe = regexp.MustCompile(`http:\/\/pic\.dofay\.com/(.*)`)
)

// init allocates the channels and starts the two long-running pipeline
// stages, CgetList and Cdownload.
func init() {
	c1 = make(chan string, 1)
	c2 = make(chan string, 100)
	c3 = make(chan []string, 1000)
	c4 = make(chan int, 3)
	c5 = make(chan int, 10)
	go CgetList()
	go Cdownload()
}

// main seeds the pipeline with STARTURL and prints a running total of the
// picture pages processed until Cdownload closes c5.
func main() {
	c1 <- STARTURL
	go CgetModel()
	num := 0
	for count := range c5 {
		num += count
		fmt.Println("已下載:", num)
	}
}

// CgetModel reads the start URL from c1, determines how many list pages
// there are and sends the URL of each one on c2. It closes c2 when done so
// that the downstream stages can terminate.
func CgetModel() {
	defer close(c2)
	modelPage := getPage(<-c1)
	for i := 1; i <= modelPage; i++ {
		c2 <- STARTURL + "/page/" + strconv.Itoa(i)
	}
}

// CgetList turns every list-page URL received on c2 into picture-page URLs
// and sends them on c3 in batches of at most CONCURRENCY. It closes c3 once
// c2 is closed and drained.
func CgetList() {
	defer close(c3)
	for modelUrl := range c2 {
		// Every batch gets its own slice: a slice that has been sent is
		// owned by the receiver and must not be written to again.
		batch := make([]string, 0, CONCURRENCY)
		for _, imgList := range getList(modelUrl) {
			imgPage := getPage(imgList)
			for j := 1; j <= imgPage; j++ {
				batch = append(batch, imgList+"/"+strconv.Itoa(j))
				if len(batch) == CONCURRENCY {
					c3 <- batch
					batch = make([]string, 0, CONCURRENCY)
				}
			}
		}
		// Flush the partial batch left over at the end of a list page.
		if len(batch) > 0 {
			c3 <- batch
		}
	}
}

// Cdownload downloads every batch received on c3, one goroutine per picture
// page, waits for the whole batch to finish and then reports the batch size
// on c5. It closes c5 once c3 is closed and drained.
func Cdownload() {
	defer close(c5)
	for imgUrls := range c3 {
		if len(imgUrls) == 0 {
			continue
		}
		for _, imgUrl := range imgUrls {
			// The URL is passed as an argument so that each goroutine
			// works on its own copy, independent of the loop variable.
			go func(u string) {
				download(u)
				c4 <- 1
			}(imgUrl)
		}
		num := 0
		for num < len(imgUrls) {
			num += <-c4
		}
		c5 <- num
	}
}

// getList returns the album URLs linked from the list page at url. It
// returns nil when the page cannot be fetched.
func getList(url string) (l []string) {
	err, html, readErr := getHtml(url)
	if err != nil || readErr != nil {
		fmt.Fprintln(os.Stderr, "getList:", url, err, readErr)
		return nil
	}
	for _, m := range listRe.FindAllStringSubmatch(html, 1000) {
		l = append(l, m[1])
	}
	return l
}

// getHtml fetches url and returns the response body as a string.
//
// The first result is the request error, the third is the error from
// reading the body; the unusual order is kept for existing callers. When the
// request itself fails the body is empty and the read error is nil.
func getHtml(url string) (error, string, error) {
	response, err := http.Get(url)
	if err != nil {
		// response is nil here, so there is nothing to close.
		return err, "", nil
	}
	defer response.Body.Close()
	html, err1 := ioutil.ReadAll(response.Body)
	return nil, string(html), err1
}

// getPage returns the highest page number shown in the pagination of the
// page at url, taken from the last <span>number</span> element.
//
// As before, the program exits with status -1 when fewer than two such
// elements are found. If the last element holds no valid number, 0 is
// returned.
func getPage(url string) (page int) {
	err, html, readErr := getHtml(url)
	if err != nil || readErr != nil {
		fmt.Fprintln(os.Stderr, "getPage:", url, err, readErr)
	}
	s := pageRe.FindAllStringSubmatch(html, 200)
	if len(s) < 2 {
		fmt.Println("擷取失敗")
		os.Exit(-1)
	}
	page, convErr := strconv.Atoi(s[len(s)-1][1])
	if convErr != nil {
		fmt.Fprintln(os.Stderr, "getPage:", url, convErr)
		return 0
	}
	return page
}

// download saves every pic.dofay.com picture found on the picture page at
// url into the save directory. Pictures that already exist on disk are left
// untouched. Failures for a single picture are reported and skipped; failing
// to create the save directory terminates the program, as before.
func download(url string) {
	err, html, readErr := getHtml(url)
	if err != nil || readErr != nil {
		fmt.Fprintln(os.Stderr, "download:", url, err, readErr)
		return
	}
	items := imgRe.FindAllStringSubmatch(html, 100)
	if len(items) == 0 {
		return
	}

	dir := saveDir()
	// MkdirAll creates missing parents and succeeds when the directory
	// already exists, so concurrent downloads cannot trip over each other.
	if err := os.MkdirAll(dir, os.ModePerm); err != nil {
		fmt.Println(err)
		os.Exit(-1)
	}

	for _, item := range items {
		imgUrl := item[1]
		m := imgHostRe.FindStringSubmatch(imgUrl)
		if m == nil {
			continue // not hosted on pic.dofay.com
		}
		name := path.Base(m[1])
		if name == "." || name == "/" {
			continue // the URL carries no usable file name
		}
		if err := saveImage(imgUrl, filepath.Join(dir, name)); err != nil {
			fmt.Fprintln(os.Stderr, "download:", imgUrl, err)
		}
	}
}

// saveDir returns PATH with a leading "~/" replaced by the value of $HOME.
// PATH is returned unchanged when it has no such prefix or $HOME is unset.
func saveDir() string {
	if strings.HasPrefix(PATH, "~/") {
		if home := os.Getenv("HOME"); home != "" {
			return filepath.Join(home, strings.TrimPrefix(PATH, "~/"))
		}
	}
	return PATH
}

// saveImage streams the picture at imgURL into the file fn. It does nothing
// when fn already exists and removes fn again when the transfer fails, so a
// truncated file is not mistaken for a finished download on the next run.
func saveImage(imgURL, fn string) error {
	if _, err := os.Stat(fn); err == nil {
		return nil
	}
	resp, err := http.Get(imgURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %s", resp.Status)
	}
	fh, err := os.Create(fn)
	if err != nil {
		return err
	}
	if _, err := io.Copy(fh, resp.Body); err != nil {
		fh.Close()
		os.Remove(fn)
		return err
	}
	return fh.Close()
}
相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.