golang初學之goroutine---web爬蟲

來源:互聯網
上載者:User
這是一篇建立時間較早的文章,其中的資訊可能已經有所發展或是發生改變。

go tour 練習 https://tour.go-zh.org/concurrency/10

// Command main is a solution to the "Web Crawler" exercise of A Tour of Go:
// it crawls a graph of pages concurrently, fetching every URL at most once.
package main

import (
	"fmt"
	"sync"
)

// Fetcher retrieves a page and the links found on it.
type Fetcher interface {
	// Fetch returns the body of url and a slice of the URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}

// SafeUrlMap is a set of URLs that is safe for concurrent use.
// The zero value is an empty set ready to use.
type SafeUrlMap struct {
	v   map[string]bool
	mux sync.Mutex
}

// Put records key as seen.
func (c *SafeUrlMap) Put(key string) {
	c.mux.Lock()
	defer c.mux.Unlock()
	if c.v == nil {
		c.v = make(map[string]bool)
	}
	c.v[key] = true
}

// Contains reports whether key has been recorded.
func (c *SafeUrlMap) Contains(key string) bool {
	c.mux.Lock()
	defer c.mux.Unlock()
	_, ok := c.v[key]
	return ok
}

// PutIfAbsent records key and reports whether this call was the one that
// added it. The test and the insert happen under a single lock acquisition,
// so exactly one of any number of concurrent callers gets true for a key.
func (c *SafeUrlMap) PutIfAbsent(key string) bool {
	c.mux.Lock()
	defer c.mux.Unlock()
	if _, seen := c.v[key]; seen {
		return false
	}
	if c.v == nil {
		c.v = make(map[string]bool)
	}
	c.v[key] = true
	return true
}

// Resp is one successfully fetched page.
type Resp struct {
	url  string
	body string
}

// urlMap holds every URL that has been claimed for fetching.
var urlMap = &SafeUrlMap{v: make(map[string]bool)}

// Crawl uses fetcher to recursively crawl pages starting at url, to a maximum
// of depth, sending every successfully fetched page on ch. Fetch errors are
// printed and end that branch of the crawl.
//
// Linked pages are fetched in their own goroutines. Crawl returns only after
// all of them have finished, so the caller may close ch once Crawl returns.
// Because pages are sent on ch while Crawl is running, something must be
// receiving from ch concurrently (or ch must be large enough to buffer every
// page); otherwise Crawl blocks.
func Crawl(url string, depth int, fetcher Fetcher, ch chan Resp) {
	if depth <= 0 {
		return
	}
	urlMap.Put(url)

	var wg sync.WaitGroup
	crawlPage(url, depth, fetcher, ch, &wg)
	wg.Wait()
}

// crawlPage fetches a single, already claimed url, reports it on ch and
// starts one goroutine (tracked by wg) per link that nobody has claimed yet.
func crawlPage(url string, depth int, fetcher Fetcher, ch chan<- Resp, wg *sync.WaitGroup) {
	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	ch <- Resp{url: url, body: body}

	// The links of this page would be crawled at depth-1; stop here when that
	// budget is exhausted so that unfetched URLs are not marked as claimed.
	if depth <= 1 {
		return
	}
	for _, u := range urls {
		// Claim the URL before spawning. Checking here and marking inside the
		// child would let two pages that link to the same URL both start a
		// crawl for it.
		if !urlMap.PutIfAbsent(u) {
			fmt.Printf("Have Processed: %s\n", u)
			continue
		}
		wg.Add(1)
		go func(u string) {
			defer wg.Done()
			crawlPage(u, depth-1, fetcher, ch, wg)
		}(u)
	}
}

func main() {
	ch := make(chan Resp)
	go func() {
		// Crawl returns once every page has been reported, so closing ch here
		// is what ends the receive loop below; no idle timeout is needed.
		defer close(ch)
		Crawl("http://golang.org/", 4, fetcher, ch)
	}()
	for r := range ch {
		fmt.Printf("found: %s %q\n", r.url, r.body)
	}
}

// fakeFetcher is a Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned body and link list of one page.
type fakeResult struct {
	body string
	urls []string
}

// Fetch returns the canned result for url, or a "not found" error when the
// URL is not part of the fake site.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	if res, ok := f[url]; ok {
		return res.body, res.urls, nil
	}
	return "", nil, fmt.Errorf("not found: %s", url)
}

// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
	"http://golang.org/": &fakeResult{
		"The Go Programming Language",
		[]string{
			"http://golang.org/pkg/",
			"http://golang.org/cmd/",
		},
	},
	"http://golang.org/pkg/": &fakeResult{
		"Packages",
		[]string{
			"http://golang.org/",
			"http://golang.org/cmd/",
			"http://golang.org/pkg/fmt/",
			"http://golang.org/pkg/os/",
		},
	},
	"http://golang.org/pkg/fmt/": &fakeResult{
		"Package fmt",
		[]string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
	"http://golang.org/pkg/os/": &fakeResult{
		"Package os",
		[]string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
}

  

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.