A Tour of Go — Exercise: Web Crawler
In this exercise you'll use Go's concurrency features to parallelize a web crawler.
Modify the Crawl function to fetch URLs in parallel without fetching the same URL twice.
// Command main crawls a small canned web graph concurrently, fetching every
// reachable URL at most once.
package main

import (
	"fmt"
)

// Fetcher retrieves a single page.
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}

// Crawl uses fetcher to recursively crawl pages starting with url, to a
// maximum of depth, fetching the linked pages in parallel.
//
// Parameters:
//   - out receives the message of every fetch error; the caller must keep
//     draining it until end fires.
//   - mutuex (name kept from the original signature) guards the shared
//     visited set. It must be a channel of capacity 1 that currently holds
//     the map: receiving takes the lock, sending the map back releases it.
//     The caller is expected to have marked url itself as visited.
//   - end receives exactly one value once this call and every crawl it
//     started have finished.
func Crawl(url string, depth int, fetcher Fetcher, out chan string, mutuex chan map[string]bool, end chan bool) {
	// Report completion on every exit path, and only after all children
	// have been waited for below.
	defer func() { end <- true }()

	if depth <= 0 {
		return
	}

	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		out <- err.Error()
		return
	}
	fmt.Printf("found: %s %q\n", url, body)

	subEnd := make(chan bool)
	pending := 0

	// Claim the not-yet-seen links while holding the visited set, so no
	// two crawlers can start a fetch for the same URL.
	visited := <-mutuex
	for _, u := range urls {
		if visited[u] {
			continue
		}
		visited[u] = true
		pending++
		go Crawl(u, depth-1, fetcher, out, mutuex, subEnd)
	}
	mutuex <- visited

	// Wait for every child started above before signalling our own end.
	for ; pending > 0; pending-- {
		<-subEnd
	}
}

func main() {
	const start = "http://golang.org/"

	out := make(chan string)
	end := make(chan bool)

	// A one-slot channel holding the visited set acts as a mutex: the
	// holder can always put the map back without needing a waiting receiver.
	mutuex := make(chan map[string]bool, 1)
	mutuex <- map[string]bool{start: true}

	go Crawl(start, 4, fetcher, out, mutuex, end)

	// Print fetch errors until the root crawl reports that the whole tree
	// is done.
	for {
		select {
		case msg := <-out:
			fmt.Println(msg)
		case <-end:
			return
		}
	}
}

// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned response for one URL.
type fakeResult struct {
	body string
	urls []string
}

// Fetch returns the canned body and links for url, or a "not found" error
// when url is not a key of f.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	if res, ok := f[url]; ok {
		return res.body, res.urls, nil
	}
	return "", nil, fmt.Errorf("not found: %s", url)
}

// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
	"http://golang.org/": &fakeResult{
		"The Go Programming Language",
		[]string{
			"http://golang.org/pkg/",
			"http://golang.org/cmd/",
		},
	},
	"http://golang.org/pkg/": &fakeResult{
		"Packages",
		[]string{
			"http://golang.org/",
			"http://golang.org/cmd/",
			"http://golang.org/pkg/fmt/",
			"http://golang.org/pkg/os/",
		},
	},
	"http://golang.org/pkg/fmt/": &fakeResult{
		"Package fmt",
		[]string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
	"http://golang.org/pkg/os/": &fakeResult{
		"Package os",
		[]string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
}