A Tour of Go exercise: Web Crawler
In this exercise you'll use Go's concurrency features to parallelize a web crawler.
Modify the Crawl
function to fetch URLs in parallel without fetching the same URL twice.
package main

import (
	"fmt"
)

// Fetcher retrieves a single page and reports the links found on it.
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}

// Crawl uses fetcher to recursively crawl pages starting with url, to a
// maximum of depth. Links found on a page are crawled in parallel, one
// goroutine per link, and no URL is handed to a goroutine twice.
//
// Parameters:
//   - out receives the text of every fetch error; the caller must keep
//     receiving from it until end fires.
//   - mutuex acts as a lock around the shared visited set: whoever receives
//     the map owns it and must send it back. It MUST have capacity 1 and be
//     pre-loaded with the map (url itself already marked), otherwise the last
//     goroutine to release the map blocks forever with nobody left to take it.
//   - end receives exactly one value once this page and all pages reachable
//     from it have been processed.
func Crawl(url string, depth int, fetcher Fetcher, out chan string, mutuex chan map[string]bool, end chan bool) {
	if depth <= 0 {
		end <- true
		return
	}

	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		out <- err.Error()
		end <- true
		return
	}
	fmt.Printf("found: %s %q\n", url, body)

	// Take ownership of the visited set, claim every link nobody has claimed
	// yet, then give the set back before waiting on the children so that they
	// can claim their own links.
	visited := <-mutuex
	subEnd := make(chan bool)
	pending := 0
	for _, u := range urls {
		if visited[u] {
			continue
		}
		visited[u] = true
		pending++
		go Crawl(u, depth-1, fetcher, out, mutuex, subEnd)
	}
	mutuex <- visited

	// Every child sends exactly one value on subEnd when its subtree is done.
	for ; pending > 0; pending-- {
		<-subEnd
	}
	end <- true
}

func main() {
	out := make(chan string)
	end := make(chan bool)

	// Capacity 1 lets the channel hold the visited set while no goroutine owns
	// it; with an unbuffered channel the final release would deadlock.
	mutuex := make(chan map[string]bool, 1)
	mutuex <- map[string]bool{"http://golang.org/": true}

	go Crawl("http://golang.org/", 4, fetcher, out, mutuex, end)

	// Print fetch errors as they arrive and stop once the root crawl is done.
	for {
		select {
		case msg := <-out:
			fmt.Println(msg)
		case <-end:
			return
		}
	}
}

// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned body and outgoing links for one URL.
type fakeResult struct {
	body string
	urls []string
}

// Fetch looks url up in the canned results and returns a "not found" error
// for any URL that is not present.
func (f *fakeFetcher) Fetch(url string) (string, []string, error) {
	if res, ok := (*f)[url]; ok {
		return res.body, res.urls, nil
	}
	return "", nil, fmt.Errorf("not found: %s", url)
}

// fetcher is a populated fakeFetcher.
var fetcher = &fakeFetcher{
	"http://golang.org/": &fakeResult{
		"The Go Programming Language",
		[]string{
			"http://golang.org/pkg/",
			"http://golang.org/cmd/",
		},
	},
	"http://golang.org/pkg/": &fakeResult{
		"Packages",
		[]string{
			"http://golang.org/",
			"http://golang.org/cmd/",
			"http://golang.org/pkg/fmt/",
			"http://golang.org/pkg/os/",
		},
	},
	"http://golang.org/pkg/fmt/": &fakeResult{
		"Package fmt",
		[]string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
	"http://golang.org/pkg/os/": &fakeResult{
		"Package os",
		[]string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
}