標籤:注意 adt name lse read bubuko AC filename 重點
練習 5.13:修改 crawl,使其能儲存發現的頁面;必要時可以建立目錄來儲存這些頁面。只儲存來自原始網域名稱下的頁面:假設初始頁面在 golang.org 下,就不要儲存 vimeo.com 下的頁面。
// Exercise 5.13: modify crawl so that it stores the pages it discovers,
// creating directories as necessary. Only pages that belong to the original
// domain are stored: if the start page is on golang.org, pages on vimeo.com
// are not saved.
package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"links"
	"log"
	"net/http"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"strings"
	"sync"
	"time"
)

// saveRoot is the directory under which mirrored pages are written, one
// sub-directory per host.
const saveRoot = "/tmp/crawl"

// maxDrain bounds how much of an unwanted response body is read before the
// connection is given up on.
const maxDrain = 1 << 20

var (
	// sum counts the URLs handed to crawl so far.
	sum int

	// allowedHosts holds the hosts of the start URLs. It is filled in by main
	// before any goroutine is started and is read-only afterwards.
	allowedHosts = make(map[string]bool)

	// pending tracks save goroutines that are still running so that main can
	// wait for them instead of exiting with downloads half written.
	pending sync.WaitGroup

	// client is shared by all saves; the timeout keeps one stalled server from
	// blocking program exit forever.
	client = &http.Client{Timeout: 30 * time.Second}
)

func main() {
	seeds := os.Args[1:]
	for _, seed := range seeds {
		u, err := url.Parse(seed)
		if err != nil || u.Host == "" {
			// The seed is still crawled; it just does not whitelist a host.
			log.Printf("seed %q is not an absolute URL; its pages will not be saved", seed)
			continue
		}
		allowedHosts[u.Host] = true
	}

	breadthFirst(crawl, seeds)

	// Saves run in the background; wait so that no page is lost on exit.
	pending.Wait()
}

// crawl prints the running count and the URL, starts a background save of the
// page when it belongs to one of the start hosts, and returns every link
// reported by links.Extract for the page.
func crawl(url string) []string {
	sum++
	if sameOrigin(url) {
		pending.Add(1)
		go func() {
			defer pending.Done()
			save(url)
		}()
	}
	fmt.Printf("%d|%s\n", sum, url)

	list, err := links.Extract(url)
	if err != nil {
		log.Print(err)
	}
	return list
}

// sameOrigin reports whether rawURL's host is exactly one of the hosts given
// on the command line.
func sameOrigin(rawURL string) bool {
	u, err := url.Parse(rawURL)
	return err == nil && allowedHosts[u.Host]
}

// localPath maps u to the file it is stored in below saveRoot. The URL path
// is normalised first so that ".." segments coming from untrusted pages cannot
// point outside saveRoot/<host>. A path that is empty or ends in "/" maps to
// index.html inside the corresponding directory.
func localPath(u *url.URL) (string, error) {
	host := u.Host
	if host == "" || host == "." || host == ".." || strings.ContainsAny(host, `/\`) {
		return "", fmt.Errorf("unusable host %q", host)
	}

	p := path.Clean("/" + u.Path)
	if p == "/" || strings.HasSuffix(u.Path, "/") {
		p = path.Join(p, "index.html")
	}
	return filepath.Join(saveRoot, host, filepath.FromSlash(p)), nil
}

// save downloads u and writes the response body to the file chosen by
// localPath, creating any missing parent directories and replacing a previous
// copy. It reports whether the page was stored; failures are logged.
func save(u string) bool {
	urlObj, err := url.Parse(u)
	if err != nil {
		log.Printf("save %q: %v", u, err)
		return false
	}
	filename, err := localPath(urlObj)
	if err != nil {
		log.Printf("save %q: %v", u, err)
		return false
	}
	fmt.Println(filename)

	resp, err := client.Get(u)
	if err != nil {
		log.Printf("save %q: %v", u, err)
		return false
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best-effort drain so the connection can be reused; the result does
		// not matter because the page is being skipped anyway.
		_, _ = io.CopyN(ioutil.Discard, resp.Body, maxDrain)
		log.Printf("save %q: unexpected status %s", u, resp.Status)
		return false
	}

	// The directory must exist before the file can be opened.
	if err := os.MkdirAll(filepath.Dir(filename), 0755); err != nil {
		log.Printf("save %q: %v", u, err)
		return false
	}
	// Truncate rather than append so that a re-crawl replaces the old copy.
	f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0755)
	if err != nil {
		log.Printf("save %q: %v", u, err)
		return false
	}

	_, copyErr := io.Copy(f, resp.Body)
	closeErr := f.Close()
	if copyErr == nil {
		copyErr = closeErr
	}
	if copyErr != nil {
		// Do not leave a truncated page behind; a failed cleanup is only
		// worth a log line.
		if rmErr := os.Remove(filename); rmErr != nil {
			log.Printf("save %q: removing partial file: %v", u, rmErr)
		}
		log.Printf("save %q: writing %s: %v", u, filename, copyErr)
		return false
	}
	return true
}

// breadthFirst calls f for each item in the worklist.
// Any items returned by f are added to the worklist.
// f is called at most once for each item.
func breadthFirst(f func(item string) []string, worklist []string) {
	seen := make(map[string]bool)
	for len(worklist) > 0 {
		items := worklist
		worklist = nil
		for _, item := range items {
			if seen[item] {
				continue
			}
			seen[item] = true
			worklist = append(worklist, f(item)...)
		}
	}
}
[日常] Go語言聖經-匿名函數習題2