這是一篇建立時間較早的文章,其中的資訊可能已經有所發展或是發生改變。
通過xpath
尚未加入 goroutine(併發)與 channel 機制,如有需要可自行加上。
使用sqlite
// Command main scrapes the Douban TV listing page with XPath, follows every
// listed show to its detail page, stores the extracted records in a fresh
// SQLite database and finally prints the stored rows.
package main

import (
	"database/sql"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"strings"
	"time"

	_ "github.com/mattn/go-sqlite3"
	"github.com/moovweb/gokogiri"
	"github.com/moovweb/gokogiri/xpath"
)

// listURL is the listing page the crawl starts from.
const listURL = "http://movie.douban.com/tv/"

// dbPath is the SQLite file that is recreated on every run.
const dbPath = "./foo.db"

// itemLinkXPath selects the anchor of every show on the listing page.
const itemLinkXPath = "//div/table/tr[@class='item']/td[2]/div[@class='pl2']/a"

// infoXPath selects the info box on a show's detail page.
const infoXPath = "//div[@id='info']"

// createTableSQL creates the table the records are stored in.
const createTableSQL = `create table douban(id integer primary key autoincrement , name text, url text, info text);`

// insertSQL inserts one record; the id is passed as NULL so SQLite assigns it.
const insertSQL = "insert into douban(id, name, url, info) values(?,?,?,?);"

// selectSQL reads back every stored record.
const selectSQL = "select id, name, url, info from douban"

// httpClient is shared by all requests; the timeout keeps a stalled server
// from hanging the crawl forever.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// Doub is one row of the douban table.
type Doub struct {
	id   int    // primary key assigned by the database
	name string // show title (text before the first "/" of the link text)
	url  string // address of the show's detail page
	info string // text content of the detail page's info box
}

// match is one node found by scrape.
type match struct {
	content string // text content of the matched node
	href    string // value of the node's href attribute, "" when absent
}

func main() {
	// All work happens in run so that its deferred cleanups execute before
	// log.Fatal terminates the process.
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run crawls the site, rebuilds the database, stores the crawled records and
// prints what the database now contains.
func run() error {
	// Crawl first: no transaction is held open during slow network I/O, and
	// the previous database is only discarded once there is data to store.
	records, err := crawl()
	if err != nil {
		return err
	}

	// Start from an empty database; a missing file is not an error.
	if err := os.Remove(dbPath); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("removing %s: %v", dbPath, err)
	}
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return fmt.Errorf("opening %s: %v", dbPath, err)
	}
	defer db.Close()

	if _, err := db.Exec(createTableSQL); err != nil {
		return fmt.Errorf("creating table: %v", err)
	}
	if err := store(db, records); err != nil {
		return err
	}

	douban, err := load(db)
	if err != nil {
		return err
	}
	fmt.Println(douban)
	return nil
}

// crawl returns one record (without id) per info box found on the detail
// page of every show linked from the listing page.
func crawl() ([]Doub, error) {
	// Compile each expression once instead of once per visited page.
	linkExpr, err := compileXPath(itemLinkXPath)
	if err != nil {
		return nil, err
	}
	defer linkExpr.Free()
	infoExpr, err := compileXPath(infoXPath)
	if err != nil {
		return nil, err
	}
	defer infoExpr.Free()

	links, err := scrape(listURL, linkExpr)
	if err != nil {
		return nil, err
	}

	var records []Doub
	for _, link := range links {
		if link.href == "" {
			// An anchor without a target cannot be followed.
			continue
		}
		name := strings.Split(link.content, "/")[0]
		infos, err := scrape(link.href, infoExpr)
		if err != nil {
			return nil, err
		}
		for _, info := range infos {
			records = append(records, Doub{name: name, url: link.href, info: info.content})
		}
	}
	return records, nil
}

// compileXPath compiles path and reports an error when the compiler yields
// no expression.
func compileXPath(path string) (*xpath.Expression, error) {
	expr := xpath.Compile(path)
	if expr == nil {
		return nil, fmt.Errorf("compiling xpath %q: invalid expression", path)
	}
	return expr, nil
}

// scrape downloads url, parses it as HTML and returns the text content and
// href attribute of every node matching expr. The values are copied out so
// the parsed document can be freed before returning.
func scrape(url string, expr *xpath.Expression) ([]match, error) {
	page, err := fetch(url)
	if err != nil {
		return nil, err
	}
	doc, err := gokogiri.ParseHtml(page)
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %v", url, err)
	}
	defer doc.Free()

	root := doc.Root()
	if root == nil {
		return nil, fmt.Errorf("parsing %s: document has no root element", url)
	}
	nodes, err := root.Search(expr)
	if err != nil {
		return nil, fmt.Errorf("searching %s: %v", url, err)
	}

	matches := make([]match, 0, len(nodes))
	for _, node := range nodes {
		m := match{content: node.Content()}
		if href := node.Attributes()["href"]; href != nil {
			m.href = href.String()
		}
		matches = append(matches, m)
	}
	return matches, nil
}

// fetch returns the body of url, failing on transport errors and on any
// response status other than 200 OK.
func fetch(url string) ([]byte, error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("fetching %s: %v", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching %s: unexpected status %s", url, resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading %s: %v", url, err)
	}
	return body, nil
}

// store inserts records in a single transaction; either all of them are
// committed or none are.
func store(db *sql.DB, records []Doub) error {
	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("beginning transaction: %v", err)
	}
	// After a successful Commit this only returns sql.ErrTxDone, so the
	// result is deliberately ignored; on every error path it undoes the
	// partial insert.
	defer tx.Rollback()

	stmt, err := tx.Prepare(insertSQL)
	if err != nil {
		return fmt.Errorf("preparing insert: %v", err)
	}
	defer stmt.Close()

	for _, r := range records {
		// A nil id lets the autoincrement primary key pick the value.
		if _, err := stmt.Exec(nil, r.name, r.url, r.info); err != nil {
			return fmt.Errorf("inserting %q: %v", r.name, err)
		}
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("committing transaction: %v", err)
	}
	return nil
}

// load reads every row of the douban table.
func load(db *sql.DB) ([]Doub, error) {
	rows, err := db.Query(selectSQL)
	if err != nil {
		return nil, fmt.Errorf("querying records: %v", err)
	}
	defer rows.Close()

	var douban []Doub
	for rows.Next() {
		var d Doub
		if err := rows.Scan(&d.id, &d.name, &d.url, &d.info); err != nil {
			return nil, fmt.Errorf("scanning record: %v", err)
		}
		douban = append(douban, d)
	}
	// Next returning false can also mean the iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterating records: %v", err)
	}
	return douban, nil
}
使用mysql
// Command main scrapes the Douban TV listing page with XPath, follows every
// listed show to its detail page, stores the extracted records in a MySQL
// table and finally prints the stored rows.
//
// The program inserts into and selects from a table named douban without
// creating it, so that table must already exist in the target database.
package main

import (
	"database/sql"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"strings"
	"time"

	_ "github.com/go-sql-driver/mysql"
	"github.com/moovweb/gokogiri"
	"github.com/moovweb/gokogiri/xpath"
)

// listURL is the listing page the crawl starts from.
const listURL = "http://movie.douban.com/tv/"

// dsn is the data source name handed to the MySQL driver.
const dsn = "root:root@/test"

// itemLinkXPath selects the anchor of every show on the listing page.
const itemLinkXPath = "//div/table/tr[@class='item']/td[2]/div[@class='pl2']/a"

// infoXPath selects the info box on a show's detail page.
const infoXPath = "//div[@id='info']"

// insertSQL inserts one record; the id column is left to the database.
const insertSQL = "insert into douban(name, url, info) values(?,?,?);"

// selectSQL reads back every stored record.
const selectSQL = "select id, name, url, info from douban"

// httpClient is shared by all requests; the timeout keeps a stalled server
// from hanging the crawl forever.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// Doub is one row of the douban table.
type Doub struct {
	id   int    // primary key assigned by the database
	name string // show title (text before the first "/" of the link text)
	url  string // address of the show's detail page
	info string // text content of the detail page's info box
}

// match is one node found by scrape.
type match struct {
	content string // text content of the matched node
	href    string // value of the node's href attribute, "" when absent
}

func main() {
	// All work happens in run so that its deferred cleanups execute before
	// log.Fatal terminates the process.
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run crawls the site, stores the crawled records and prints what the table
// now contains.
func run() error {
	// Crawl first so no transaction is held open during slow network I/O.
	records, err := crawl()
	if err != nil {
		return err
	}

	db, err := sql.Open("mysql", dsn)
	if err != nil {
		return fmt.Errorf("opening database: %v", err)
	}
	defer db.Close()

	if err := store(db, records); err != nil {
		return err
	}

	douban, err := load(db)
	if err != nil {
		return err
	}
	fmt.Println(douban)
	return nil
}

// crawl returns one record (without id) per info box found on the detail
// page of every show linked from the listing page.
func crawl() ([]Doub, error) {
	// Compile each expression once instead of once per visited page.
	linkExpr, err := compileXPath(itemLinkXPath)
	if err != nil {
		return nil, err
	}
	defer linkExpr.Free()
	infoExpr, err := compileXPath(infoXPath)
	if err != nil {
		return nil, err
	}
	defer infoExpr.Free()

	links, err := scrape(listURL, linkExpr)
	if err != nil {
		return nil, err
	}

	var records []Doub
	for _, link := range links {
		if link.href == "" {
			// An anchor without a target cannot be followed.
			continue
		}
		name := strings.Split(link.content, "/")[0]
		infos, err := scrape(link.href, infoExpr)
		if err != nil {
			return nil, err
		}
		for _, info := range infos {
			records = append(records, Doub{name: name, url: link.href, info: info.content})
		}
	}
	return records, nil
}

// compileXPath compiles path and reports an error when the compiler yields
// no expression.
func compileXPath(path string) (*xpath.Expression, error) {
	expr := xpath.Compile(path)
	if expr == nil {
		return nil, fmt.Errorf("compiling xpath %q: invalid expression", path)
	}
	return expr, nil
}

// scrape downloads url, parses it as HTML and returns the text content and
// href attribute of every node matching expr. The values are copied out so
// the parsed document can be freed before returning.
func scrape(url string, expr *xpath.Expression) ([]match, error) {
	page, err := fetch(url)
	if err != nil {
		return nil, err
	}
	doc, err := gokogiri.ParseHtml(page)
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %v", url, err)
	}
	defer doc.Free()

	root := doc.Root()
	if root == nil {
		return nil, fmt.Errorf("parsing %s: document has no root element", url)
	}
	nodes, err := root.Search(expr)
	if err != nil {
		return nil, fmt.Errorf("searching %s: %v", url, err)
	}

	matches := make([]match, 0, len(nodes))
	for _, node := range nodes {
		m := match{content: node.Content()}
		if href := node.Attributes()["href"]; href != nil {
			m.href = href.String()
		}
		matches = append(matches, m)
	}
	return matches, nil
}

// fetch returns the body of url, failing on transport errors and on any
// response status other than 200 OK.
func fetch(url string) ([]byte, error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("fetching %s: %v", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching %s: unexpected status %s", url, resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading %s: %v", url, err)
	}
	return body, nil
}

// store inserts records in a single transaction; either all of them are
// committed or none are.
func store(db *sql.DB, records []Doub) error {
	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("beginning transaction: %v", err)
	}
	// After a successful Commit this only returns sql.ErrTxDone, so the
	// result is deliberately ignored; on every error path it undoes the
	// partial insert.
	defer tx.Rollback()

	// The statement must be prepared on tx, not db: a statement prepared on
	// db executes outside the transaction, leaving Commit with nothing to do.
	stmt, err := tx.Prepare(insertSQL)
	if err != nil {
		return fmt.Errorf("preparing insert: %v", err)
	}
	// Deferred only after the error check; stmt is nil when Prepare fails.
	defer stmt.Close()

	for _, r := range records {
		if _, err := stmt.Exec(r.name, r.url, r.info); err != nil {
			return fmt.Errorf("inserting %q: %v", r.name, err)
		}
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("committing transaction: %v", err)
	}
	return nil
}

// load reads every row of the douban table.
func load(db *sql.DB) ([]Doub, error) {
	rows, err := db.Query(selectSQL)
	if err != nil {
		return nil, fmt.Errorf("querying records: %v", err)
	}
	defer rows.Close()

	var douban []Doub
	for rows.Next() {
		var d Doub
		if err := rows.Scan(&d.id, &d.name, &d.url, &d.info); err != nil {
			return nil, fmt.Errorf("scanning record: %v", err)
		}
		douban = append(douban, d)
	}
	// Next returning false can also mean the iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterating records: %v", err)
	}
	return douban, nil
}