使用golang抓取京東商品資料

來源:互聯網
上載者:User
這是一篇較早建立的文章,其中的資訊可能已經有所發展或是發生改變。
// Command jd downloads one JD.com product page, extracts the product fields
// embedded in the page source with regular expressions, looks up the price
// from the price endpoint, stores the result in a MySQL table, dumps the
// table to stdout and saves the raw page to jd.html.
package main

import (
	"database/sql"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"strings"
	"time"
	"unicode/utf16"

	_ "github.com/go-sql-driver/mysql"
)

// JdProduct holds the fields extracted from a product page plus the price
// returned by the price endpoint. All scalar fields keep the textual form
// found in the page.
type JdProduct struct {
	skuid        string
	name         string
	skuidkey     string
	href         string
	src          string
	cat          []int64 // category ids; the first three are stored as cat1..cat3
	catName      []string
	brand        string
	pType        string
	venderId     string
	shopId       string
	specialAttrs string
	price        string
}

const (
	// offTheShelf is the substring searched for in a product's specialAttrs.
	// NOTE(review): presumably marks a product that is no longer on sale — confirm.
	offTheShelf = "SaleNo"

	// defaultDSN is used when the dsnEnvVar environment variable is unset.
	defaultDSN = "root:leeweop@/jd"
	// dsnEnvVar names the environment variable that overrides defaultDSN, so
	// credentials do not have to live in the source.
	dsnEnvVar = "JD_DSN"

	// productTable is the table used by insertIntoDB, dumpDatabase and
	// flushDatabase.
	productTable = "test"
	// productColumns lists the table columns in the order they are inserted
	// and scanned.
	productColumns = "skuid, name, skuidkey, href, src, cat1, cat2, cat3, brand, pType, venderId, shopId, specialAttrs, price"
	// productColumnDefs is the column definition list used by createTable.
	productColumnDefs = "skuid BIGINT(64) NOT NULL PRIMARY KEY, name VARCHAR(256) NOT NULL, skuidkey VARCHAR(64) NOT NULL, href VARCHAR(128) NOT NULL, src VARCHAR(128) NOT NULL, cat1 INT(32) NOT NULL, cat2 INT(32) NOT NULL, cat3 INT(32) NOT NULL, brand VARCHAR(128) NOT NULL, pType INT(32) NOT NULL, venderId VARCHAR(64) NOT NULL, shopId VARCHAR(64) NOT NULL, specialAttrs VARCHAR(256) NULL, price DOUBLE NOT NULL"
	// insertProductQuery inserts one row with a placeholder per column.
	insertProductQuery = "INSERT INTO " + productTable + " (" + productColumns + ") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"

	productURL     = "http://item.jd.com/2788767.html"
	priceURLPrefix = "http://p.3.cn/prices/mgets?skuIds=J_"
	snapshotFile   = "jd.html"

	// httpTimeout bounds every HTTP request so a stalled server cannot hang
	// the program.
	httpTimeout = 30 * time.Second

	// escapeLen is the length of one `\uXXXX` escape sequence.
	escapeLen = 6
	// replacementChar is what utf16.DecodeRune returns for an invalid pair.
	replacementChar = '\uFFFD'
)

var (
	errMissingSkuid = errors.New("skuid not found in product page")
	errNoPrice      = errors.New("price response contains no entries")
)

// db is retained for compatibility with the original file; nothing in this
// file assigns it, so it is always nil here.
var db *sql.DB

// httpClient is shared by all requests.
var httpClient = &http.Client{Timeout: httpTimeout}

// identPattern restricts SQL identifiers to characters that are safe to
// splice into a statement.
var identPattern = regexp.MustCompile(`^[0-9A-Za-z_]+$`)

// Patterns that pull individual fields out of the page source. Each has
// exactly one capture group, which holds the field value.
var (
	productSkuidFetch        = regexp.MustCompile(`[[:space:]]*skuid:[[:space:]]+(?P<skuid>[0-9]+),`)
	productNameFetch         = regexp.MustCompile(`[[:space:]]*name:[[:space:]]+\'(?P<name>.*)\',`)
	productSkuidkeyFetch     = regexp.MustCompile(`[[:space:]]*skuidkey:[[:space:]]*\'(?P<skuidkey>.*)\',`)
	productHrefFetch         = regexp.MustCompile(`[[:space:]]*href:[[:space:]]+\'(?P<href>.*)\',`)
	productSrcFetch          = regexp.MustCompile(`[[:space:]]*src:[[:space:]]+\'(?P<src>.*)\',`)
	productCatFetch          = regexp.MustCompile(`[[:space:]]*cat:[[:space:]]+\[(?P<cat>.*)\],`)
	productCatNameFetch      = regexp.MustCompile(`[[:space:]]*catName:[[:space:]]+\[(?P<catname>.*)\],`)
	productBrandFetch        = regexp.MustCompile(`[[:space:]]*brand:[[:space:]]+(?P<brand>[0-9]+),`)
	productPTypeFetch        = regexp.MustCompile(`[[:space:]]*pType:[[:space:]]+(?P<ptype>[0-9]+),`)
	productVenderIdFetch     = regexp.MustCompile(`[[:space:]]*venderId:[[:space:]]*(?P<venderId>[0-9]+),`)
	productShopIdFetch       = regexp.MustCompile(`[[:space:]]*shopId:[[:space:]]*\'(?P<shopId>[0-9]+)\',`)
	productSpecialAttrsFetch = regexp.MustCompile(`[[:space:]]*specialAttrs:[[:space:]]*\[(?P<specialAttrs>.*)\],`)
)

// quoteIdentifier validates name against identPattern and returns it wrapped
// in backticks. Identifiers cannot be bound as statement parameters, so they
// are validated before being concatenated into SQL.
func quoteIdentifier(name string) (string, error) {
	if !identPattern.MatchString(name) {
		return "", fmt.Errorf("invalid SQL identifier %q", name)
	}
	return "`" + name + "`", nil
}

// connectDB opens the MySQL database named by the JD_DSN environment variable
// (falling back to defaultDSN) and pings it, because sql.Open alone does not
// establish a connection. The caller owns the returned handle and must close
// it.
func connectDB() (db *sql.DB, err error) {
	dsn := os.Getenv(dsnEnvVar)
	if dsn == "" {
		dsn = defaultDSN
	}
	db, err = sql.Open("mysql", dsn)
	if err != nil {
		return nil, fmt.Errorf("opening database: %w", err)
	}
	if err = db.Ping(); err != nil {
		// The ping error is the one worth reporting; a close error on a
		// handle that never connected adds nothing.
		_ = db.Close()
		return nil, fmt.Errorf("connecting to database: %w", err)
	}
	return db, nil
}

// createDatabase creates the database called name if it does not exist yet,
// using the utf8 character set and utf8_general_ci collation.
func createDatabase(db *sql.DB, name string) error {
	ident, err := quoteIdentifier(name)
	if err != nil {
		return fmt.Errorf("creating database: %w", err)
	}
	query := "CREATE DATABASE IF NOT EXISTS " + ident + " DEFAULT CHARSET utf8 COLLATE utf8_general_ci"
	if _, err := db.Exec(query); err != nil {
		return fmt.Errorf("creating database %q: %w", name, err)
	}
	return nil
}

// createTable creates the product table called name if it does not exist yet
// and prints the statement it executes.
func createTable(db *sql.DB, name string) error {
	ident, err := quoteIdentifier(name)
	if err != nil {
		return fmt.Errorf("creating table: %w", err)
	}
	command := "CREATE TABLE IF NOT EXISTS " + ident + " (" + productColumnDefs + ")"
	fmt.Println(command)
	if _, err := db.Exec(command); err != nil {
		return fmt.Errorf("creating table %q: %w", name, err)
	}
	return nil
}

// insertIntoDB stores product as one row of productTable. It returns an error
// when product is nil or carries fewer than the three category ids the table
// requires.
func insertIntoDB(db *sql.DB, product *JdProduct) error {
	if product == nil {
		return errors.New("inserting product: nil product")
	}
	if len(product.cat) < 3 {
		return fmt.Errorf("inserting product %q: got %d category ids, want at least 3", product.skuid, len(product.cat))
	}
	_, err := db.Exec(insertProductQuery,
		product.skuid, product.name, product.skuidkey, product.href, product.src,
		product.cat[0], product.cat[1], product.cat[2],
		product.brand, product.pType, product.venderId, product.shopId,
		product.specialAttrs, product.price)
	if err != nil {
		return fmt.Errorf("inserting product %q: %w", product.skuid, err)
	}
	return nil
}

// dumpDatabase prints every row of productTable to stdout, one per line.
// Query and scan failures are reported on stderr.
func dumpDatabase(db *sql.DB) {
	rows, err := db.Query("SELECT " + productColumns + " FROM " + productTable)
	if err != nil {
		fmt.Fprintln(os.Stderr, "dumping table:", err)
		return
	}
	defer rows.Close()

	for rows.Next() {
		p := JdProduct{cat: make([]int64, 3)}
		// specialAttrs is the only nullable column, so it needs a NullString.
		var attrs sql.NullString
		err := rows.Scan(&p.skuid, &p.name, &p.skuidkey, &p.href, &p.src,
			&p.cat[0], &p.cat[1], &p.cat[2],
			&p.brand, &p.pType, &p.venderId, &p.shopId, &attrs, &p.price)
		if err != nil {
			fmt.Fprintln(os.Stderr, "dumping table:", err)
			return
		}
		p.specialAttrs = attrs.String
		fmt.Printf("%+v\n", p)
	}
	if err := rows.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "dumping table:", err)
	}
}

// flushDatabase drops productTable if it exists. A failure is reported on
// stderr but does not stop the program.
func flushDatabase(db *sql.DB) {
	if _, err := db.Exec("DROP TABLE IF EXISTS " + productTable); err != nil {
		fmt.Fprintln(os.Stderr, "dropping table:", err)
	}
}

// fetch performs a GET request and returns the response body. Any status
// other than 200 OK is treated as an error.
func fetch(url string) ([]byte, error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("fetching page: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching %s: unexpected status %s", url, resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading %s: %w", url, err)
	}
	return body, nil
}

// extractField returns the first capture group of re in page, or "" when re
// does not match. On a match it prints the group name and the value.
func extractField(re *regexp.Regexp, page string) string {
	m := re.FindStringSubmatch(page)
	if len(m) < 2 {
		return ""
	}
	fmt.Println(re.SubexpNames()[1])
	fmt.Println(m[1])
	return m[1]
}

// parseCategoryIDs converts a comma-separated list of decimal ids into
// integers, tolerating whitespace around each entry. An empty list yields no
// ids.
func parseCategoryIDs(list string) ([]int64, error) {
	if strings.TrimSpace(list) == "" {
		return nil, nil
	}
	parts := strings.Split(list, ",")
	ids := make([]int64, 0, len(parts))
	for _, part := range parts {
		id, err := strconv.ParseInt(strings.TrimSpace(part), 10, 32)
		if err != nil {
			return nil, fmt.Errorf("parsing category id %q: %w", part, err)
		}
		ids = append(ids, id)
	}
	return ids, nil
}

// parseUnicodeEscape reports whether s starts with a `\uXXXX` escape and, if
// so, returns the 16-bit code unit it encodes.
func parseUnicodeEscape(s string) (rune, bool) {
	if len(s) < escapeLen || s[0] != '\\' || s[1] != 'u' {
		return 0, false
	}
	v, err := strconv.ParseUint(s[2:escapeLen], 16, 16)
	if err != nil {
		return 0, false
	}
	return rune(v), true
}

// decodeUnicodeEscapes replaces every `\uXXXX` escape in s with the character
// it encodes, combining UTF-16 surrogate pairs, and copies all other bytes
// unchanged.
func decodeUnicodeEscapes(s string) string {
	if !strings.Contains(s, `\u`) {
		return s
	}
	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); {
		r, ok := parseUnicodeEscape(s[i:])
		if !ok {
			b.WriteByte(s[i])
			i++
			continue
		}
		i += escapeLen
		if utf16.IsSurrogate(r) {
			// A high surrogate is only meaningful together with the low
			// surrogate that follows it.
			if low, ok := parseUnicodeEscape(s[i:]); ok {
				if combined := utf16.DecodeRune(r, low); combined != replacementChar {
					r = combined
					i += escapeLen
				}
			}
		}
		b.WriteRune(r)
	}
	return b.String()
}

// parseProduct extracts every known field from the page source. Fields whose
// pattern does not match are left at their zero value. The decoded product
// name is printed.
func parseProduct(page string) (JdProduct, error) {
	var product JdProduct

	product.skuid = extractField(productSkuidFetch, page)
	product.name = decodeUnicodeEscapes(extractField(productNameFetch, page))
	product.skuidkey = extractField(productSkuidkeyFetch, page)
	product.href = extractField(productHrefFetch, page)
	product.src = extractField(productSrcFetch, page)

	cats, err := parseCategoryIDs(extractField(productCatFetch, page))
	if err != nil {
		return JdProduct{}, err
	}
	product.cat = cats

	if names := extractField(productCatNameFetch, page); names != "" {
		product.catName = strings.Split(names, ",")
	}

	product.brand = extractField(productBrandFetch, page)
	product.pType = extractField(productPTypeFetch, page)
	product.venderId = extractField(productVenderIdFetch, page)
	product.shopId = extractField(productShopIdFetch, page)
	product.specialAttrs = extractField(productSpecialAttrsFetch, page)

	fmt.Println(product.name)
	return product, nil
}

// fetchPrice queries the price endpoint for skuid and returns the "p" value
// of the first entry in the response. It prints the request URL and the raw
// response. The result is "" when "p" is absent or not a string.
func fetchPrice(skuid, pType string) (string, error) {
	url := priceURLPrefix + skuid + "&type=" + pType
	fmt.Println(url)

	payload, err := fetch(url)
	if err != nil {
		return "", err
	}
	fmt.Println(string(payload))

	var entries []map[string]interface{}
	if err := json.Unmarshal(payload, &entries); err != nil {
		return "", fmt.Errorf("decoding price response: %w", err)
	}
	if len(entries) == 0 {
		return "", errNoPrice
	}
	price, _ := entries[0]["p"].(string)
	return price, nil
}

// run performs the whole fetch, parse, store and dump sequence. It exists so
// that deferred cleanup runs before main decides on the exit code.
func run() error {
	conn, err := connectDB()
	if err != nil {
		return err
	}
	defer conn.Close()

	flushDatabase(conn)
	if err := createDatabase(conn, "jd"); err != nil {
		return err
	}
	if err := createTable(conn, productTable); err != nil {
		return err
	}

	body, err := fetch(productURL)
	if err != nil {
		return err
	}

	product, err := parseProduct(string(body))
	if err != nil {
		return err
	}
	if product.skuid == "" {
		return errMissingSkuid
	}
	if strings.Contains(product.specialAttrs, offTheShelf) {
		fmt.Fprintf(os.Stderr, "product %s carries the %q attribute\n", product.skuid, offTheShelf)
	}

	product.price, err = fetchPrice(product.skuid, product.pType)
	if err != nil {
		return err
	}

	if err := insertIntoDB(conn, &product); err != nil {
		return err
	}
	dumpDatabase(conn)

	if err := ioutil.WriteFile(snapshotFile, body, 0666); err != nil {
		return fmt.Errorf("saving page to %s: %w", snapshotFile, err)
	}
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, "jd:", err)
		os.Exit(1)
	}
}


聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support, 6 Free Tickets per Quarter, Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.