This article was written some time ago; the information in it may have evolved or changed since.
Golang web crawler framework gocolly/colly, part three
Once you are familiar with parts one and two of this series on the Golang web crawler framework gocolly/colly, you can crawl most of the data on the web. This article goes on to crawl the industry price-to-earnings ratios published by China Securities Index Co., Ltd. (http://www.csindex.com.cn/zh-CN/downloads/industry-price-earnings-ratio).
First, define the data structure (a struct):
// zhjhhyshyl is one SFC industry price/earnings record.
//
// Fields are exported so that encoding/json can see them; the JSON keys match
// the sample output shown later in this article. The ratio fields are
// pointers so that a value which could not be parsed is emitted as null
// rather than as 0.
type zhjhhyshyl struct {
	HYDM string        `json:"Industry Code"`       // industry code
	HYMC string        `json:"Industry name"`       // industry name
	ZXSJ *float64      `json:"Latest data"`         // latest value
	GPJS int           `json:"Number of shares"`    // number of listed companies
	KSJS int           `json:"Loss of Home"`        // number of loss-making companies
	JYGY *float64      `json:"Nearly one months"`   // last one month
	JSGY *float64      `json:"Nearly three months"` // last three months
	JLGY *float64      `json:"Nearly six months"`   // last six months
	JYN  *float64      `json:"Nearly one year"`     // last year
	ZHY  []*zhjhhyshyl `json:"Industry segments"`   // sub-industries
}
Next, prepare for the gocolly call by setting the user agent to that of Chrome; the actual value can be inspected with the Fiddler tool:
C.useragent = "mozilla/5.0 (Windows NT 10.0; Win64; x64) applewebkit/537.36 (khtml, like Gecko) chrome/58.0.3029.110 safari/537.36 edge/16.16299 "
You can also use Fiddler to find more request headers to set, so that the crawler is better disguised as a browser.
Next, press F12 to open the browser's developer tools, inspect the elements that hold the target data, copy the jQuery selector, and then change it to a relative path.
The complete code that fetches all the data:
Package main import ("Encoding/json" "FMT" "Log" "StrConv" "Strings" "Github.com/puerkitobio/goquery" "github.com/gocolly/colly")//SFC Industry price/earnings type zhjhhyshyl struct {hydm string ' JSON: "Industry Code" ' HYMC string ' JSON: "Industry name "' Zxsj *float64 ' JSON:" Latest data "' Gpjs int ' JSON:" Stock house number "' Ksjs int ' json:" Loss of Home "' Jygy *float64 ' JSON:" Nearly one months "' Js GY *float64 ' JSON: "Nearly three months" ' Jlgy *float64 ' JSON: "Nearly six months" ' Jyn *float64 ' JSON: "Nearly a year" ' Zhy []*zhjhhyshyl ' JSON: ' Segment Industry '} Func main () {var err error c: = colly. Newcollector () c.useragent = "mozilla/5.0 (Windows NT 10.0; Win64; x64) applewebkit/537.36 (khtml, like Gecko) chrome/58.0.3029.110 safari/537.36 edge/16.16299 ' zjhhyshyl: = make ([]*Zhj Hhyshyl, 0) C.onrequest (func (R *colly). Request) {fmt. Printf ("%+v\r\n%+v\r\n", *r, * (r.headers))}) c.onhtml ("Td>table.list-div-table>tbody>tr", func (E *colly.h tmlelement) {hyshy: = zhjhhyshyl{hydm:e.childtext ("Td:first-child"), Hymc:e.childtext ("Td:nth-child (2)"),} ZXSJ, err: = StrConv. Parsefloat (E.childtext ("Td:nth-child (3)"), if Err = = Nil {hyshy.zxsj = &ZXSJ} g PJs, err: = StrConv. parseint (E.childtext ("Td:nth-child (4)"), (+) if Err = = Nil {Hyshy.gpjs = Int (GPJS)} Ksjs, err: = StrConv. parseint (E.childtext ("Td:nth-child (5)"), (+) if Err = = Nil {hyshy.ksjs = Int (KSJS)} Jygy, err: = StrConv. Parsefloat (E.childtext ("Td:nth-child (6)"), if Err = = Nil {hyshy.jygy = &jygy} js GY, err: = StrConv. Parsefloat (E.childtext ("Td:nth-child (7)"), if Err = = Nil {hyshy.jsgy = &jsgy} JL GY, err: = StrConv. Parsefloat (E.childtext ("Td:nth-child (8)"), if Err = = Nil {hyshy.jlgy = &jlgy} JY N, Err: = StrConv. Parsefloat (E.childtext ("Td:nth-child (9)"), If err= = Nil {Hyshy.jyn = &jyn} zjhhyshyl = Append (Zjhhyshyl, &hyshy) Hyshy.zhy = Mak E ([]*zhjhhyshyl, 0) e.dom. Parent (). Parent (). Next (). Find ("Table.list-div-table>tbody>tr"). 
Each (func (_ int, S *goquery. Selection) {zhy: = zhjhhyshyl{hydm:strings. Trim (S.find ("Td:nth-child (1)"). Text (), "\r\n\t"), hymc:strings. Trim (S.find ("Td:nth-child (2)"). Text (), "\r\n\t"),} ZXSJ, err: = StrConv. Parsefloat (Strings. Trim (S.find ("Td:nth-child (3)"). Text (), "\r\n\t"), if Err = = Nil {Zhy. ZXSJ = &ZXSJ} Gpjs, err: = StrConv. parseint (Strings. Trim (S.find ("Td:nth-child (4)"). Text (), "\r\n\t"), ten, +) if Err = = Nil {Zhy. Gpjs = Int (Gpjs)} Ksjs, err: = StrConv. parseint (Strings. Trim (S.find ("Td:nth-child (5)"). Text (), "\r\n\t"), ten, +) if Err = = Nil {Zhy. KSJS = int(KSJS)} Jygy, err: = StrConv. Parsefloat (Strings. Trim (S.find ("Td:nth-child (6)"). Text (), "\r\n\t"), if Err = = Nil {Zhy. Jygy = &jygy} jsgy, err: = StrConv. Parsefloat (Strings. Trim (S.find ("Td:nth-child (7)"). Text (), "\r\n\t"), if Err = = Nil {Zhy. Jsgy = &jsgy} jlgy, err: = StrConv. Parsefloat (Strings. Trim (S.find ("Td:nth-child (8)"). Text (), "\r\n\t"), if Err = = Nil {Zhy. Jlgy = &jlgy} Jyn, err: = StrConv. Parsefloat (Strings. Trim (S.find ("Td:nth-child (9)"). Text (), "\r\n\t"), if Err = = Nil {Zhy. Jyn = &jyn} hyshy.zhy = Append (Hyshy.zhy, &zhy)}) c.onscraped (func (_ *c Olly. Response) {Bdata, _: = json. Marshalindent (Zjhhyshyl, "", "\ T") fmt. Println (String (bdata))}) Err = C.visit ("http://www.csindex.com.Cn/zh-cn/downloads/industry-price-earnings-ratio?date=2017-12-27&type=zjh1 ") if err! = Nil {log. Fatal (ERR)}}
Part of the result after running:
{ "Industry Code": "D", "Industry name": "Electricity, heat, gas and water production and supply industry", "Latest data": 20.12, "Number of shares": 107, "Loss of Home": 5, "Nearly one months": 19.51, "Nearly three months": 19.7, "Nearly six months": 19.87, "Nearly one year": 18.9, "Industry segments": [ { "Industry Code": "44", "Industry name": "Power, thermal production and supply industry", "Latest data": 18.75, "Number of shares": 70, "Loss of Home": 3, "Nearly one months": 18.28, "Nearly three months": 18.43, "Nearly six months": 18.55, "Nearly one year": 17.44, "Industry segments": null }, { "Industry Code": "45", "Industry name": "Gas production and supply industry", "Latest data": 28.4, "Number of shares": 22, "Loss of Home": 2, "Nearly one months": 25.71, "Nearly three months": 25.33, "Nearly six months": 25.38, "Nearly one year": 27.24, "Industry segments": null }, { "Industry Code": "46", "Industry name": "Water production and supply industry", "Latest data": 27.78, "Number of shares": 15, "Loss of Home": 0, "Nearly one months": 27.88, "Nearly three months": 29.33, "Nearly six months": 30.56, "Nearly one year": 29.64, "Industry segments": null } ] },
When reprinting, please cite the source: http://www.cnblogs.com/majianguo/p/8150060.html