Here is the house-price crawler I wrote in Python. The program runs without errors, but for some reason no data ever arrives in the database. Please help me take a look.
# Import the modules we need, connect to the MySQL database (browsed with SQLyog), and create a cursor
import requests
import re
from lxml import etree
import pymysql
import time

conn = pymysql.connect(host='localhost', user='root', passwd='1234',
                       db='mydatabase1', port=3306, charset='utf8')
cursor = conn.cursor()

# Request headers that spoof a browser visit so the crawl is not blocked by IP
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
}

# Fetch one listing page and collect the link of every house on it
def get_house_url(url):
    html = requests.get(url, headers=headers)    # visit the URL with the spoofed headers
    selector = etree.HTML(html.text)             # parse the page source into an element tree
    house_hrefs = selector.xpath('//div[@class="house-title"]/a/@href')  # collect the detail-page links
    for house_href in house_hrefs:
        get_house_info(house_href)

# Open one detail page and extract the concrete information in it
def get_house_info(url):
    html = requests.get(url, headers=headers)    # request the page with the spoofed headers
    selector = etree.HTML(html.text)             # and parse its source
    try:
        # Each expression below grabs one useful field from the page
        name = selector.xpath('//*[@id="content"]/div[2]/h3/text()')[0]
        village = selector.xpath('//*[@id="content"]/div[3]/div[1]/div[3]/div/div[1]/div/div[1]/dl[1]/dd/a/text()')[0]
        price = selector.xpath('//*[@id="content"]/div[3]/div[1]/div[1]/span[1]/em/text()')[0]
        style = selector.xpath('//*[@id="content"]/div[3]/div[1]/div[3]/div/div[1]/div/div[2]/dl[1]/dd/text()')[0]
        area = selector.xpath('//*[@id="content"]/div[3]/div[1]/div[1]/span[3]/em/text()')[0]
        unit_price = selector.xpath('//*[@id="content"]/div[3]/div[1]/div[3]/div/div[1]/div/div[3]/dl[2]/dd/text()')[0]
        cursor.execute(
            "INSERT INTO suzhou_house (name, village, price, style, area, unit_price) "
            "VALUES (%s, %s, %s, %s, %s, %s)",
            (str(name), str(village), str(price), str(style), str(area), str(unit_price)))
    except IndexError:
        pass

if __name__ == '__main__':
    urls = ['https://suzhou.anjuke.com/sale/p{}-rd1/?kw=%e8%8b%8f%e5%b7%9e'.format(str(i)) for i in range(1, 6)]
    for url in urls:
        get_house_url(url)
        time.sleep(2)
        conn.commit()
The above is the crawler I wrote, but for some reason the data never shows up when I check the database in SQLyog. Please take a look for me.
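For reference, here is a minimal sketch of how a single insert could be tested in isolation, separate from the crawler itself. It reuses the connection settings from the post and assumes the suzhou_house table exists with the six columns used above; the sample values are made up. It is only a diagnostic idea, not part of the original program: if this standalone insert works, the problem is likely in the XPath lookups (an IndexError is silently swallowed by the except clause), and if it fails, the error message should point at the table or the connection.

# Diagnostic sketch (assumptions: suzhou_house table exists with the columns used above;
# connection parameters are the ones from the post; the inserted values are placeholders).
import pymysql

conn = pymysql.connect(host='localhost', user='root', passwd='1234',
                       db='mydatabase1', port=3306, charset='utf8')
cursor = conn.cursor()

try:
    cursor.execute(
        "INSERT INTO suzhou_house (name, village, price, style, area, unit_price) "
        "VALUES (%s, %s, %s, %s, %s, %s)",
        ('test name', 'test village', '100', '2室1厅', '89', '11000'))
    conn.commit()                        # without commit() the row never becomes visible in SQLyog
    print('rows inserted:', cursor.rowcount)
except pymysql.MySQLError as e:
    print('insert failed:', e)           # a schema or privilege problem would show up here
finally:
    conn.close()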