# -*- coding: utf-8 -*-
"""
Created on Sat June 2 15:56:29 2018

@author: administrator

Scrape the heading and the ``columnbox`` text from creprice.cn market pages
and insert each pair into the ``House`` table of a MySQL database.
"""
import json
import time

import pymysql
import requests
from bs4 import BeautifulSoup

# Request headers sent with every page fetch.
headers = {
    'user-agent': 'mozilla/5.0 (Windows NT 6.1; WOW64) applewebkit/537.36 (khtml, like Gecko) chrome/53.0.2785.143 safari/537.36'
}

# Seconds to wait for the web server before giving up on a request, so a
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 10

# Parameterized statement: the driver escapes the scraped values itself, so
# quotes inside the page text can neither break nor inject into the SQL.
INSERT_SQL = "INSERT INTO House (cityname, totalproduction) VALUES (%s, %s)"


def get_info(url):
    """Fetch one market page and store every (city name, average price) pair.

    Args:
        url: Address of the page to download.

    The page is parsed with the ``lxml`` parser. Elements matched by the two
    selectors are paired positionally with ``zip``, so surplus matches on
    either side are ignored. Each pair is printed and then inserted through
    ``mysqlexcuit`` using a fresh connection from ``conn``.
    """
    wb_data = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    # NOTE(review): selector casing is kept exactly as found in this file;
    # CSS class names are case-sensitive -- confirm against the live page.
    city_names = soup.select('H1')  # city name
    aver_prices = soup.select('Div.columnbox')  # average price

    for city_name, aver_price in zip(city_names, aver_prices):
        data = {
            'CityName': city_name.get_text().strip(),
            'Averprice': aver_price.get_text().strip(),
        }

        # Round-trip through JSON and show both representations.
        json_str = json.dumps(data)
        print("Python raw Data:", repr(data))
        print("JSON object:", json_str)
        data2 = json.loads(json_str)
        print("data2[' CityName ']:", data2['CityName'])

        params = (data2['CityName'], data2['Averprice'])
        print(INSERT_SQL, params)

        # mysqlexcuit commits (or rolls back) and always closes the
        # connection, so each row gets its own short-lived connection.
        mysqlexcuit(conn(), INSERT_SQL, params)


def conn(host="localhost", user="Root", password="Root", db="Houseprice",
         port=3306, charset='UTF8'):
    """Open and return a new MySQL connection.

    Args:
        host: Server host name.
        user: Account name.
        password: Account password.
        db: Name of the database to use.
        port: TCP port of the server; change it if your server does not
            listen on 3306.
        charset: Connection character set; a UTF-8 charset is needed to
            insert Chinese text.

    Returns:
        The connection object returned by ``pymysql.connect``.

    NOTE(review): the default values are copied verbatim from this file;
    confirm that their letter case matches your server's configuration.
    """
    return pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=db,
        port=port,
        charset=charset,
    )


def mysqlexcuit(db, sql, params=None):
    """Execute one SQL statement, then close the connection.

    Args:
        db: An open connection as returned by ``conn``.
        sql: Statement text, optionally containing ``%s`` placeholders.
        params: Values for the placeholders, or ``None`` when the statement
            has none.

    The statement is committed on success. On any error the transaction is
    rolled back and the error is printed instead of being raised, so one bad
    row does not stop the crawl. The connection is closed in every case.
    """
    try:
        with db.cursor() as cur:
            cur.execute(sql, params)
        db.commit()
    except Exception as exc:
        # Best-effort insert: undo the partial work and report the failure.
        db.rollback()
        print("MySQL statement failed, rolled back:", exc)
    finally:
        db.close()


def main():
    """Crawl the market page of every configured city, pausing between pages."""
    # nanping, Guiyang, Haikou, Yinchuan, Xining, Zaozhuang, Xuchang, Tongliao
    # NOTE(review): code casing is kept exactly as found in this file --
    # confirm it against the URLs the site actually serves.
    cities = ['NP', 'Gy', 'HK', 'YC', 'xn', 'ZZ', 'XC', 'TL']
    urls = [
        'http://www.creprice.cn/market/{}/forsale/all/11.html'.format(city)
        for city in cities
    ]
    for single_url in urls:
        get_info(single_url)
        time.sleep(2)  # sleep 2 seconds between requests


if __name__ == '__main__':
    main()
When inserting Chinese text, pay attention to the character set (pass a UTF-8 charset when opening the connection); otherwise the insert will fail.
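Make sure the MySQL port is open. Many examples online assume port 3306, but your server may use a different one: use the port set in your own MySQL .ini configuration file, otherwise the connection fails with an error along the lines of "the target computer actively refused the connection".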
# Performing a connection
def conn(host="localhost", user="Root", password="Root", db="Houseprice",
         port=3306, charset='UTF8'):
    """Open and return a new MySQL connection.

    Args:
        host: Server host name.
        user: Account name.
        password: Account password.
        db: Name of the database to use.
        port: TCP port of the server; change it if your server does not
            listen on 3306.
        charset: Connection character set; a UTF-8 charset is needed to
            insert Chinese text.

    Returns:
        The connection object returned by ``pymysql.connect``.

    NOTE(review): the default values are copied verbatim from this snippet;
    confirm that their letter case matches your server's configuration.
    """
    import pymysql  # local import keeps this snippet self-contained

    return pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=db,
        port=port,
        charset=charset,
    )
Connecting Python to a MySQL database