# 2. Crawl data
# 1. Get each province's score-line information (get the links to the provinces):


# Get the scores.
# NOTE(review): `requests`, `BeautifulSoup`, `re`, `header` and `score_detail`
# are used below but are not defined before this point in the excerpt; they are
# presumably set up elsewhere in the crawler script -- confirm.
def get_score(url):
    """Scrape the score tables of one province page and store them in MongoDB.

    Args:
        url: Address of the province page to download.

    Returns:
        None. One document per matching table row is inserted into
        ``score_detail`` with the keys ``provice``, ``category``,
        ``score_line`` and ``score_list``.

    Raises:
        IndexError: If the page has no ``.col-nav span`` element, or has more
            score tables than ``h3.ft14`` headings.
    """
    web_data = requests.get(url, headers=header)
    soup = BeautifulSoup(web_data.content, 'lxml')

    # Province info: text of the first nav span without its last 5 characters.
    provice = soup.select('.col-nav span')[0].text[0:-5]

    # Liberal-arts / science section headings, with the spaces removed.
    category_list = [
        item.text.strip().replace(' ', '')
        for item in soup.select('h3.ft14')
    ]

    # Score rows are matched by class with a regular expression (compiled once;
    # a raw string keeps the backslash escape intact).
    row_class = re.compile(r'^c_\s*')

    # Tables that follow an <h3> sibling; table N is paired with heading N.
    tables = soup.select('h3 ~ table')
    for index, table in enumerate(tables):
        for row in table.find_all('tr', attrs={'class': row_class}):
            # Reset per row so a row without a labelled cell cannot reuse the
            # previous row's label (or hit an unbound name on the first row).
            score_line = None
            score_list = []
            for cell in row.select('td'):
                text = cell.text.strip()
                if 'class' in cell.attrs:
                    # The cell carrying a class holds the score-line category.
                    score_line = text
                else:
                    # Cells without a class hold the per-year scores.
                    score_list.append(text)
            score_data = {
                'provice': provice.strip(),         # province
                'category': category_list[index],   # liberal arts / science
                'score_line': score_line,           # score-line category
                'score_list': score_list,           # scores
            }
            score_detail.insert_one(score_data)     # insert into the database


# 3. Data visualization
# Crawling the data is only the first step; the next thing to do is to show it.
# The data is looked up in MongoDB and cleaned. Because I know a little
# pyecharts, ECharts is used for the display.
# 1). Filter by province and the other fields directly through MongoDB's find
# function to restrict what the search returns.
import pymongo
import charts

client = pymongo.MongoClient('localhost', 27017)
gaokao = client['gaokao']
score_detail = gaokao['score_detail']


# Filter by score line, province, and liberal arts / science.
# NOTE(review): this definition rebinds the name `get_score` used by the crawler
# above if both live in the same module; the article presumably shows two
# separate scripts -- confirm before merging them into one file.
def get_score(line, pro, cate):
    """Return the stored scores for one score line, province and category.

    Args:
        line: Score-line category to match against ``score_line``.
        pro: Province to match against ``provice``.
        cate: Liberal-arts / science category to match against ``category``.

    Returns:
        The scores of the last matching document as a list of ints, in
        reversed stored order, with ``'-'`` (no data) entries dropped.
        An empty list when no document matches.

    Raises:
        ValueError: If a stored score is neither ``'-'`` nor an integer string.
    """
    score_list = []
    # Several fields in one filter document are combined with an implicit AND.
    query = {'score_line': line, 'provice': pro, 'category': cate}
    for doc in score_detail.find(query):
        # Drop every column without data; list.remove('-') would raise when
        # there is none and would leave later '-' entries for int() to choke on.
        score_list = [int(score) for score in doc['score_list'] if score != '-']
        score_list.reverse()
    return score_list

# 2).
# Define related data
# Get the liberal-arts and science scores.
# NOTE(review): the literal values below look machine-translated; they must
# equal the strings stored in MongoDB by the crawler -- confirm.
line = 'one'
pro = 'Beijing'
cate_wen = 'liberal arts'
cate_li = 'science'
wen = get_score(line, pro, cate_wen)  # liberal arts
li = get_score(line, pro, cate_li)    # science

# Define the years, oldest first (2009..2017), matching the reversed score
# lists returned by get_score.
year = list(range(2009, 2018))

# 3). Line chart display
series = [
    {'name': 'liberal arts', 'data': wen, 'type': 'line'},
    {'name': 'Science', 'data': li, 'type': 'line', 'color': '#ff0066'},
]
options = {
    'chart': {'zoomType': 'xy'},
    'title': {'text': '{} province {} fraction'.format(pro, line)},
    'subtitle': {'text': 'Source:gaokao.com'},
    'xAxis': {'categories': year},
    'yAxis': {'title': {'text': 'Score'}},
}
charts.plot(series, options=options, show='inline')

# Private-message "Small 01" to get the source code and a mystery package!
Today we teach college-entrance-exam candidates an impressive skill: using Python to crawl the exam score lines of past years!