import bs4  # needed for the bs4.element.Tag type check
import requests
from bs4 import BeautifulSoup
# Fetch the page HTML
def gethtmltext(url):
    """Download *url* and return it parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend,
        or the empty string ``""`` if the request fails (connection error,
        timeout, or a non-2xx HTTP status).
    """
    try:
        r = requests.request("GET", url, timeout=30)
        # Turn 4xx/5xx responses into an exception so they take the
        # failure path below instead of being parsed as a ranking page.
        r.raise_for_status()
    except requests.RequestException:
        return ""
    # Let requests guess the encoding from the body and use that guess
    # when decoding r.text.
    r.encoding = r.apparent_encoding
    return BeautifulSoup(r.text, "html.parser")
# Parse the page and fill the result list
def fillunivlist(ulist, html):
    """Append one ``[col0, col1, col3]`` entry to *ulist* per table row.

    Walks the direct children of the first ``<tbody>`` found in *html* and,
    for every child that is a tag, appends the ``.string`` of its
    first, second and fourth ``<td>`` cells to *ulist*.

    Args:
        ulist: List that receives the extracted rows (mutated in place).
        html: Parsed page as returned by ``gethtmltext``. A falsy value
            (such as the ``""`` that ``gethtmltext`` returns on failure)
            is treated as "no data".

    Returns:
        None. Results are delivered through *ulist*.
    """
    if not html:
        # Download failed upstream; nothing to parse.
        return
    tbody = html.find("tbody")
    if tbody is None:
        return
    for tr in tbody.children:
        # .children also yields whitespace/text nodes between rows;
        # only real tags can contain <td> cells.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")  # calling a tag is shorthand for find_all
        if len(tds) < 4:
            # Not a full data row (index 3 is required below).
            continue
        ulist.append([tds[0].string, tds[1].string, tds[3].string])
# Print the table
def printunivlist(ulist, num):
    """Print a header line and up to *num* rows of *ulist* as a table.

    Args:
        ulist: Sequence of rows; the first three items of each row are
            printed under the "Rank", "School name" and "Total Score"
            headers. ``None`` items are printed as empty cells.
        num: Maximum number of rows to print. If *ulist* is shorter, only
            the available rows are printed.

    Returns:
        None. Output goes to standard output, ending with a
        ``"Suc" + str(num)`` line.
    """
    tplt = "{0:^10}\t{1:{3}^8}\t{2:^10}"
    # U+3000 (full-width space) is the fill character for the middle
    # column so that CJK names stay visually aligned.
    fill = chr(12288)
    print(tplt.format("Rank", "School name", "Total Score", fill))
    # Slice instead of indexing range(num) so a short list cannot raise
    # IndexError; max() keeps a negative num from slicing from the end.
    for u in ulist[:max(num, 0)]:
        cells = ["" if cell is None else cell for cell in u[:3]]
        print(tplt.format(cells[0], cells[1], cells[2], fill))
    print("Suc" + str(num))
# Main function
def main():
    """Fetch the 2016 ranking page, extract its rows and print the first 20."""
    uinfo = []  # filled in place by fillunivlist
    url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2016.html"
    html = gethtmltext(url)
    fillunivlist(uinfo, html)
    printunivlist(uinfo, 20)  # print 20 universities
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
"Python crawler" crawls Chinese university rankings from HTML