標籤:src 豆瓣 head nes ike toolbar obj demo Python實例
豆瓣
# coding: utf-8
# Print the text of every <a> nested in an <li class="title"> element on the
# Douban movie front page.
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Use the response as a context manager so the connection is closed as soon
# as the markup has been parsed (BeautifulSoup reads it in the constructor).
with urlopen("https://movie.douban.com/") as html:
    # Convert the HTML response into a BeautifulSoup object.
    bsObj = BeautifulSoup(html, "lxml")

# Find every <li> tag whose class attribute is "title".
# find_all is the current spelling of the legacy findAll alias.
liList = bsObj.find_all("li", class_="title")
for li in liList:
    # li.a is None when the <li> has no nested <a>; skip it instead of
    # crashing with AttributeError.
    if li.a is None:
        continue
    name = li.a.get_text()  # text inside the <a> tag
    print(name)
簡書
# -*- coding:utf-8 -*-
# Print the string of every <a class="title"> element on the Jianshu
# front page.
from urllib import request

from bs4 import BeautifulSoup

url = r'http://www.jianshu.com'
# Send a browser-like User-Agent so the request looks like it comes from a
# real browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
page = request.Request(url, headers=headers)
# Close the HTTP response deterministically once the body has been read.
with request.urlopen(page) as response:
    page_info = response.read()
page_info = page_info.decode('utf-8')
# Convert the fetched content into a BeautifulSoup tree, using lxml as the
# parser.
soup = BeautifulSoup(page_info, 'lxml')
# To inspect the parsed document while debugging, print soup.prettify().
# Find every <a> tag whose class is 'title'.
titles = soup.find_all('a', 'title')
# Print the string of each matching <a> tag.
for title in titles:
    print(title.string)
酷狗
def getInfo(self, url):
    """Fetch a chart page, print every ranked song and append it to hhh.txt.

    The page is selected with the ``.pc_temp_num``, ``.pc_temp_songlist`` and
    ``.pc_temp_time`` CSS classes; the three result lists are walked in
    lockstep, so the shortest one decides how many rows are produced.

    Each song link's text is treated as ``singer - title``. Only the first
    hyphen separates the two parts, so a title that itself contains a hyphen
    is kept whole. When there is no hyphen at all, the whole text is used as
    the title and the singer is left empty instead of raising ``IndexError``.

    Args:
        url: Address of the page to download.

    Returns:
        None. Rows are printed to stdout and appended to ``hhh.txt`` in the
        current working directory, one ``str(dict)`` per line.
    """
    # self.header is presumably a dict of HTTP request headers set elsewhere
    # on the class -- confirm against the enclosing class.
    html = requests.get(url, headers=self.header)
    soup = BeautifulSoup(html.text, 'html.parser')
    ranks = soup.select('.pc_temp_num')
    # Descend tag by tag down to the song link itself.
    titles = soup.select('.pc_temp_songlist > ul > li > a')
    times = soup.select('.pc_temp_time')

    lines = []
    for rank, title, songTime in zip(ranks, titles, times):
        # Split once, on the first hyphen only.
        singer, separator, song = title.get_text().partition('-')
        if not separator:
            # No "singer - title" separator: the whole text is the title.
            singer, song = '', singer
        data = {
            # get_text() drops the HTML tags that printing the element
            # itself would include.
            'rank': rank.get_text().strip(),
            'title': song.strip(),
            'singer': singer.strip(),
            'songTime': songTime.get_text().strip(),
        }
        print('rank:%2s\t' % data['rank'],
              'title:%2s\t' % data['title'],
              'singer:%2s\t' % data['singer'],
              'songTime:%2s\t' % data['songTime'])
        lines.append(str(data) + '\n')

    # Open the output file once instead of once per row. Skip it entirely
    # when nothing matched, so no empty file is created.
    if lines:
        with open('hhh.txt', 'a', encoding='utf8') as f:
            f.writelines(lines)
【更多參考】
https://www.cnblogs.com/ftl1012/p/9614146.html
https://www.cnblogs.com/ftl1012/p/9611334.html
Python實例---BeautifulSoup小Demo