標籤:
# Basic pymongo usage: connect to a local MongoDB server, then query the
# 'sheet_tab' collection of the 'walden' database.
import pymongo

client = pymongo.MongoClient('localhost', 27017)  # MongoDB client
walden = client['walden']  # database name
sheet_tab = walden['sheet_tab']  # collection ("table") inside the database

# Demo 1: read a text file and insert one document per line, storing the
# line index, the line text and its whitespace-separated word count.
# path = '/Users/qiongyanzhu/Documents/Plan-for-combating-master/week2/2_1/2_1code_of_video/walden.txt'
# with open(path, 'r') as f:
#     lines = f.readlines()
#     for index, line in enumerate(lines):
#         data = {
#             'index': index,
#             'line': line,
#             'words': len(line.split())
#         }
#         print(data)
#         sheet_tab.insert_one(data)

# Demo 2: print every document whose 'words' field equals 0.
# for item in sheet_tab.find({'words': 0}):
#     print(item)

# Demo 3: query with a comparison operator.
# Available operators: $lt/$lte/$gt/$gte/$ne
for item in sheet_tab.find({'words': {'$lt': 5}}):
    print(item)

# Demo 4: print the 'line' field of every document in the collection.
for item in sheet_tab.find():
    print(item['line'])
from bs4 import BeautifulSoup
import requests
import pymongo

client = pymongo.MongoClient('localhost', 27017)
xiaozhu = client['xiaozhu']
sheet_tab = xiaozhu['sheet_tab']

# Search-result pages 1 through 3.
url_as = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(str(number))
    for number in range(1, 4)
]


def insert_house_info(url_s):
    """Scrape each search-result page in *url_s* and store its listings.

    For every URL the page is downloaded, parsed with the lxml parser, and
    one document per listing (price, title, detail URL) is inserted into
    the module-level ``sheet_tab`` collection.

    Args:
        url_s: iterable of page URLs to fetch.
    """
    for url_a in url_s:
        # Fetch the page.
        wb_data = requests.get(url_a)
        # Parse the response with the lxml parser.
        soup = BeautifulSoup(wb_data.text, 'lxml')
        prices = soup.select('span.result_price')
        titles = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span')
        urls = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname')
        for price, title, url in zip(prices, titles, urls):
            # Read the price text once instead of calling get_text() twice.
            price_text = price.get_text()
            info = {
                # Drop the first character and the last two characters before
                # converting to int (presumably a currency sign and a unit
                # suffix -- verify against the live page markup).
                'price': int(price_text[1:len(price_text) - 2]),
                'title': title.get_text(),
                'url': url.get('detailurl')
            }
            # print(info)
            sheet_tab.insert_one(info)


def find_house():
    """Print every stored listing whose 'price' is greater than 500."""
    for info in sheet_tab.find({'price': {'$gt': 500}}):
        print(info)


insert_house_info(url_as)
find_house()
2.1 Python 使用 MongoDB 範例程式碼