"""Demo: store the lines of a text file in MongoDB and query them with PyMongo.

NOTE(review): this script was reconstructed from a whitespace- and
case-mangled paste. Identifier casing and the database/collection names
('walden', 'sheet_tab') are the most likely originals -- confirm against
the real database before relying on them.
"""
import pymongo

# Client for the MongoDB server at localhost:27017.
client = pymongo.MongoClient('localhost', 27017)
# Database handle, looked up by name on the client.
walden = client['walden']
# Collection handle inside that database.
sheet_tab = walden['sheet_tab']

# Demo code 1 (disabled): insert one document per line of the text file,
# recording the line index, the raw line and its whitespace-separated
# word count.
# NOTE(review): path casing is as it appeared in the garbled source -- verify.
# path = '/users/qiongyanzhu/documents/plan-for-combating-master/week2/2_1/2_1code_of_video/walden.txt'
# with open(path, 'r') as f:
#     lines = f.readlines()
#     for index, line in enumerate(lines):
#         data = {
#             'index': index,
#             'line': line,
#             'words': len(line.split()),
#         }
#         print(data)
#         sheet_tab.insert_one(data)

# Demo code 2 (disabled): print the documents whose 'words' field is 0.
# for item in sheet_tab.find({'words': 0}):
#     print(item)

# Demo code 3: comparison operators available in a query are
# $lt / $lte / $gt / $gte / $ne. Print documents with fewer than 5 words.
for item in sheet_tab.find({'words': {'$lt': 5}}):
    print(item)

# Demo code 4: print the stored 'line' text of every document.
for item in sheet_tab.find():
    print(item['line'])
"""Scrape short-term rental listings from xiaozhu.com into MongoDB, then query them.

NOTE(review): this script was reconstructed from a whitespace- and
case-mangled paste. Identifier casing, the URL scheme, the CSS selectors
and the 'detailurl' attribute name are the most likely originals --
confirm against the live page markup.
"""
from bs4 import BeautifulSoup
import requests
import pymongo

# Client for the MongoDB server at localhost:27017, plus the database and
# collection that receive the scraped listings.
client = pymongo.MongoClient('localhost', 27017)
xiaozhu = client['xiaozhu']
sheet_tab = xiaozhu['sheet_tab']

# Search-result pages 1 to 3.
url_as = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(number)
    for number in range(1, 4)
]


def insert_house_info(url_s):
    """Fetch each search page in *url_s* and insert one document per listing.

    Each inserted document has the keys 'price' (int), 'title' (str) and
    'url' (value of the listing element's 'detailurl' attribute).

    Args:
        url_s: iterable of search-result page URLs.
    """
    for url_a in url_s:
        # Download the page and parse it with the lxml parser.
        wb_data = requests.get(url_a)
        soup = BeautifulSoup(wb_data.text, 'lxml')

        prices = soup.select('span.result_price')
        titles = soup.select(
            '#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span'
        )
        urls = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname')

        # zip() stops at the shortest list, so a listing missing any of the
        # three elements shortens the result rather than raising.
        for price, title, url in zip(prices, titles, urls):
            price_text = price.get_text()
            info = {
                # Drop the first character and the last two characters
                # before converting -- presumably a currency sign and a
                # two-character suffix; verify against the page markup.
                'price': int(price_text[1:-2]),
                'title': title.get_text(),
                'url': url.get('detailurl'),
            }
            # print(info)
            sheet_tab.insert_one(info)


def find_house(min_price=500):
    """Print every stored listing whose 'price' is greater than *min_price*.

    NOTE(review): the threshold literal was lost in the garbled source;
    500 is an assumed default -- confirm the intended value.

    Args:
        min_price: exclusive lower bound used with the '$gt' operator.
    """
    for info in sheet_tab.find({'price': {'$gt': min_price}}):
        print(info)


insert_house_info(url_as)
find_house()
2.1 Sample code for using MongoDB from Python