#-*-Coding:utf-8-*-"" "Created on Sat Oct 21:01:23 2016 @author: Hhxsym" "ImportRequestsImportJsonImportOsImportPymongoImportTime fromBs4ImportBeautifulSoup fromMultiprocessingImportPool #进程调用的包 inpath="C:\\Users\\Hhxsym\\Desktop\\Course GroupPythonReptiles" Inpath = Unicode (Inpath,"UTF8"Os.chdir (Inpath) #不做编码转换后, the Chinese path cannot be opened, changing #连接数据库 client =pymongo. Mongoclient (' localhost ', 27017) #连接数据库 sense = client[' Sense '] #创建数据库 url_list = sense[' Url_list '] #创建数据库表defGet_city_urls (): url =' http://www.senseluxury.com ' withOpen' city.html ') asF: #本地读取 response = F.read () #直接读取到文本 soup = beautifulsoup (Response,' lxml ') URL = soup.select (' #destination_nav > div > div > div > dl.dl-list > dt > A ') #CSS结构类型, note the space return[Url.get ("href") forUrl inURLsdefGet_page_list (city, page=1): now = Time.strftime ('%y-%m-%d%h:%m:%s ', Time.localtime (Time.time ())) URL =' http://www.senseluxury.com/destinations_list/%s '% City.split ('/') [-1] payload = {' page ':p Age,' Callback ':' Jsonp 'Responses = Requests.get (url,payload) #请求网页, get the content of the response, Requests.get (url address, keyword URL parameter) #print Responses.urlPrintResponses.status_code #print responses.text[6:-1] #打印json格式的 "string" (1) wb_data = Json.loads (responses.text[6:-1)) # Converts a string to a Python dictionary (2)PrintType (Responses.text), type (Wb_data) #对比两种类型 (1) (2) Contrast #print json.dumps (wb_data, encoding= ' Utf-8 ', ensure_ascii=false) #json. Dumps method, transform into Chinese print #通过循环获取数据 forI inwb_data[' Val '][' Data ']: title = i[' title '] URL =' http://www.senseluxury.com '+i[' URL '#数据拼接, get the data id we want = i[' id '] Server=i[' Server '].replace (' ',' '). Split () Memo = i[' Memo '] Price = i[' Price '] Address = i[' Address '] Subject =i[' Subject '] Data = {' title ': Title,' id ': ID,' Server ': Server,' Memo ': Memo,' Prie ':p Rice,' Address ': Address,' Subject ': Subject,' Create_time ': now} url_list.insert_one (data) #插入数据 (dictionary) # Note: _id is automatically generated in the generated list of data #print title, URLPrintDataif__name__==' __main__ ': #get_page_list (1) #print get_city_urls () #get_page_list (' Http://www.senseluxury.com/destinations/2 ', page=1) City_urls = Get_city_urls ()PrintCity_urls pool = Pool (processes=4) #设置进程数量 pool.map (get_page_list, City_urls) #pool. Map (function name, iteration object) Pool.close ( # Wait for all processes in the process to finish before closing the Pool.join () #关闭之后要计入它, to prevent the main program from closing before the child process ends the # page JSON type of view: Browser-> Right-check-> network-> XHR-> page Trigger (jump page)-> name tick-> Response-> See if JSON format string # http://jsoneditoronline.org/online format Web site, view transfer nested format