Crawling e-commerce sales information and sales listings
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Filename: spider_58center_sth.py
"""Crawl e-commerce sales information.

Example target: computer sales listings on the 58.com local (Nanjing) site.
"""

import time

import requests
from bs4 import BeautifulSoup

# Listing page that the crawl starts from.
url_58 = 'http://nj.58.com/?PGTID=0d000000--0c5c-ffba-71f8f3f7039e&ClickID=1'

# Seconds to wait for a response before giving up; without a timeout
# ``requests.get`` may block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10

# Links whose href contains any of these substrings are skipped.
_SKIPPED_LINK_MARKERS = ('zhuanzhua', 'jump')


def get_url_list(url):
    """Fetch a listing page and collect the detail-page links found on it.

    Args:
        url: Address of the listing page to download.

    Returns:
        A single string in which every kept link is preceded by a newline
        (so the string is empty when nothing was kept). Callers recover the
        individual links with ``str.split()``.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'lxml')
    anchors = soup.select('td.t > a[class="t"]')

    kept_links = []
    for anchor in anchors:
        href = anchor.get('href')
        # An anchor without an href yields None; skip it rather than fail
        # on the substring checks below.
        if not href:
            continue
        if any(marker in href for marker in _SKIPPED_LINK_MARKERS):
            continue
        kept_links.append(href)

    url_list = ''.join('\n' + href for href in kept_links)

    print('url_list: %s' % url_list)
    return url_list


# Obtain target information by category
def get_url_info():
    """Download every listed detail page and print the fields scraped from it.

    For each link returned by ``get_url_list(url_58)`` the page is fetched
    (one second apart) and one dict per matched item is printed with the
    keys ``type``, ``title``, ``date``, ``price``, ``fineness`` and
    ``region``. Nothing is returned.
    """
    url_list = get_url_list(url_58)

    for url in url_list.split():
        time.sleep(1)  # pause between requests
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, 'lxml')

        # NOTE(review): the selectors below assume the 58.com detail-page
        # markup; verify them against the live page.
        categories = soup.select(
            '#head > div.breadCrumb.f12 > span:nth-of-type(3) > a')
        titles = soup.select('div.col_sub.mainTitle > h1')
        dates = soup.select('li.time')
        prices = soup.select(
            'div.person_add_top.no_ident_top > div.per_ad_left > '
            'div.col_sub.summary > ul > '
            'li:nth-of-type(1) > div.su_con > span.price.c_f50')
        finenesses = soup.select(
            'div.col_sub.summary > ul > li:nth-of-type(2) > div.su_con > span')
        areas = soup.select(
            'div.col_sub.summary > ul > li:nth-of-type(3) > div.su_con > span')

        # zip stops at the shortest result list, so a page on which any
        # selector matches nothing produces no output.
        rows = zip(categories, titles, dates, prices, finenesses, areas)
        for category, title, date, price, fineness, area in rows:
            data = {
                'type': category.get_text(),
                'title': title.get_text(),
                'date': date.get_text(),
                'price': price.get_text(),
                'fineness': fineness.get_text().strip(),
                'region': list(area.stripped_strings),
            }
            print(data)


if __name__ == '__main__':
    get_url_info()
Crawling mall product sales information