# Multi-threaded Amazon price crawler: looks up projector-lamp model numbers
# on Amazon and prints the first listed price from the general search and
# from a specific storefront ("epharos") search, one thread per keyword group.
import re
import threading
import time
from time import ctime, sleep
from queue import Queue  # fixed: `queue` module exports `Queue`, not `queue`; kept although currently unused

import requests

# Projector-lamp model numbers, split into five groups so each group can be
# crawled by its own worker thread.
keywords_a = ['ELPLP80', 'ELPLP23', 'ELPLP29', 'NP14LP', 'POA-LMP126', 'ELPLP66']
keywords_b = ['VIP230W0.8E20.8', 'VIP240W0.8E20.9N', 'NP30LP', 'LMP-C162', 'VT70LP']
keywords_c = ['TLPLV4', 'POA-LMP131', 'BL-FP240A', 'VLT-XD3200LP', 'ET-LAD35',
              'BL-FU240A', '20-01032-20']
keywords_d = ['ELPLP76', 'VLT-HC3800LP', 'BL-FP240C', '5811116765-S', 'ELPLP69',
              'BL-FP200H']
keywords_e = ['5100MP', 'RLC-057', 'ELPLP71', 'ELPLP64', 'BL-FS300B']

# Regular expression that extracts the price text from Amazon's search-result
# markup (the <span> carrying the price CSS classes).
re_rule = r'<span class="a-size-base a-color-price s-price a-text-bold">(.*?)</span>'

# Browser-like request headers so Amazon serves a normal desktop page.
headers_am = {
    'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/50.0.2661.86 Safari/537.36'),
}


def scraper(kw):
    """Fetch and print the market price and the storefront price for every
    search keyword in *kw*.

    kw -- an iterable of keyword strings to search for on Amazon.
    """
    for keyword in kw:
        # General Amazon search page for the keyword.
        url_keyword = ('https://www.amazon.com/s/ref=nb_sb_noss?'
                       'url=search-alias%3Daps&field-keywords={}').format(keyword)
        # Same search restricted to the "epharos" storefront.
        url_epharos = ('http://www.amazon.com/s/ref=nb_sb_noss?'
                       'url=srs%3D9143518011%26search-alias%3Dspecialty-aps'
                       '&field-keywords={}').format(keyword)
        response = requests.get(url_keyword, headers=headers_am)
        response_a = requests.get(url_epharos, headers=headers_am)
        price = re.findall(re_rule, response.text)
        price_e = re.findall(re_rule, response_a.text)
        # Guard against empty match lists: a page with no parsable price used
        # to raise IndexError on price[0] and silently kill the worker thread.
        print('--------------------------{0} crawled to completion at {1}\n\n'
              'market price: {2}\n\nepharos: {3}'.format(
                  keyword, time.ctime(),
                  price[0] if price else 'N/A',
                  price_e[0] if price_e else 'N/A'))
        time.sleep(1)  # be polite: at most one request pair per second per thread


# One worker thread per keyword group.
threads = [
    threading.Thread(target=scraper, args=(group,))  # args is a tuple
    for group in (keywords_a, keywords_b, keywords_c, keywords_d, keywords_e)
]

if __name__ == '__main__':
    for t in threads:
        t.daemon = True  # daemon thread: don't block interpreter exit (setDaemon is deprecated)
        t.start()
    for t in threads:
        t.join()  # wait for all worker threads to finish
    print('All over %s' % ctime())
Python multi-threaded crawler: scraping Amazon prices