#!/usr/bin/env python
# encoding: utf-8
"""Report where a shop ranks in Baidu Waimai (takeaway) shop listings.

For every identifier in ``urllist`` the script pages through the JSON shop
list served by waimai.baidu.com and prints the position of the first shop
whose name contains ``SHOP_KEYWORD``.

@version: ??
@author: phpergao
@license: Apache Licence
@file: baidu_paiming.py
@time: 2016/8/1 11:10
"""
import codeop
import nturl2path
import re
import urllib
import urllib.request

import requests

# NOTE: the original script also imported ``macurl2path``. That module was
# removed from the standard library in Python 3.7 and nothing here uses it,
# so importing it only made the script fail at start-up.

# Identifiers substituted into the shop-list URL path, one listing each.
# NOTE(review): presumably one per delivery location -- confirm. The
# trailing comments are the labels carried over from the original source.
urllist = [
    "f7a2bee997ef68e8",  #
    "3b246a0864597e50",  #
    "0ebf88697141f32f",  # citychamp
    "eff209d4a7f538ca",  # li Gang
    "57f9e38e087acf61",  # Purchase book
]

# TODO(owner): the shop-name literal that the script searched for was lost
# from the source. Set it here, or pass ``keyword=`` to ``chapaiming``.
# While it is empty every name matches, so the first shop is reported.
SHOP_KEYWORD = ""

# Pretend to be a desktop browser when requesting the listing.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
)

# Placeholders: listing identifier, 1-based page number.
SHOPLIST_URL = (
    "http://waimai.baidu.com/waimai/shoplist/{}"
    "?display=json&page={}&count=40"
)

# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10

# NOTE(review): the key spelling/casing in these patterns (notably
# "sortby") was reconstructed from a garbled copy -- verify against a
# real response before relying on them.
SHOP_NAME_RE = re.compile(r'"shop_name":"(.*?)","shop_announcement":')
ADDRESS_RE = re.compile(r'"poi_address":"(.*?)"},"sortby":')


def chapaiming(urllist, keyword=SHOP_KEYWORD):
    """Print and return the rank of the first shop matching *keyword*.

    Pages through the shop list for one listing identifier, counting shops
    in the order the server returns them (the count continues across
    pages), and stops at the first shop whose name contains *keyword*.

    Args:
        urllist: A single listing identifier (one element of the
            module-level ``urllist``); the name is kept for compatibility.
        keyword: Substring to look for in each shop name.

    Returns:
        The 1-based rank of the matching shop, or ``None`` when a page
        with no shops is reached before any name matches.

    Raises:
        urllib.error.URLError: If a page cannot be fetched.
    """
    opener = urllib.request.build_opener()
    opener.addheaders = [("User-Agent", USER_AGENT)]

    rank = 0
    page = 1
    while True:
        url = SHOPLIST_URL.format(urllist, page)
        # Close the connection as soon as the body has been read.
        with opener.open(url, timeout=REQUEST_TIMEOUT) as response:
            # Turn the \uXXXX escapes of the JSON body into characters.
            text = response.read().decode("unicode_escape")

        shop_names = SHOP_NAME_RE.findall(text)
        if not shop_names:
            # Ran past the last page without a match; stop instead of
            # requesting empty pages forever.
            return None

        address = ADDRESS_RE.findall(text)
        for shop_name in shop_names:
            rank += 1
            if keyword in shop_name:
                print(
                    shop_name,
                    "ranking in: {}, locating address: {}".format(
                        rank, address
                    ),
                )
                return rank
        page += 1


if __name__ == "__main__":
    for listing_id in urllist:
        chapaiming(listing_id)
<img onload="if(this.width>650) this.width=650;" src="http://s1.51cto.com/wyfs02/M02/85/44/wKiom1ee2OuyD71JAABPNgdUPPE331.png-wh_500x0-wm_3-wmp_4-s_3327659994.png" title="qq picture 20160801130620.png" alt="wKiom1ee2OuyD71JAABPNgdUPPE331.png-wh_50" />
Crawling Baidu Waimai (takeaway) shop rankings with a Python crawler