This article presents a Python crawler that queries China's national list of dishonest judgment debtors ("defaulters"). It covers calling a public web API from Python and converting the returned JSON data, and may serve as a reference for readers who need these techniques.
This article describes a Python crawler implementing a query against the national defaulters list. It is shared here for your reference, as follows:
I. Description of requirements
Use Baidu's open data interface to query the national list of dishonest judgment debtors: given a person's name, determine whether that person appears on the list.
II. Python implementation
Version 1:
# -*- coding: utf-8 -*-
"""Version 1: query Baidu's open-data API for China's national list of
dishonest judgment debtors ("defaulters"), collect each match's name and
ID-card number, and export the de-duplicated results to an Excel file.
"""
import json
import time
import urllib.parse
import urllib.request

import pandas as pd

# Module-level accumulators filled by person_executed(), mirroring the
# original script's structure (one list per exported column).
iname = []
icard = []

# URL-encoded query string for "失信被执行人名单"
# (list of dishonest persons subject to court enforcement).
_QUERY = "%E5%A4%B1%E4%BF%A1%E8%A2%AB%E6%89%A7%E8%A1%8C%E4%BA%BA%E5%90%8D%E5%8D%95"
_BASE = "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php"


def person_executed(name):
    """Fetch up to 30 result pages (10 records each) for *name* and append
    each hit's name and card number to the module-level lists.

    A failure on one page (network error, bad JSON) is reported and that
    page is skipped, so a single bad response does not abort the crawl.
    The original bare ``except: pass`` would also have hidden programming
    errors such as a mistyped key; the handler here is deliberately narrow.
    """
    for page in range(30):
        url = (_BASE
               + "?resource_id=6899"
               + "&query=" + _QUERY
               + "&cardNum="
               # quote() the name so non-ASCII input forms a valid URL
               + "&iname=" + urllib.parse.quote(str(name))
               + "&areaName="
               + "&pn=" + str(page * 10)
               + "&rn=10"
               + "&ie=utf-8&oe=utf-8&format=json")
        try:
            with urllib.request.urlopen(url, timeout=10) as resp:
                payload = json.loads(resp.read().decode("utf-8"))
        except (OSError, ValueError) as exc:
            print("page %d skipped: %s" % (page, exc))
            continue
        for entry in payload.get("data", []):
            for record in entry.get("result", []):
                print(record["iname"], record["cardNum"])
                iname.append(record["iname"])
                icard.append(record["cardNum"])


if __name__ == "__main__":
    time1 = time.time()
    person_executed("Guo**")
    print(len(iname))
    # Organize the collected columns into a data frame ...
    data = pd.DataFrame({"iname": iname, "icard": icard})
    # ... and drop duplicate rows (the API may repeat records across pages).
    data1 = data.drop_duplicates()
    print(data1)
    print(len(data1))
    # Write the de-duplicated table out to Excel.
    data1.to_excel("f:\\iname_icard_query.xlsx", header=True, index=False)
    time2 = time.time()
    print("ok, spider end!")
    print("total time: " + str(time2 - time1) + "s")
III. Results
"D:\Program Files\python27\python.exe" d:/pycharmprojects/learn2017/National breach of faith by the executive person query. PY
Guo * * 34122319790****5119
Guo * * 32032119881****2419
Guo * * 32032119881****2419
3
Idcard Name
0 34122319790****5119 Guo * *
1 32032119881****2419 Guo * *
2
OK, the crawler is over!
Total time: 7.72000002861s
Process finished with exit code 0
Version 2:
# -*- coding: utf-8 -*-
"""Version 2: same crawl as version 1, but besides name and ID-card number
it also captures the court, area, case number, duty, performance status,
dishonesty type and publish date of every record, then exports the
de-duplicated table to Excel.
"""
import json
import time
import urllib.parse
import urllib.request

import pandas as pd

# Module-level accumulators, one per exported column, mirroring the
# original script's structure.
iname = []
icard = []
courtname = []
areaname = []
casecode = []
duty = []
performance = []
disrupttypename = []
publishdate = []

# URL-encoded query string for "失信被执行人名单"
# (list of dishonest persons subject to court enforcement).
_QUERY = "%E5%A4%B1%E4%BF%A1%E8%A2%AB%E6%89%A7%E8%A1%8C%E4%BA%BA%E5%90%8D%E5%8D%95"
_BASE = "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php"


def person_executed(name):
    """Fetch up to 30 result pages (10 records each) for *name* and append
    every field of each hit to the matching module-level list.

    A failure on one page (network error, bad JSON) is reported and the
    page skipped — a narrow replacement for the original bare
    ``except: pass``, which also hid programming errors.
    """
    for page in range(30):
        url = (_BASE
               + "?resource_id=6899"
               + "&query=" + _QUERY
               + "&cardNum="
               # quote() the name so non-ASCII input forms a valid URL
               + "&iname=" + urllib.parse.quote(str(name))
               + "&areaName="
               + "&pn=" + str(page * 10)
               + "&rn=10"
               + "&ie=utf-8&oe=utf-8&format=json")
        try:
            with urllib.request.urlopen(url, timeout=10) as resp:
                payload = json.loads(resp.read().decode("utf-8"))
        except (OSError, ValueError) as exc:
            print("page %d skipped: %s" % (page, exc))
            continue
        for entry in payload.get("data", []):
            for rec in entry.get("result", []):
                print(rec["iname"], rec["cardNum"], rec["courtName"],
                      rec["areaName"], rec["caseCode"], rec["duty"],
                      rec["performance"], rec["disruptTypeName"],
                      rec["publishDate"])
                iname.append(rec["iname"])
                icard.append(rec["cardNum"])
                courtname.append(rec["courtName"])
                areaname.append(rec["areaName"])
                casecode.append(rec["caseCode"])
                duty.append(rec["duty"])
                performance.append(rec["performance"])
                disrupttypename.append(rec["disruptTypeName"])
                publishdate.append(rec["publishDate"])


if __name__ == "__main__":
    time1 = time.time()
    person_executed("Guo**")
    print(len(iname))
    # Organize all collected columns into one data frame ...
    detail_data = pd.DataFrame({
        "iname": iname,
        "icard": icard,
        "courtname": courtname,
        "areaname": areaname,
        "casecode": casecode,
        "duty": duty,
        "performance": performance,
        "disrupttypename": disrupttypename,
        "publishdate": publishdate,
    })
    # ... and drop duplicate rows (the API may repeat records across pages).
    detail_data1 = detail_data.drop_duplicates()
    # Write the de-duplicated table out to Excel.
    detail_data1.to_excel("f:\\iname_icard_query.xlsx",
                          header=True, index=False)
    time2 = time.time()
    print("ok, spider end!")
    print("total time: " + str(time2 - time1) + "s")