Background:
I needed to buy a plane ticket and had been searching by hand over and over; a web crawler can solve this problem for me. Reference article: "Using Python to crawl Ctrip ticket information (Part one)".
That article explains the process in great detail.
Working through it gave me a basic understanding of how the crawl works. The script below checks tickets from Shanghai to Xi'an for 04-29 through 05-02:
#coding: utf-8
# Python 2 script: relies on urllib2, reload(sys) and sys.setdefaultencoding.
import urllib2
from lxml import etree
import json
import random
import sys

# Python 2 idiom: force the implicit str<->unicode codec to UTF-8 so that
# printing the decoded JSON fields does not raise UnicodeEncodeError.
reload(sys)
sys.setdefaultencoding('utf8')

# NOTE(review): the right-hand side of the original price comparison was lost
# when this listing was extracted ("... < :").  450 is borrowed from the mail
# threshold used by the combined script later in this article -- confirm.
PRICE_LIMIT = 450


def get_json2(date, rk, ck, r):
    """Fetch flight data for one day and print the cheap flights.

    date: departure date string, e.g. '2017-04-29'.
    rk, ck, r: request tokens as returned by get_parameter(date).

    For every entry of the response's 'fis' list whose 'lp' value is below
    PRICE_LIMIT, prints the 'lp', 'dt' and 'at' fields on one line.
    """
    # NOTE(review): the query-string casing is reproduced from the extracted
    # listing, which may have lower-cased it -- verify against the live site.
    url = ("http://flights.ctrip.com/domesticsearch/search/"
           "SearchFirstRouteFlights?dcity1=sha&acity1=sia&searchtype=s"
           "&ddate1=%s&isnearairportrecommond=0&rk=%s&ck=%s&r=%s"
           % (date, rk, ck, r))
    headers = {
        'Host': "flights.ctrip.com",
        'User-Agent': ("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) "
                       "Gecko/20100101 Firefox/45.0"),
        # The Referer carries the date that is being queried.
        'Referer': ("http://flights.ctrip.com/booking/"
                    "hrb-sha-day-1.html?ddate1=%s" % date),
    }
    req = urllib2.Request(url, headers=headers)
    res = urllib2.urlopen(req)
    content = res.read()
    dict_content = json.loads(content, encoding="gb2312")

    for flight in dict_content['fis']:
        if flight[u'lp'] < PRICE_LIMIT:
            # Same output as three comma-terminated Python 2 print statements.
            print("%s %s %s" % (flight[u'lp'], flight[u'dt'], flight[u'at']))
            # print(flight[u'dpbn'])


def get_parameter(date):
    """Get the request tokens needed by the search URL.

    date: date string, format example: 2016-05-13.

    Downloads the booking page for that date, splits the text of the first
    <script> under <body> on whitespace and slices rk, ck and r out of it at
    fixed offsets.  Returns the tuple (rk, ck, r).
    """
    url = 'http://flights.ctrip.com/booking/hrb-sha-day-1.html?ddate1=%s' % date
    res = urllib2.urlopen(url).read()
    tree = etree.HTML(res)
    pp = tree.xpath('//body/script[1]/text()')[0].split()

    # ck: take 32 characters of the 4th token and move the character at
    # index 13 to position 5.
    ck_original = pp[3][-34:-2]
    ck = (ck_original[0:5] + ck_original[13]
          + ck_original[5:13] + ck_original[14:])

    # rk: a random number in [0, 10) with 15 decimals, followed by six
    # characters taken from the last token.
    rk = pp[-1][18:24]
    num = random.random() * 10
    num_str = "%.15f" % num
    rk = num_str + rk

    r = pp[-1][27:len(pp[-1]) - 3]
    return rk, ck, r


if __name__ == '__main__':
    dates = ['2017-04-29', '2017-04-30', '2017-05-01', '2017-05-02']
    for date in dates:
        rk, ck, r = get_parameter(date)
        get_json2(date, rk, ck, r)
        print("-----")
The mail-sending program (borrowed from another source, which I will credit here):
# -*- coding: utf-8 -*-
# Python 2 script: relies on raw_input and the unicode built-in.
from email import encoders
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
import smtplib


def _format_addr(s):
    """Return 'Name <addr>' with the display name header-encoded as UTF-8.

    s: an address string of the form u'Name <user@example.com>'.
    """
    name, addr = parseaddr(s)
    return formataddr((
        Header(name, 'utf-8').encode(),
        addr.encode('utf-8') if isinstance(addr, unicode) else addr))


# Account details are asked for interactively.
from_addr = raw_input('From: ')
password = raw_input('Password: ')
to_addr = raw_input('To: ')
smtp_server = raw_input('SMTP server: ')

msg = MIMEText('Not just fly fight ...', 'plain', 'utf-8')
msg['From'] = _format_addr(u'Air <%s>' % from_addr)
msg['To'] = _format_addr(u'126. Air <%s>' % to_addr)
msg['Subject'] = Header(u'flight ...', 'utf-8').encode()

server = smtplib.SMTP(smtp_server)
server.set_debuglevel(1)  # change to 0 for real use
server.login(from_addr, password)
server.sendmail(from_addr, [to_addr], msg.as_string())
server.quit()
Combining the flight query with the mail-sending code gives roughly the following:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Python 2 script: relies on urllib2, reload(sys) and the unicode built-in.
import urllib2
from lxml import etree
import json
import random
from email import encoders
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
import smtplib
import sys

# Python 2 idiom: force the implicit str<->unicode codec to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Mail account settings (previously read with raw_input).  The values are
# masked placeholders: fill in real ones locally and never publish a real
# password together with the script.
from_addr = "****@126.com"    # raw_input('From: ')
password = "********"         # raw_input('Password: ')
to_addr = "********@qq.com"   # raw_input('To: ')
smtp_server = "smtp.126.com"  # raw_input('SMTP server: ')

# NOTE(review): the threshold of the print condition was lost when this
# listing was extracted ("... <:"); 450 mirrors the mail threshold -- confirm.
PRINT_PRICE_LIMIT = 450
# A mail is sent for every flight whose lowest price is at or below this.
MAIL_PRICE_LIMIT = 450


def _format_addr(s):
    """Return 'Name <addr>' with the display name header-encoded as UTF-8."""
    name, addr = parseaddr(s)
    return formataddr((
        Header(name, 'utf-8').encode(),
        addr.encode('utf-8') if isinstance(addr, unicode) else addr))


def _send_price_alert(flight):
    """Mail the price and times of one flight entry to to_addr."""
    # NOTE(review): the format string and its third argument were corrupted in
    # the extracted listing; price / departure / arrival is a reconstruction.
    body = '%r at %s %s' % (flight[u'lp'], flight[u'dt'], flight[u'at'])
    msg = MIMEText(body, 'plain', 'utf-8')
    msg['From'] = _format_addr(u'Air <%s>' % from_addr)
    msg['To'] = _format_addr(u'126. Air <%s>' % to_addr)
    msg['Subject'] = Header(u'flight...%r' % flight[u'lp'], 'utf-8').encode()

    server = smtplib.SMTP(smtp_server)
    try:
        server.set_debuglevel(0)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())
    finally:
        # Close the SMTP session even when login or sending fails.
        server.quit()


def get_json2(date, rk, ck, r):
    """Fetch flight data for one day, print cheap flights and mail alerts.

    date: departure date string, e.g. '2017-04-29'.
    rk, ck, r: request tokens as returned by get_parameter(date).
    """
    # NOTE(review): the query-string casing is reproduced from the extracted
    # listing, which may have lower-cased it -- verify against the live site.
    url = ("http://flights.ctrip.com/domesticsearch/search/"
           "SearchFirstRouteFlights?dcity1=sha&acity1=sia&searchtype=s"
           "&ddate1=%s&isnearairportrecommond=0&rk=%s&ck=%s&r=%s"
           % (date, rk, ck, r))
    headers = {
        'Host': "flights.ctrip.com",
        'User-Agent': ("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) "
                       "Gecko/20100101 Firefox/45.0"),
        # The Referer carries the date that is being queried.
        'Referer': ("http://flights.ctrip.com/booking/"
                    "hrb-sha-day-1.html?ddate1=%s" % date),
    }
    req = urllib2.Request(url, headers=headers)
    res = urllib2.urlopen(req)
    content = res.read()
    dict_content = json.loads(content, encoding="gb2312")

    for flight in dict_content['fis']:
        if flight[u'lp'] < PRINT_PRICE_LIMIT:
            # NOTE(review): the fourth key survived only as ' N ' in the
            # extracted listing; 'fn' is a guess -- confirm against the JSON.
            print("%s %s %s %s" % (flight[u'lp'], flight[u'dt'],
                                   flight[u'at'], flight[u'fn']))
        if flight[u'lp'] <= MAIL_PRICE_LIMIT:
            _send_price_alert(flight)


def get_parameter(date):
    """Get the request tokens needed by the search URL.

    date: date string, format example: 2016-05-13.

    Downloads the booking page for that date, splits the text of the first
    <script> under <body> on whitespace and slices rk, ck and r out of it at
    fixed offsets.  Returns the tuple (rk, ck, r).
    """
    url = 'http://flights.ctrip.com/booking/hrb-sha-day-1.html?ddate1=%s' % date
    res = urllib2.urlopen(url).read()
    tree = etree.HTML(res)
    pp = tree.xpath('//body/script[1]/text()')[0].split()

    # ck: take 32 characters of the 4th token and move the character at
    # index 13 to position 5.
    ck_original = pp[3][-34:-2]
    ck = (ck_original[0:5] + ck_original[13]
          + ck_original[5:13] + ck_original[14:])

    # rk: a random number in [0, 10) with 15 decimals, followed by six
    # characters taken from the last token.
    rk = pp[-1][18:24]
    num = random.random() * 10
    num_str = "%.15f" % num
    rk = num_str + rk

    r = pp[-1][27:len(pp[-1]) - 3]
    return rk, ck, r


if __name__ == '__main__':
    dates = ['2017-04-29', '2017-04-30', '2017-05-01']
    for date in dates:
        rk, ck, r = get_parameter(date)
        get_json2(date, rk, ck, r)
        print("-----")
Then use crontab to schedule the script as a timed task that runs once every 20 minutes.
Basic format (five time fields: minute, hour, day of month, month, day of week):
* * * * * command
So the entry is:
0,20,40 * * * * python ~/test.py
The script still has many problems; I am still learning.