This article introduces a collector for train-ticket transfer listings, implemented in Python. The listings are collected from 58 Tongcheng (58.com) and Ganji. If you need something like this, feel free to use it as a reference. I admit the motivation was a headache: I would spot a suitable ticket for transfer in the evening, only to call and be told it had already been taken. Here is the file itself.
# -*- coding: utf-8 -*-
"""
Spring Festival train ticket transfer information collector.

Fetches the 58.com and Ganji ticket-transfer listing pages for the configured
trains and dates, and e-mails every sleeper listing whose URL has not been
recorded by a previous run.

Targets Python 2 (urllib2, urlparse, BeautifulSoup 3).

Author: piglei2007@gmail.com
Date: 1.01.01.25 (garbled in the source; presumably 2011.01.25)
"""
import re
import os
import time
import urlparse
import datetime
import traceback
import urllib2
import socket

from BeautifulSoup import BeautifulSoup

# Default timeout, in seconds, for every socket created from here on.
socket.setdefaulttimeout(20)

# Matches any run of whitespace; used to squeeze listing text onto one line.
BLANK_RE = re.compile(r"\s+")

# Install a cookie-aware opener with browser-like headers for all urlopen calls.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
opener.addheaders = [
    ("User-agent", "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.1) "
                   "Gecko/20090704 Firefox/3.5"),
    ("Accept", "*/*"),
]
urllib2.install_opener(opener)

# Listing-page URL templates, filled with dict(train=..., date=...).
SOURCE = {
    "58": "http://bj.58.com/huochepiao/?Num=%(train)s&StartTime=%(date)s00",
    "ganji": "http://bj.ganji.com/piao/cc_%(train)s/%(date)s/",
}

# One already-reported listing URL per line.
RECORD_FILE = "/tmp/ticket_records.txt"

# Keyword a listing must contain to be reported ("only sleepers").
# NOTE(review): the literal was lost in the garbled source (it read u"", which
# matches everything). U+5367 is presumably the original: the character shared
# by the Chinese terms for hard/soft sleeper berths -- confirm.
SLEEPER_KEYWORD = u"\u5367"


def parse_record():
    """Return the set of listing URLs stored in RECORD_FILE.

    Each line of the file is stripped and becomes one element. If the file
    cannot be read (IOError), it is created empty and an empty set is returned.
    """
    try:
        with open(RECORD_FILE, "r") as record_file:
            return set(line.strip() for line in record_file)
    except IOError:
        # First run (or unreadable file): create/truncate it for later runs.
        with open(RECORD_FILE, "w"):
            pass
        return set()


def flush_record(records):
    """Overwrite RECORD_FILE with *records*, one entry per line."""
    with open(RECORD_FILE, "w") as record_file:
        record_file.write("\n".join(records))


def main(config):
    """Start capturing.

    *config* is a mapping with the keys "trains", "dates" (iterables of
    strings substituted into the SOURCE templates) and "people" (the mail
    recipients). Every new sleeper listing is collected and sent in a single
    e-mail; the set of seen URLs is then written back to RECORD_FILE.
    """
    existed = parse_record()
    to_email = []
    for train in config["trains"]:
        for date in config["dates"]:
            for source_name, url_template in SOURCE.items():
                page_url = url_template % dict(train=train, date=date)
                content = urllib2.urlopen(page_url).read()
                soup = BeautifulSoup(content)
                for href, text in parse_content(source_name, soup, train):
                    # Listing hrefs are resolved against the site's template URL.
                    listing_url = urlparse.urljoin(url_template, href)
                    # Only sleepers, and only listings not reported before.
                    if listing_url not in existed and SLEEPER_KEYWORD in text:
                        to_email.append([text, listing_url])
                        existed.add(listing_url)
    if to_email:
        # NOTE(review): the outer separator is empty in the source, so entries
        # run together in the HTML mail; a line-break tag may have been lost
        # in extraction -- confirm.
        content = "".join("| ".join(entry) for entry in to_email).encode("UTF-8")
        simple_mail(config["people"], content)
    flush_record(existed)


def parse_content(type, soup, train):
    """Get vehicle information.

    Extracts listings from a parsed page. *type* is a SOURCE key ("58" or
    "ganji"), *soup* the BeautifulSoup document, *train* the train number
    searched for on 58.com pages.

    Returns a list of [href, text] pairs; empty if nothing matched or *type*
    is not a known source.
    """
    result = []
    if type == "58":
        info_table = soup.find("table", id="infolist")
        if info_table:
            # NOTE(review): "Timetable" looks machine-translated; the scraped
            # pages are Chinese, so the original lookahead was presumably the
            # Chinese word for timetable -- confirm.
            pattern = re.compile(u"%s(?!Timetable)" % re.escape(train), re.I)
            # Searching by text yields the matching text nodes; the parent of
            # each is taken to be the link tag that carries the href.
            for x in info_table.findAll("tr", text=pattern):
                a = x.parent
                _text = BLANK_RE.sub("", a.text)
                result.append([a["href"], _text])
    if type == "ganji":
        for x in soup.findAll("dl", {"class": "list_piao"}):
            a = x.dt.a
            result.append([a["href"], a.text])
    return result


# SMTP account used for sending; the values below are placeholders.
EMAIL_HOST = 'smtp.sohu.com'
EMAIL_HOST_USER = 'yourname@sohu.com'
EMAIL_HOST_PASSWORD = 'yourpassword'
EMAIL_PORT = 25


def simple_mail(to, content):
    """Send email.

    Sends *content* as a UTF-8 HTML message from EMAIL_HOST_USER to every
    address in *to* (an iterable of address strings). SMTP errors propagate
    to the caller; the connection is closed either way.
    """
    import smtplib
    from email.mime.text import MIMEText

    msg_root = MIMEText(content, 'html', 'utf-8')
    msg_root['Subject'] = "[%s] You have a ticket !!!!" % (
        datetime.datetime.today().isoformat(" "))
    msg_root['From'] = EMAIL_HOST_USER
    msg_root['To'] = ",".join(to)

    s = smtplib.SMTP(EMAIL_HOST, EMAIL_PORT)
    try:
        s.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD)
        s.sendmail(EMAIL_HOST_USER, to, msg_root.as_string())
    finally:
        s.close()


def switch_time_zone():
    """Switch the process time zone to Asia/Shanghai."""
    os.environ["TZ"] = "Asia/Shanghai"
    time.tzset()


switch_time_zone()

if __name__ == '__main__':
    config = {
        "trains": ("k471",),
        "dates": ("20110129",),
        "people": ("youremail@sohu.com",),
    }
    try:
        main(config)
        print("%s: OK" % datetime.datetime.today())
    except Exception:
        # Top-level boundary of a cron job: report the failure and exit.
        print(traceback.format_exc())
Schedule it with cron and you are all set.