Well, I admit it: I spent the night looking for a suitable ticket transfer, but when I called I was told it had already been taken, which was painfully frustrating. So let's go straight to the code.
# coding: utf-8
"""Look for resold train tickets during the Spring Festival travel rush.

For every configured train and date, the listing pages of the sites in
``SOURCE`` are fetched, listings that mention a sleeper berth and have not
been reported before are collected, and the result is e-mailed to the
configured recipients. Reported listing URLs are remembered in
``record_file`` so that later runs do not report them again.

author: piglei2007@gmail.com
date: 2011.01.25

NOTE(review): this script was reconstructed from a copy whose whitespace,
letter case and several string literals had been mangled. Every value that
had to be guessed is marked with ``NOTE(review)`` below and must be
confirmed before the script is relied upon. The code targets Python 2
(``urllib2``, ``urlparse``, BeautifulSoup 3).
"""
import datetime
import io
import os
import re
import socket
import time
import traceback
import urllib2
import urlparse
from contextlib import closing

from BeautifulSoup import BeautifulSoup

# NOTE(review): the original timeout value was lost (the copy shows "m");
# 20 seconds is a placeholder.
SOCKET_TIMEOUT_SECONDS = 20
socket.setdefaulttimeout(SOCKET_TIMEOUT_SECONDS)

blank_re = re.compile(r"\s+")

# Install a cookie-aware opener with browser-like headers for all
# urllib2.urlopen() calls made by this module.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
opener.addheaders = [
    ("User-agent",
     "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.1) "
     "Gecko/20090704 Firefox/3.5"),
    ("Accept", "*/*"),
]
urllib2.install_opener(opener)

# Site key -> URL template taking ``train`` and ``date``.
# NOTE(review): the "58" key and the "?" joining path and query were lost in
# the copy; the key only has to agree with the check in parse_content().
SOURCE = {
    "58": "http://bj.58.com/huochepiao/?num=%(train)s&starttime=%(date)s00",
    "ganji": "http://bj.ganji.com/piao/cc_%(train)s/%(date)s/",
}

record_file = "/tmp/ticket_records.txt"

# Keyword a listing must contain to be reported (U+5367, "sleeper berth").
# NOTE(review): the copy shows u"lie", which looks like a translation of the
# original Chinese keyword; confirm.
SLEEPER_KEYWORD = u"\u5367"

# Text that must NOT follow the train number in a 58.com row
# (U+65F6 U+523B U+8868, "timetable").
# NOTE(review): the original pattern was garbled around the word
# "Timetable"; a negative lookahead is the most plausible reading. Confirm.
TIMETABLE_WORD = u"\u65f6\u523b\u8868"


def parse_record():
    """Return the set of listing URLs already reported by earlier runs.

    Reads one URL per line from ``record_file``, ignoring blank lines. If
    the file cannot be opened, an empty record file is created and an empty
    set is returned.
    """
    try:
        with io.open(record_file, "r", encoding="utf-8") as handle:
            return set(line.strip() for line in handle if line.strip())
    except IOError:
        # Typically the first run: start from an empty record file.
        with io.open(record_file, "w", encoding="utf-8"):
            pass
        return set()


def flush_record(records):
    """Overwrite ``record_file`` with ``records``, one URL per line."""
    with io.open(record_file, "w", encoding="utf-8") as handle:
        # Sorted so the file content is stable between runs.
        handle.write(u"\n".join(sorted(records)))


def main(config):
    """Crawl every source for every train/date and mail the new listings.

    ``config`` is a mapping with three keys:

    * ``"trains"`` -- iterable of train numbers,
    * ``"dates"``  -- iterable of date strings inserted into the URLs,
    * ``"people"`` -- iterable of recipient e-mail addresses.

    Exceptions raised while fetching, parsing or mailing propagate to the
    caller; in that case the record file is not updated, so the same
    listings are picked up again on the next run.
    """
    existed = parse_record()
    to_email = []
    for train in config["trains"]:
        for date in config["dates"]:
            for site, url_template in SOURCE.items():
                page_url = url_template % dict(train=train, date=date)
                with closing(urllib2.urlopen(page_url)) as response:
                    content = response.read()
                soup = BeautifulSoup(content)
                for href, text in parse_content(site, soup, train):
                    # Listing links may be relative to the site.
                    listing_url = urlparse.urljoin(url_template, href)
                    # Only sleeper tickets are of interest.
                    if listing_url not in existed and SLEEPER_KEYWORD in text:
                        to_email.append([text, listing_url])
                        existed.add(listing_url)
    if to_email:
        # NOTE(review): the copy shows an empty separator between entries;
        # since the mail is sent as HTML, a stripped "<br />" is assumed.
        body = u"<br />".join(u" | ".join(entry) for entry in to_email)
        simple_mail(config["people"], body.encode("utf-8"))
    flush_record(existed)


def parse_content(type, soup, train):
    """Extract ``[href, text]`` pairs for ``train`` from a parsed page.

    ``type`` is a key of ``SOURCE`` and selects the site-specific scraping
    rules; ``soup`` is the BeautifulSoup document of the listing page.
    Returns a list, which is empty when nothing matches or ``type`` is
    unknown.
    """
    result = []
    if type == "58":
        # NOTE(review): id/class/tag names were case-mangled in the copy;
        # lower case is assumed.
        info_table = soup.find("table", id="infolist")
        if info_table:
            pattern = re.compile(
                u"%s(?!%s)" % (re.escape(train), TIMETABLE_WORD), re.I)
            for x in info_table.findAll("tr", text=pattern):
                # Presumably each match is a text node whose parent is the
                # listing's <a> tag -- verify against BeautifulSoup 3.
                a = x.parent
                _text = blank_re.sub("", a.text)
                result.append([a["href"], _text])
    if type == "ganji":
        for x in soup.findAll("dl", {"class": "list_piao"}):
            a = x.dt.a
            result.append([a["href"], a.text])
    return result


EMAIL_HOST = 'smtp.sohu.com'
EMAIL_HOST_USER = 'yourname@sohu.com'
EMAIL_HOST_PASSWORD = 'yourpassword'
# NOTE(review): the port value was lost in the copy; 25 is the standard
# SMTP port and is assumed here.
EMAIL_PORT = 25


def simple_mail(to, content):
    """Send ``content`` as an HTML e-mail to the addresses in ``to``.

    ``content`` is the UTF-8 encoded message body. The message is sent
    through ``EMAIL_HOST`` using the ``EMAIL_HOST_USER`` account.
    """
    import smtplib
    from email.mime.text import MIMEText

    msg_root = MIMEText(content, 'html', 'UTF-8')
    msg_root['Subject'] = "[%s] there's a ticket,!!!!." % (
        datetime.datetime.today().isoformat(" "))
    msg_root['From'] = EMAIL_HOST_USER
    msg_root['To'] = ", ".join(to)

    s = smtplib.SMTP(EMAIL_HOST, EMAIL_PORT)
    try:
        s.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD)
        s.sendmail(EMAIL_HOST_USER, to, msg_root.as_string())
    finally:
        # Release the connection even when login or sending fails.
        s.close()


def switch_time_zone():
    """Switch the process time zone to Asia/Shanghai."""
    os.environ["TZ"] = "Asia/Shanghai"
    time.tzset()


switch_time_zone()

if __name__ == '__main__':
    config = {
        "trains": ("k471",),
        "dates": ("20110129",),
        "people": ("youremail@sohu.com",),
    }
    try:
        main(config)
        print("%s: ok" % datetime.datetime.today())
    except Exception:
        # Top-level boundary for an unattended run: report and exit normally.
        print(traceback.format_exc())
Then just put it in cron, and you know the rest.