Python 實現的一個火車票轉讓資訊採集器

來源:互聯網
上載者:User
好吧,我承認我是對晚上看到一張合適的票轉讓但打過電話去說已經被搞走了這件事情感到蛋疼。直接上檔案吧。

# coding: utf-8
"""Poll classified-ad sites for train-ticket transfer listings and mail new hits.

Written for the Spring Festival travel rush.

Author: piglei2007@gmail.com
Date: 2011.01.25

This is a Python 2 script (urllib2, urlparse, BeautifulSoup 3). It is meant to
be run periodically, e.g. from cron.
"""
import datetime
import io
import os
import re
import smtplib
import socket
import time
import traceback
import urllib2
import urlparse
from contextlib import closing
from email.header import Header
from email.mime.text import MIMEText
from xml.sax.saxutils import escape

from BeautifulSoup import BeautifulSoup

# Global socket timeout (seconds) so a stalled site cannot hang the run.
socket.setdefaulttimeout(20)

BLANK_RE = re.compile(r"\s+")

# Install a cookie-aware opener with browser-like headers for every
# urllib2.urlopen() call made by this script.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
opener.addheaders = [
    ("User-agent", "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.1) Gecko/20090704 Firefox/3.5"),
    ("Accept", "*/*"),
]
urllib2.install_opener(opener)

# Listing-page URL templates keyed by source name; filled in with
# ``train`` and ``date`` from the config.
SOURCE = {
    "58": "http://bj.58.com/huochepiao/?Num=%(train)s&StartTime=%(date)s00",
    "ganji": "http://bj.ganji.com/piao/cc_%(train)s/%(date)s/",
}

# One already-seen listing URL per line.
RECORD_FILE = "/tmp/ticket_records.txt"


def parse_record():
    """Return the set of listing URLs recorded by previous runs.

    If the record file cannot be read (typically because it does not exist
    yet), an empty file is created and an empty set is returned.
    """
    try:
        with io.open(RECORD_FILE, "r", encoding="utf-8") as record_file:
            stripped = (line.strip() for line in record_file)
            # Skip blank lines so an empty string never becomes a "seen" URL.
            return set(line for line in stripped if line)
    except IOError:
        with io.open(RECORD_FILE, "w", encoding="utf-8"):
            pass
        return set()


def flush_record(records):
    """Overwrite the record file with ``records``, one URL per line."""
    with io.open(RECORD_FILE, "w", encoding="utf-8") as record_file:
        # Sorted only to keep the file contents stable between runs.
        record_file.write(u"\n".join(sorted(records)))


def main(config):
    """Fetch every configured train/date from every source and mail new hits.

    ``config`` is a mapping with the keys ``"trains"`` (train numbers),
    ``"dates"`` (date strings substituted into the URL templates) and
    ``"people"`` (recipient e-mail addresses).

    Every listing URL found is remembered in the record file. Only listings
    that were not seen before and whose text contains the sleeper-berth
    character are mailed. Records are flushed after the mail is sent, so a
    failed send leaves the record file untouched and the listings are
    reported again on the next run.
    """
    existed = parse_record()
    to_email = []
    for train in config["trains"]:
        for date in config["dates"]:
            for source_name, url_template in SOURCE.items():
                page_url = url_template % dict(train=train, date=date)
                with closing(urllib2.urlopen(page_url)) as response:
                    content = response.read()
                soup = BeautifulSoup(content)
                for href, text in parse_content(source_name, soup, train):
                    # Resolve against the page actually fetched, not the
                    # template, so relative links do not keep literal
                    # "%(train)s" placeholders.
                    link = urlparse.urljoin(page_url, href)
                    # Sleeper berths only!
                    if link not in existed and u"臥" in text:
                        to_email.append([text, link])
                    existed.add(link)
    if to_email:
        # The mail is sent as HTML: escape the scraped text and put each
        # listing on its own line.
        lines = [escape(u" | ".join(entry)) for entry in to_email]
        content = u"<br/>\n".join(lines).encode("utf-8")
        simple_mail(config["people"], content)
    flush_record(existed)


def parse_content(type, soup, train):
    """Extract ``[href, text]`` pairs for ``train`` from a parsed listing page.

    ``type`` is a key of ``SOURCE`` selecting the site-specific parsing rules,
    ``soup`` is the BeautifulSoup document of the listing page and ``train``
    is the train number searched for. Unknown ``type`` values yield an empty
    list. Entries without a usable link are skipped.
    """
    result = []
    if type == "58":
        info_table = soup.find("table", id="infolist")
        if info_table is not None:
            # Match the train number unless it is directly followed by the
            # word for "timetable". The train number is escaped so it is
            # always matched literally.
            pattern = re.compile(u"%s(?!時刻表)" % re.escape(train), re.I)
            for match in info_table.findAll("tr", text=pattern):
                # The parent of the matched text node is presumably the
                # listing's <a> element -- verify against the live markup.
                anchor = match.parent
                href = anchor.get("href") if anchor is not None else None
                if not href:
                    continue
                result.append([href, BLANK_RE.sub("", anchor.text)])
    elif type == "ganji":
        for item in soup.findAll("dl", {"class": "list_piao"}):
            title = item.dt
            anchor = title.a if title is not None else None
            href = anchor.get("href") if anchor is not None else None
            if not href:
                continue
            result.append([href, anchor.text])
    return result


EMAIL_HOST = 'smtp.sohu.com'
EMAIL_HOST_USER = 'yourname@sohu.com'
EMAIL_HOST_PASSWORD = 'yourpassword'
EMAIL_PORT = 25


def simple_mail(to, content):
    """Send ``content`` as a UTF-8 HTML e-mail to every address in ``to``.

    ``to`` is a sequence of recipient addresses and ``content`` is the
    UTF-8 encoded message body. Errors raised by smtplib propagate to the
    caller; the connection is closed either way.
    """
    message = MIMEText(content, 'html', 'UTF-8')
    subject = "[%s]有票來啦!!!!" % datetime.datetime.today().isoformat(" ")
    # The subject is not ASCII, so it must be encoded as an RFC 2047 header.
    message['Subject'] = Header(subject, 'UTF-8')
    message['From'] = EMAIL_HOST_USER
    message['To'] = ", ".join(to)

    server = smtplib.SMTP(EMAIL_HOST, EMAIL_PORT)
    try:
        server.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD)
        server.sendmail(EMAIL_HOST_USER, to, message.as_string())
        server.quit()
    finally:
        # Harmless after a successful quit(); releases the socket when
        # login or sendmail failed.
        server.close()


def switch_time_zone():
    """Switch the process time zone to Asia/Shanghai."""
    os.environ["TZ"] = "Asia/Shanghai"
    time.tzset()


switch_time_zone()


if __name__ == '__main__':
    config = {
        "trains": ("k471",),
        "dates": ("20110129",),
        "people": (
            "youremail@sohu.com",
        )
    }
    try:
        main(config)
        print("%s: ok" % datetime.datetime.today())
    except Exception:
        # Top-level boundary for an unattended run: report the failure in
        # the job output.
        print(traceback.format_exc())

然後放入cron,你懂的。

  • 聯繫我們

    該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

    如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

    A Free Trial That Lets You Build Big!

    Start building with 50+ products and up to 12 months usage for Elastic Compute Service

    • Sales Support

      1 on 1 presale consultation

    • After-Sales Support

      24/7 Technical Support 6 Free Tickets per Quarter Faster Response

    • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.