Python example: simulating a Taobao login and computing Taobao spending statistics

Source: Internet
Author: User
Tags datetime urlencode pprint

The total on my ten-year Alipay bill is a little scary, but it lumps together far too many kinds of spending. I only wanted to see exactly how much I had spent on Taobao, so I wrote a script that computes Taobao order spending statistics for any time period. Judging by the results, I am actually quite frugal on Taobao.
The main task of the script is to simulate a browser login and parse the "bought items" page to extract the orders, and the items in each order, for the specified period.

For usage, see the code or run the command with the -h option. BeautifulSoup4 is also required; BeautifulSoup's official download page: https://www.crummy.com/software/BeautifulSoup/bs4/download/

First, how to use the code:

python taobao.py -u username -p password -s start-date -e end-date --verbose

All parameters are optional, such as:

python taobao.py -u Jinnlynn

Computes statistics for all orders of the user Jinnlynn.

python taobao.py -s 2014-12-12 -e 2014-12-12

Computes statistics for the orders placed on 2014-12-12 (the script prompts for the user name when the command is executed).

python taobao.py --verbose

This computes the statistics and also prints the details of every order.

Well, that's all there is to it; let's look at the code:

"""Log in to Taobao and summarise order spending for a date range.

This is a Python 2 script (it relies on ``urllib2``, ``cookielib``,
``urlparse`` and ``raw_input``) and needs the third-party BeautifulSoup4
package. It simulates a browser login, walks the "bought items" pages and
prints spending statistics, optionally followed by per-order details.

Usage::

    python taobao.py -u username -p password -s 2014-11-11 -e 2014-12-12 --verbose

All arguments are optional; the user name and password are prompted for
when they are not given on the command line.
"""

from __future__ import unicode_literals, print_function, absolute_import, division

import urllib
import urllib2
import urlparse
import cookielib
import re
import sys
import os
import json
import subprocess
import argparse
import platform

from getpass import getpass
from datetime import datetime
from pprint import pprint

try:
    from bs4 import BeautifulSoup
except ImportError:
    sys.exit('BeautifulSoup4 missing.')

__version__ = '1.0.0'
__author__ = 'Jinnlynn'
__copyright__ = 'Copyright (c) 2014 Jinnlynn'
__license__ = 'The MIT License'

# Headers sent with every request issued through _request().
# NOTE(review): 'x-requestted-with', 'ContentType' and 'chartset' look like
# typos of 'X-Requested-With', 'Content-Type' and 'charset'. They are kept as
# found because the server-side effect of correcting them cannot be verified
# from this file.
# NOTE(review): gzip/deflate is advertised but responses are never
# decompressed anywhere in this script -- confirm the server does not compress.
HEADERS = {
    'x-requestted-with': 'XMLHttpRequest',
    'Accept-Language': 'zh-cn',
    'Accept-Encoding': 'gzip, deflate',
    'ContentType': 'application/x-www-form-urlencoded; chartset=UTF-8',
    'Cache-Control': 'no-cache',
    'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/40.0.2214.38 Safari/537.36'),
    'Connection': 'Keep-Alive',
}

# Template of the login form; login_post() overlays the real values on a copy.
DEFAULT_POST_DATA = {
    'TPL_username': '',          # user name
    'TPL_password': '',          # password
    'TPL_checkcode': '',         # captcha text
    'need_check_code': 'false',
    'callback': '0',             # a value here makes the server answer in JSON
}

# Order states that are excluded from the statistics.
INVALID_ORDER_STATES = [
    'CREATE_CLOSED_OF_TAOBAO',   # cancelled
    'TRADE_CLOSED',              # order closed
]

LOGIN_URL = 'https://login.taobao.com/member/login.jhtml'

# Encoding used for prompts passed to raw_input().
RAW_IMPUT_ENCODING = 'gbk' if platform.system() == 'Windows' else 'utf-8'

# CSS classes marking rows that are add-on services rather than goods:
# freight insurance, value-added service, staged payment (deposit / balance).
_EXTRA_SERVICE_CLASSES = ('freight-info', 'service-info', 'stage-item')

_DATE_FORMAT = '%Y-%m-%d'


def _request(url, data, method='POST'):
    """Send an HTTP request with HEADERS and return the response object.

    ``data`` is a mapping (or None). It is URL-encoded and sent as the body
    for POST, or appended to ``url`` as the query string for GET.
    """
    if data:
        data = urllib.urlencode(data)
    if method == 'GET':
        if data:
            url = '{}?{}'.format(url, data)
        data = None
    req = urllib2.Request(url, data, HEADERS)
    return urllib2.urlopen(req)


def stdout_cr(msg=''):
    """Overwrite the current console line with ``msg`` (progress display)."""
    # Blank out what was there first, then return to column 0 and write.
    sys.stdout.write('\r{:10}'.format(''))
    sys.stdout.write('\r{}'.format(msg))
    sys.stdout.flush()


def get(url, data=None):
    """Issue a GET request; ``data`` becomes the query string."""
    return _request(url, data, method='GET')


def post(url, data=None):
    """Issue a POST request; ``data`` becomes the form body."""
    return _request(url, data, method='POST')


def login_post(data):
    """POST the login form and return the decoded JSON reply.

    ``data`` overrides the defaults in DEFAULT_POST_DATA. A copy is used so
    the module-level template is not modified by the call.
    """
    login_data = dict(DEFAULT_POST_DATA)
    login_data.update(data)
    res = post(LOGIN_URL, login_data)
    return json.load(res, encoding='gbk')


def login(usr, pwd):
    """Log in as ``usr``/``pwd``, asking for a captcha when the server wants one.

    Exits the process with an error message when the login is rejected or
    the reply cannot be interpreted.
    """
    data = {
        'TPL_username': usr.encode(
            'utf-8' if platform.system() == 'Windows' else 'GB18030'),
        'TPL_password': pwd,
    }

    # 1. Try to log in, retrying for as long as the server asks for a captcha.
    ret = login_post(data)
    while not ret.get('state', False):
        code = ret.get('data', {}).get('code', 0)
        if code == 3425 or code == 1000:
            print('INFO: {}'.format(ret.get('message')))
            captcha_url = ret.get('data', {}).get('ccurl')
            if not captcha_url:
                sys.exit('ERROR. code: {}, message:{}'.format(
                    code, ret.get('message', '')))
            check_code = checkcode(captcha_url)
            data.update({'TPL_checkcode': check_code,
                         'need_check_code': 'true'})
            ret = login_post(data)
        else:
            sys.exit('ERROR. code: {}, message:{}'.format(
                code, ret.get('message', '')))

    token = ret.get('data', {}).get('token')
    print('LOGIN SUCCESS. token: {}'.format(token))

    # 2. Redirect.
    # 2.1 Fetch the "st" value for the token.
    res = get('https://passport.alipay.com/mini_apply_st.js', {
        'site': '0',
        'token': token,
        'callback': 'stCallback4',
    })
    content = res.read()
    match = re.search(r'"st":"(\S*)"( |})', content)
    if match is None:
        sys.exit('ERROR. "st" value not found in the login response.')
    st = match.group(1)
    # 2.2 Follow the redirect with that value.
    # NOTE(review): 'TPL_uesrname' looks like a typo of 'TPL_username'; it is
    # kept as found because the server-side effect cannot be verified here.
    get('http://login.taobao.com/member/vst.htm',
        {'st': st, 'TPL_uesrname': usr.encode('GB18030')})


def checkcode(url):
    """Download the captcha image at ``url``, show it and return the typed text."""
    filename, _ = urllib.urlretrieve(url)
    if not filename.endswith('.jpg'):
        # Give the file an image extension so a viewer is chosen for it.
        old_fn = filename
        filename = '{}.jpg'.format(filename)
        os.rename(old_fn, filename)
    system = platform.system()
    if system == 'Darwin':
        # macOS: open with the default viewer.
        subprocess.call(['open', filename])
    elif system == 'Windows':
        # Windows: open with the associated program without going through a
        # shell command line.
        os.startfile(filename)
    else:
        # Other systems: print the file name and let the user open it.
        print('Open the file for authentication code: {}'.format(filename))
    return raw_input('Input captcha: '.encode(RAW_IMPUT_ENCODING))


def _parse_baobei(row):
    """Parse one ``tr.order-bd`` row.

    Returns ``(bb, amount)``: ``bb`` is the item dict and ``amount`` is the
    real payment found in this row, or None when the row does not carry it.
    """
    bb = {}
    if any(ex in row.attrs['class'] for ex in _EXTRA_SERVICE_CLASSES):
        # Add-on service row: recorded, but not counted as goods.
        name_node = row.find('td', class_='baobei')
        bb['name'] = name_node.text.strip()
        bb['url'] = ''
        bb['spec'] = ''
        bb['snapshot'] = ''
        bb['price'] = 0.0
        bb['quantity'] = 1
        bb['is_goods'] = False
        try:
            bb['url'] = name_node.find('a').attrs['href']
            bb['price'] = float(row.find('td', class_='price').text)
        except (AttributeError, KeyError, ValueError):
            # Link and price are optional on service rows; keep the defaults.
            pass
    else:
        name_node = row.select('p.baobei-name a')
        bb['name'] = name_node[0].text.strip()
        bb['url'] = name_node[0].attrs['href']
        # The second link, when present, is the item snapshot.
        bb['snapshot'] = ''
        if len(name_node) > 1:
            bb['snapshot'] = name_node[1].attrs['href']
        bb['spec'] = row.select('.spec')[0].text.strip()
        # NOTE(review): the class name 'value' is as found in the source text;
        # the service branch above reads td.price -- confirm against the page.
        bb['price'] = float(row.find('td', class_='value').attrs['title'])
        bb['quantity'] = int(row.find('td', class_='quantity').attrs['title'])
        bb['is_goods'] = True

    # The cell holding the real payment may span several rows, so it is only
    # present on some of them.
    amount = None
    amount_node = row.select('td.amount em.real-price')
    if amount_node:
        amount = float(amount_node[0].text)
    return bb, amount


def parse_bought_list(start_date=None, end_date=None):
    """Walk the "bought items" pages and return a list of order dicts.

    Orders later than ``end_date`` are skipped; parsing stops at the first
    order earlier than ``start_date``. Each returned order has the keys
    ``pos``, ``orderid``, ``status``, ``shop_name``, ``shop_url``, ``date``,
    ``amount`` and ``baobei`` (a list of item dicts). Orders whose header
    cannot be parsed are reported as errors and left out of the result.
    """
    url = 'http://buyer.trade.taobao.com/trade/itemlist/list_bought_items.htm'

    stdout_cr('working... {:.0%}'.format(0))
    # 1. Fetch and parse the first page.
    res = urllib2.urlopen(url)
    soup = BeautifulSoup(res.read().decode('gbk'))
    # 2. Read the paging information from the "jump to page" element.
    # NOTE(review): the casing of 'J_JumpTo' and 'pageNum' below is restored
    # by inference from a case-mangled source -- confirm against the page.
    page_jump = soup.find('span', id='J_JumpTo')
    jump_url = page_jump.attrs['data-url']
    url_parts = urlparse.urlparse(jump_url)
    query_data = dict(urlparse.parse_qsl(url_parts.query))
    total_pages = int(query_data['tPage'])

    orders = []
    errors = []
    cur_page = 1
    out_date = False
    while True:
        bought_items = soup.find_all('tbody', attrs={'data-orderid': True})
        for count, item in enumerate(bought_items, 1):
            # Position of the order on the site: page.index
            pos = '{}.{}'.format(cur_page, count)
            info = {}
            baobei = []
            try:
                info['pos'] = pos
                info['orderid'] = item.attrs['data-orderid']
                info['status'] = item.attrs['data-status']
                # Shop
                node = item.select('tr.order-hd a.shopname')
                if not node:
                    # No shop: possibly a complimentary lottery order; ignore.
                    continue
                info['shop_name'] = node[0].attrs['title'].strip()
                info['shop_url'] = node[0].attrs['href']
                # Date
                node = item.select('tr.order-hd span.dealtime')[0]
                info['date'] = datetime.strptime(node.attrs['title'],
                                                 '%Y-%m-%d %H:%M')

                if end_date and info['date'].toordinal() > end_date.toordinal():
                    continue
                if start_date and info['date'].toordinal() < start_date.toordinal():
                    # Everything from here on is older than the range.
                    out_date = True
                    break

                # Items
                for n in item.find_all('tr', class_='order-bd'):
                    try:
                        bb, amount = _parse_baobei(n)
                        baobei.append(bb)
                        if amount is not None:
                            info['amount'] = amount
                    except Exception as e:
                        errors.append({
                            'type': 'baobei',
                            'id': pos,
                            'node': '{}'.format(n),
                            'error': '{}'.format(e),
                        })
            except Exception as e:
                errors.append({
                    'type': 'order',
                    'id': pos,
                    'node': '{}'.format(item),
                    'error': '{}'.format(e),
                })
                # A half-parsed order would break the statistics; leave it out.
                continue

            if 'amount' not in info:
                # Keep the order but flag that its payment is unknown.
                errors.append({
                    'type': 'order',
                    'id': pos,
                    'node': '{}'.format(item),
                    'error': 'real payment not found',
                })
                info['amount'] = 0.0
            info['baobei'] = baobei
            orders.append(info)

        stdout_cr('working... {:.0%}'.format(cur_page / total_pages))

        # Next page
        cur_page += 1
        if cur_page > total_pages or out_date:
            break
        query_data.update({'pageNum': cur_page})
        page_url = '{}?{}'.format(url, urllib.urlencode(query_data))
        res = urllib2.urlopen(page_url)
        soup = BeautifulSoup(res.read().decode('gbk'))

    stdout_cr()
    if errors:
        print('INFO. Error occurred, the statistical results may not be accurate.')
    return orders


def output(orders, start_date, end_date):
    """Print the spending summary for ``orders``.

    ``start_date`` / ``end_date`` are the requested bounds (datetime or
    None); when a bound is missing the oldest order date / the current time
    is shown instead.
    """
    amount = 0.0
    org_amount = 0
    baobei_count = 0
    order_count = 0
    invaild_order_count = 0
    for order in orders:
        if order['status'] in INVALID_ORDER_STATES:
            invaild_order_count += 1
            continue
        amount += order.get('amount', 0.0)
        order_count += 1
        for baobei in order.get('baobei', []):
            if not baobei['is_goods']:
                continue
            org_amount += baobei['price'] * baobei['quantity']
            baobei_count += baobei['quantity']

    print('{:<9} {}'.format('Cumulative consumption:', amount))
    print('{:<9} {}/{}'.format('Order/Baby:', order_count, baobei_count))
    if invaild_order_count:
        print('{:<9} {} (return or cancel, etc., not within the above order)'.format(
            'Invalid order:', invaild_order_count))
    print('{:<7} {}'.format('Baby original Total Price:', org_amount))
    print('{:<7} {:.2f}'.format(
        'Baby average price:',
        0 if baobei_count == 0 else org_amount / baobei_count))
    print('{:<9} {} ({:.2%})'.format(
        'Saved (?):',
        org_amount - amount,
        0 if org_amount == 0 else (org_amount - amount) / org_amount))

    # Orders are listed newest first, so the last one is the oldest.
    oldest = orders[-1]['date'] if orders else None
    from_date = start_date if start_date else oldest
    to_date = end_date if end_date else datetime.now()
    if from_date is None:
        # No start bound and no orders: the lower end is unknown.
        print('{:<9} -- - {:%Y-%m-%d}'.format('Statistical interval:', to_date))
    else:
        print('{:<9} {:%Y-%m-%d} - {:%Y-%m-%d}'.format(
            'Statistical interval:', from_date, to_date))
    if not start_date and oldest is not None:
        print('{:<9} {:%Y-%m-%d %H:%M}'.format('Prodigal starts with:', oldest))


def ouput_orders(orders):
    """Print every order followed by the goods it contains."""
    print('All orders:')
    if not orders:
        print('  --')
        return
    for order in orders:
        print('  {:-^20}'.format('-'))
        print('  * Order number: {orderid}  Real payment: {amount}  '
              'Shop: {shop_name}  Time: {date:%Y-%m-%d %H:%M}'.format(**order))
        for bb in order['baobei']:
            if not bb['is_goods']:
                continue
            print('    - {name}'.format(**bb))
            if bb['spec']:
                print('      {spec}'.format(**bb))
            print('      {price} X {quantity}'.format(**bb))


def _parse_date_arg(value):
    """Convert a YYYY-MM-DD argument to datetime; None/'' gives None.

    Exits the process with an error message when the text is not a date.
    """
    if not value:
        return None
    try:
        return datetime.strptime(value, _DATE_FORMAT)
    except ValueError as e:
        sys.exit('ERROR. {}'.format(e))


def main():
    """Command-line entry point: parse arguments, log in, report."""
    parser = argparse.ArgumentParser(prog='python {}'.format(__file__))
    parser.add_argument('-u', '--username', help='Taobao username')
    parser.add_argument('-p', '--password', help='Taobao password')
    parser.add_argument('-s', '--start',
                        help='start time, optional, format such as: 2014-11-11')
    parser.add_argument('-e', '--end',
                        help='end time, optional, format such as: 2014-11-11')
    parser.add_argument('--verbose', action='store_true', default=False,
                        help='Order detail output')
    parser.add_argument('-v', '--version', action='version',
                        version='v{}'.format(__version__),
                        help='version number')
    args = parser.parse_args()

    usr = args.username
    if not usr:
        usr = raw_input('Input Taobao user name: '.encode(RAW_IMPUT_ENCODING))
    # NOTE(review): input is decoded as UTF-8 on every platform although
    # prompts are encoded as GBK on Windows -- confirm for non-ASCII names.
    usr = usr.decode('utf-8')

    pwd = args.password
    if not pwd:
        if platform.system() == 'Windows':
            # The custom prompt is not displayed correctly on Windows.
            pwd = getpass()
        else:
            pwd = getpass('Enter Taobao password: '.encode('utf-8'))
    pwd = pwd.decode('utf-8')

    verbose = args.verbose
    start_date = _parse_date_arg(args.start)
    end_date = _parse_date_arg(args.end)
    if start_date and end_date and start_date > end_date:
        sys.exit('ERROR, end date must be later than or equal to start date')

    # Cookies are kept in a per-user file between runs (best effort).
    cj_file = './{}.tmp'.format(usr)
    cj = cookielib.LWPCookieJar()
    try:
        cj.load(cj_file)
    except IOError:
        # No usable cookie file yet (LoadError is an IOError too).
        pass
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj),
                                  urllib2.HTTPHandler)
    urllib2.install_opener(opener)

    login(usr, pwd)

    try:
        cj.save(cj_file)
    except IOError:
        # Failing to persist cookies must not stop the report.
        pass

    orders = parse_bought_list(start_date, end_date)
    output(orders, start_date, end_date)

    # Order details
    if verbose:
        ouput_orders(orders)


if __name__ == '__main__':
    main()

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.