# -*- coding: utf-8 -*-
"""Crawl listing pages on www.ppdai.com and store the bid rows in SQLite.

For every page id in ``range(page_start, page_end)`` the script downloads
``http://www.ppdai.com/list/<id>``, extracts the (user, rate, amount, date,
time) table rows with a regular expression and inserts them into the
``tradelog`` table, committing in batches.

NOTE(review): this file was recovered from a copy whose whitespace and letter
case had been mangled.  The regular expression, the HTTP header casing, the
database path and the batch-commit message are reconstructions -- confirm
them against the original script / a live page before relying on them.
"""
import os
import re
import sqlite3

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 exposes the same Request/urlopen API from urllib.request.
    import urllib.request as urllib2

try:
    import winsound  # Windows-only; not used by the code in this file
except ImportError:
    winsound = None

# SQLite database file.  The ``tradelog`` table must already exist; the
# original kept its creation as commented-out code (column widths were lost
# in the mangled copy):
#   DROP TABLE IF EXISTS tradelog
#   CREATE TABLE tradelog (user_id varchar, money varchar, rate varchar,
#                          date varchar, time varchar(10))
DB_PATH = r'C:\Users\Jian fang\desktop\ppdai.db'  # TODO confirm exact path

LIST_URL = 'http://www.ppdai.com/list/'

INSERT_SQL = ('INSERT INTO tradelog (user_id, money, rate, date, time) '
              'VALUES (?,?,?,?,?)')

# The amount cell is stored without its first six characters -- presumably an
# HTML currency entity such as ``&#165;``; verify against a live page.
AMOUNT_PREFIX_LEN = 6

# Number of inserted rows between two commits.
COMMIT_BATCH_SIZE = 1000

# One match per <tr> holding four <td> cells: user link, rate, amount and a
# "date time" pair.  re.VERBOSE makes the line breaks below insignificant.
pattern = re.compile(r"""
    <tr>[^<]*
    <td>[^<]*
    <a\W*href='/user/[^>]*>(?P<user>[^<]*)</a>[^<]*
    </td>[^<]*
    <td>\s*
    (?P<rate>\S*)[^<]*
    </td>[^<]*
    <td>\s*
    (?P<amount>\S*)[^<]*
    </td>[^<]*
    <td>\s*
    (?P<date>\S*)\s*(?P<time>\S*)[^<]*
    </td>[^<]*
    </tr>
    """, re.VERBOSE | re.MULTILINE)

# Page ids to crawl: page_start inclusive, page_end exclusive.
page_start = 226153
page_end = 300000


def parse(url):
    """Download *url* and return its table rows as a list of dicts.

    Each dict has the keys ``user``, ``rate``, ``amount``, ``date`` and
    ``time`` (the named groups of ``pattern``).  Returns ``None`` when the
    page cannot be fetched or read, so the caller can skip it.
    """
    try:
        # Pretend to be a browser.
        req = urllib2.Request(url, None, {'User-Agent': 'Mozilla/5.0'})
        response = urllib2.urlopen(req)
        try:
            html = response.read()
        finally:
            response.close()
    except Exception:
        # Best effort: a failed page is skipped.  Unlike a bare ``except``,
        # this lets KeyboardInterrupt / SystemExit stop the crawl.
        return None
    if isinstance(html, bytes):
        # Work on text: sqlite3 stores text strings, and Python 2's sqlite3
        # rejects non-ASCII byte strings.
        # NOTE(review): assumes the site serves UTF-8 -- confirm.
        html = html.decode('utf-8', 'replace')
    return [m.groupdict() for m in pattern.finditer(html)]


def _record_to_row(record):
    """Return the INSERT parameter tuple for *record*, or None to skip it.

    Records whose amount (after removing the prefix) is ``'0'`` are skipped.
    """
    money = record['amount'][AMOUNT_PREFIX_LEN:]
    if money == '0':
        return None
    return (record['user'], money, record['rate'],
            record['date'], record['time'])


def main(start=page_start, end=page_end, db_path=DB_PATH):
    """Crawl pages ``start`` .. ``end - 1`` and insert their rows into SQLite."""
    ppdai_db = sqlite3.connect(db_path)
    cursor = ppdai_db.cursor()
    pending = 0  # rows inserted since the last commit
    page_id = '%d' % start
    try:
        for page_index in range(start, end):
            page_id = '%d' % page_index
            records = parse(LIST_URL + page_id)
            print(page_id)
            if records is None:
                continue
            for record in records:
                row = _record_to_row(record)
                if row is None:
                    continue
                cursor.execute(INSERT_SQL, row)
                pending += 1
                if pending == COMMIT_BATCH_SIZE:
                    ppdai_db.commit()  # save database
                    pending = 0
                    print('%d Records has been submitted!!!!!!!'
                          % COMMIT_BATCH_SIZE)
        ppdai_db.commit()  # save database
        print('Jobes done!')
    except Exception as exc:
        print('There is a error at ' + page_id)
        print(repr(exc))
    finally:
        # Keep the partially filled batch even when the crawl is aborted,
        # and always release the database file.
        try:
            ppdai_db.commit()
        finally:
            ppdai_db.close()


if __name__ == '__main__':
    main()
# P2P lending crawler: PPDai (www.ppdai.com) loan listings