# The code is as follows:
# -*- coding: utf-8 -*-
'''
Created on 2013-12-5
@author: good-temper
'''
import time
import urllib2

import bs4
def getPage(urlStr):
    '''
    Fetch a URL and return the raw response body.

    urlStr -- address passed unchanged to urllib2.urlopen.

    Returns whatever the response object's read() yields. Errors raised by
    urllib2.urlopen or by read() are not caught here and propagate to the
    caller.
    '''
    response = urllib2.urlopen(urlStr)
    try:
        return response.read()
    finally:
        # Release the connection even when read() raises.
        response.close()
def getNextPageUrl(currPageNum):
    '''
    Build the listing URL for page currPageNum + 1 and probe it.

    currPageNum -- number of listing pages already handled.

    The page is downloaded once to look for a <span> whose class is
    "next-disabled". Returns the URL when no such span is found, otherwise
    the empty string.
    '''
    # URL template:
    # http://list.jd.com/9987-653-655-0-0-0-0-0-0-0-1-1-<page>-1-1-72-4137-33.html
    # NOTE(review): the original expression on this line was corrupted; it is
    # rebuilt here from the template above, with <page> = currPageNum + 1.
    # Confirm against a live listing URL.
    url = (u'http://list.jd.com/9987-653-655-0-0-0-0-0-0-0-1-1-'
           + str(currPageNum + 1)
           + '-1-1-72-4137-33.html')

    # Whether the next page exists
    soup = bs4.BeautifulSoup(getPage(url))
    disabledNext = soup.findAll('span', {'class': 'next-disabled'})
    if not disabledNext:
        return url
    return ''
def analyzeList():
    '''
    Walk the listing pages and collect the product links found on them.

    Starting from page counter 0, repeatedly asks getNextPageUrl for a URL,
    downloads that page, and gathers the href of the first <a> inside every
    <div class="p-name">. Stops when getNextPageUrl returns ''.

    Returns the collected href values as a list, in page order. The running
    page counter is printed after each page.
    '''
    # NOTE(review): getNextPageUrl already downloads the page to probe it, so
    # every page is fetched twice. Also, if the "next-disabled" marker sits on
    # the final page itself, that page is never scraped -- confirm.
    pageNum = 0
    links = []
    url = getNextPageUrl(pageNum)
    while url != '':
        soup = bs4.BeautifulSoup(getPage(url))
        # NOTE(review): class name and tag/attribute literals were restored to
        # lower case ('p-name', 'a', 'href'); the mangled source had them
        # capitalised or padded with spaces.
        for elem in soup.findAll('div', {'class': 'p-name'}):
            # elem is already a parsed tag; search it directly instead of
            # serialising and re-parsing it.
            anchor = elem.find('a')
            if anchor is None:
                continue  # no link inside this product block
            href = anchor.get('href')
            if href:
                links.append(href)
        pageNum = pageNum + 1
        print(pageNum)
        url = getNextPageUrl(pageNum)
    return links
def analyzeContent(url):
    '''
    Placeholder for per-product page analysis.

    url -- currently ignored.

    Always returns the empty string.
    '''
    return ''
def writeToFile(list, path):
    '''
    Append every element of list to the file at path, one per line.

    list -- iterable of strings (the name shadows the builtin but is kept
            because it is part of the function's signature).
    path -- file to open in append mode; created if it does not exist.

    Each element is written followed by a newline. Errors from open() or
    write() propagate to the caller.
    '''
    # 'with' closes the file even if a write fails part-way through.
    with open(path, 'a') as f:
        f.writelines(elem + '\n' for elem in list)
# Script entry point: scrape all listing pages, report how many links were
# collected, and append them to a data file.
if __name__ == '__main__':
    urls = analyzeList()
    print('capture all ' + str(len(urls)) + 'entries\n')
    # Backslash is escaped so the literal means e:\jd_phone_list.dat on every
    # Python version.
    writeToFile(urls, u'e:\\jd_phone_list.dat')