Original Python 2 version:
#!/usr/bin/env python
#-*-coding:utf-8-*-
#-*-author:nancy-*-
# Python2 Crawl all background images of Bing homepage
import os
import re
import sys
import urllib


def Get_bing_backphoto():
    """Download Bing homepage background images into ``photos/`` (Python 2).

    For each index, requests the HPImageArchive JSON, extracts every image
    URL found between the ``"url"`` and ``"urlbase"`` keys with a regular
    expression, and saves each image under the last path component of its
    URL. Creates the ``photos`` directory if it does not exist.

    Uses the Python 2 ``urllib.urlopen`` / ``urllib.urlretrieve`` API.

    Raises:
        SystemExit: with status -1 when the service answers with the
            literal body ``null``.
    """
    if not os.path.exists('photos'):
        os.mkdir('photos')

    # The pattern is the same for every response, so compile it once.
    reg = re.compile('"url":"(.*?)","urlbase"', re.S)

    # NOTE(review): the loop bound was unreadable in the listing; 10 is assumed.
    for i in range(0, 10):
        url = ('http://cn.bing.com/HPImageArchive.aspx?format=js&idx=' + str(i)
               + '&n=1&nc=1361089515117&form=hylh1')
        html = urllib.urlopen(url).read()
        if html == 'null':
            # A parenthesized single-argument print behaves identically to
            # the print statement in Python 2.
            print('Open & Read Bing error!')
            sys.exit(-1)
        text = re.findall(reg, html)
        # Example of a matched URL:
        # http://s.cn.bing.net/az/hprichbg/rb/longji_zh-cn8658435963_1366x768.jpg
        for imgurl in text:
            # Strip everything up to and including the last '/' to get the
            # bare file name.
            right = imgurl.rindex('/')
            name = imgurl.replace(imgurl[:right + 1], '')
            savepath = 'photos/' + name
            urllib.urlretrieve(imgurl, savepath)
            print(name + ' Save success!')


if __name__ == '__main__':
    Get_bing_backphoto()
Errors when running the Python 2 script under Python 3, and how to fix them: "TypeError: can't use a string pattern on a bytes-like object" occurs because in Python 3 the data passed to findall is of type bytes while the pattern is a str, so html = html.decode('utf-8') must be added before the regular-expression call. "AttributeError: 'module' object has no attribute 'urlopen'" occurs because the urllib module was reorganized in Python 3, so urllib here must be changed to urllib.request. In addition, the JSON format returned by Bing's public image interface has changed, and the way the Python library is imported has changed, so the code is adjusted as follows:
#!/usr/bin/env python
#-*-coding:utf-8-*-
#-*-author:nancy-*-
# Python3 Crawl All background images of Bing homepage
import os
import re
import sys
import urllib.request


def get_bing_backphoto():
    """Download Bing homepage background images into ``photos/`` (Python 3).

    For each index, requests the HPImageArchive JSON, rewrites the relative
    ``/az/`` image paths to absolute ``http://s.cn.bing.net/az/`` URLs,
    extracts every URL found between the ``"url"`` and ``"urlbase"`` keys,
    and saves each image under the last path component of its URL.

    Raises:
        SystemExit: with status -1 when the service answers with the
            literal body ``null``.
    """
    os.makedirs('photos', exist_ok=True)

    # The pattern is the same for every response, so compile it once.
    reg = re.compile('"url":"(.*?)","urlbase"', re.S)

    # NOTE(review): the loop bound is spelled "ten" in the listing; 10 is assumed.
    for i in range(0, 10):
        url = ('http://cn.bing.com/HPImageArchive.aspx?format=js&idx=' + str(i)
               + '&n=1&nc=1361089515117&form=hylh1')
        # read() returns bytes in Python 3: decode before any comparison or
        # regex match against str, otherwise the 'null' check can never be
        # true and findall raises TypeError.
        with urllib.request.urlopen(url) as response:
            html = response.read().decode('utf-8')
        if html == 'null':
            print('Open & read Bing error!')
            sys.exit(-1)

        # The service returns host-relative image paths; make them absolute.
        html = html.replace('/az/', 'http://s.cn.bing.net/az/')
        for imgurl in reg.findall(html):
            print(imgurl)
            # Keep only the file name after the last '/'.
            name = imgurl.rsplit('/', 1)[-1]
            savepath = 'photos/' + name
            urllib.request.urlretrieve(imgurl, savepath)
            print(name + ' Save success!')


if __name__ == '__main__':
    get_bing_backphoto()
Crawling the Bing homepage background images with Python