Crawling Web Images Using Python
"""Download the ``.jpg`` images embedded in a web page.

The script fetches the page at ``url``, extracts every image link that
matches ``imgre`` and saves the images as ``1.jpg``, ``2.jpg``, ... in a
``pythonimg`` folder under the current working directory.
"""
import os
import re
import string  # kept from the original script; currently unused
import sys  # kept from the original script; currently unused
import urllib
import urllib.request

url = "http://tieba.baidu.com/p/2521298181"  # This is a post address

# Only ``src`` attributes that end in ``.jpg`` and are immediately followed by
# a ``pic_ext`` attribute are captured (presumably how the forum tags images
# that belong to a post -- verify against the live markup).
# A looser alternative is: re.findall(r'src="(http.+?\.jpg)"', html, re.I)
reg = r'src="(.+?\.jpg)" pic_ext'
imgre = re.compile(reg)


def fetch_page(page_url):
    """Return the decoded text of the page at *page_url*.

    The charset announced in the response headers is used when present,
    otherwise UTF-8. Undecodable bytes are replaced rather than raising, so
    one bad byte does not abort the whole crawl.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
    """
    # ``with`` closes the connection even if reading fails.
    with urllib.request.urlopen(page_url) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset, errors="replace")


def find_image_urls(html):
    """Return the image links in *html* that match ``imgre``, in page order."""
    return imgre.findall(html)


def download_images(urllist, filepath):
    """Save every URL in *urllist* into *filepath* as ``<n>.jpg`` (n from 1).

    The folder is created when missing. Returns the list of written paths.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(filepath, exist_ok=True)
    saved = []
    for x, imgurl in enumerate(urllist, start=1):
        # os.path.join keeps the path valid on every platform, not just Windows.
        temp = os.path.join(filepath, "%s.jpg" % x)
        print("downloading image %s" % x)
        print(imgurl)
        urllib.request.urlretrieve(imgurl, temp)
        saved.append(temp)
    return saved


def main():
    """Crawl ``url`` and download its images into ``./pythonimg``."""
    imgcontent = fetch_page(url)  # capture webpage content
    urllist = find_image_urls(imgcontent)  # extract the image links
    if not urllist:
        print("not found...")
        return
    # Download the images into the pythonimg folder of the current directory.
    filepath = os.path.join(os.getcwd(), "pythonimg")
    print("crawler ready...")
    download_images(urllist, filepath)
    print("download complete, the save path is " + filepath)


if __name__ == "__main__":
    main()