python完成網頁下載(包括圖片和css)

來源:互聯網
上載者:User
# Save a web page together with its stylesheets and images.
#
# NOTE: this script targets Python 2 (urllib2 / urlparse).
import os
import posixpath
import re
import urllib2
from urlparse import urljoin, urlparse

from bs4 import BeautifulSoup

siteUrls = " "
url = "http://www.sina.com.cn"


def _page_name(url):
    """Return the base name used for the saved page and its asset directory.

    The query string of ``url`` is used when there is one (for example
    ``id=42`` for ``http://host/view.php?id=42``).  URLs without a query
    string fall back to a filesystem-friendly form of host + path, so that
    plain addresses such as ``http://www.sina.com.cn`` no longer raise
    ``IndexError``.
    """
    matches = re.findall(r'/[^\?]*\?([^/|^\?]*)$', url)
    if matches and matches[0]:
        return matches[0]
    parts = urlparse(url)
    fallback = re.sub(r'[^A-Za-z0-9._-]+', '_', parts.netloc + parts.path)
    return fallback.strip('_') or 'index'


def _fetch(address):
    """Download ``address`` and return the raw response body."""
    response = urllib2.urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


def _localize_assets(url, content, tags, attr_pattern):
    """Download the resource referenced by each tag and rewrite ``content``.

    ``attr_pattern`` is a regex with one capture group that extracts the
    reference (the ``href`` or ``src`` value) from the serialized tag.  Each
    reference is resolved against ``url``, saved under the page's asset
    directory, and every occurrence of the reference in ``content`` is
    replaced with the local relative path.  Returns the rewritten content.
    """
    page_name = _page_name(url)
    seen = set()
    for tag in tags:
        found = re.findall(attr_pattern, str(tag))
        if not found or found[0] in seen:
            continue  # no reference on this tag, or already downloaded
        ref = found[0]
        seen.add(ref)

        # Name the local file after the last path segment of the reference,
        # ignoring any query string.
        local_name = posixpath.basename(urlparse(ref).path)
        if not local_name:
            continue

        # Resolve relative, root-relative and absolute references against
        # the page URL instead of a fixed host.
        target = urljoin(url, ref)
        if urlparse(target).scheme not in ('http', 'https'):
            continue  # e.g. inline "data:" URIs cannot be downloaded

        if not os.path.isdir(page_name):
            os.mkdir(page_name)
        local_path = page_name + "/" + local_name
        data = _fetch(target)
        # Binary mode: images must be written byte-for-byte.
        with open(local_path, 'wb') as out:
            out.write(data)
        content = content.replace(ref, local_path)
    return content


def getContent(url):
    """Download ``url`` with its CSS and images and save it as ``<name>.html``.

    ``<name>`` is derived from the URL by ``_page_name``; assets are stored
    in a directory of the same name next to the HTML file.
    """
    content = _fetch(url)
    content = writeCss(url, content)
    content = writefileName(url, content)
    fileName = _page_name(url)
    print(fileName)
    with open(fileName + ".html", 'wb') as page:
        page.write(content)


def writeCss(url, content):
    """Save every ``<link type="text/css">`` stylesheet of the page locally.

    Returns ``content`` with the stylesheet references rewritten to the
    downloaded copies.
    """
    soup = BeautifulSoup(content)
    links = soup.findAll('link', attrs={'type': 'text/css'})
    print(_page_name(url))
    return _localize_assets(url, content, links, r'.*href=\"([^\"]*)\"')


def writefileName(url, content):
    """Save every ``<img>`` of the page locally (despite the function's name).

    Returns ``content`` with the image references rewritten to the
    downloaded copies.
    """
    soup = BeautifulSoup(content)
    images = soup.findAll('img')
    return _localize_assets(url, content, images, r'.*src=\"([^\"]*)\"')


if __name__ == "__main__":
    getContent(url)

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.