Parse the HTML of a Douban music site page to find the URLs of all mp3 files (using regular-expression matching), then use urllib.urlretrieve to download each file directly to the local machine. The code below downloads the files using multiple threads:
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
import os
import re
import thread
import threading
import urllib
import urllib2
# Match music URLs
# Compiled once at import time and reused for every page.
# Group 1 is the track title, group 2 is the (JSON-escaped) mp3 URL.
# Optional whitespace around the colons is tolerated so the pattern works
# whether or not the embedded JSON is pretty-printed; matching is
# case-insensitive, so "rawUrl" / "RawUrl" are both accepted.
reg = re.compile(
    r'\{"name"\s*:\s*"(.+?)".+?"rawUrl"\s*:\s*"(.+?)",.+?\}',
    re.I,
)
class downloader(threading.Thread):
    """Thread that downloads a single URL to a local file.

    url  -- address of the remote file to fetch.
    name -- local file name the download is saved under.
    """

    def __init__(self, url, name):
        threading.Thread.__init__(self)
        self.url = url
        # NOTE: Thread already exposes a ``name`` property, so this also
        # becomes the thread's name. Kept as-is because callers read it.
        self.name = name

    def run(self):
        """Fetch ``self.url`` and store it at ``self.name``."""
        # Parenthesised single-argument form prints identically on
        # Python 2 and stays valid syntax on Python 3.
        print('downloading from %s' % self.url)
        urllib.urlretrieve(self.url, self.name)
# Every download thread that has been started; the script entry point
# joins each of them so the process does not exit mid-download.
threads = []
# Multi-threaded file download
def main(url):
    """Start one download thread for every mp3 track found on *url*.

    The page is fetched, scanned with the module-level ``reg`` pattern,
    and a ``downloader`` thread is started for each match. Started
    threads are appended to the module-level ``threads`` list; this
    function does not wait for them to finish.
    """
    response = urllib.urlopen(url)
    try:
        text = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    for g in reg.finditer(text):
        title = g.group(1).strip()
        # The title comes from the remote page; a path separator in it
        # would point outside the current directory (or at a directory
        # that does not exist), so neutralise separators.
        for sep in (os.sep, os.altsep):
            if sep:
                title = title.replace(sep, '_')
        name = title + ".mp3"
        # URLs are JSON-escaped in the page ("http:\/\/..."); drop the
        # backslashes to recover the real address.
        path = g.group(2).replace('\\', '')
        t = downloader(path, name)
        threads.append(t)
        t.start()
if __name__ == '__main__':
    # Start a download thread for every track on the page ...
    main("http://site.douban.com/huazhou")
    # ... then block until all of them have finished.
    for t in threads:
        t.join()