# coding=utf-8
"""Bulk-download the XSD files published under springframework.org/schema.

The script lists the schema index page, treats every relative link ending in
'/' as a schema folder, creates a local folder of the same name inside
``basedir`` and downloads every ``*.xsd`` link found in that folder's page.
"""
import os
import re
import socket

try:  # Python 2 module layout (what this script was written for)
    from urllib2 import urlopen
    from urllib import urlretrieve
except ImportError:  # Python 3 module layout
    from urllib.request import urlopen, urlretrieve

from bs4 import BeautifulSoup

# Timeout period (seconds) applied to every socket opened below.
socket.setdefaulttimeout(5)

# An empty folder that must already exist; downloads are stored relative to it.
basedir = u"E:\\spring"
os.chdir(basedir)

host = "http://www.springframework.org"
schema_root = host + "/schema"


def getfoldername(url):
    """Return the text after the last '/' in ``url``.

    Returns None when ``url`` contains no '/' at all.
    """
    pattern = re.compile(r'.*/(.*)')
    match = pattern.match(url)
    # Greedy '.*/' consumes up to the last slash, so group(1) is the tail.
    if match:
        return match.group(1)
    return None


# A collection of Spring schema URLs. Nothing in this script reads it; it is
# kept as a module-level name for reference.
List = [
    'http://www.springframework.org/schema/beans',
    'http://www.springframework.org/schema/aop',
    'http://www.springframework.org/schema/mvc',
    'http://www.springframework.org/schema/p',
    'http://www.springframework.org/schema/context',
    'http://www.springframework.org/schema/tx',
]


def getfilesbyurl(url):
    """Return the relative link targets found on the page at ``url``.

    Only hrefs that do not start with '/' and that end with 'xsd' or '/'
    are returned, in page order. If the page cannot be fetched the error is
    printed and an empty list is returned.
    """
    found = []
    try:
        response = urlopen(url)
        try:
            html = response.read()
        finally:
            # Release the connection even if reading fails part-way.
            response.close()
    except Exception as err:
        print(err)
        # Without a page there is nothing to parse; do not fall through.
        return found

    soup = BeautifulSoup(html, "lxml")
    # href=True skips anchors that carry no href attribute.
    for link in soup.find_all('a', href=True):
        fileName = link['href']
        if fileName.startswith('/'):
            continue
        if fileName.endswith('xsd') or fileName.endswith('/'):
            found.append(fileName)
    return found


folders = getfilesbyurl(schema_root)
print(folders)

for entry in folders:
    # Only directory links are schema folders; stripping the last character
    # from anything else would corrupt the name.
    if not entry.endswith('/'):
        continue
    name = entry[:-1]
    print(name)
    url = schema_root + "/" + name
    folder = getfoldername(url)
    print(folder)
    if not os.path.exists(folder):
        os.mkdir(folder)

    files = getfilesbyurl(url)
    print(files)
    for fileName in files:
        # Nested directory links cannot be saved as a file; skip them.
        if fileName.endswith('/'):
            continue
        source = url + "/" + fileName
        try:
            print(source)
            urlretrieve(source, os.path.join(folder, fileName))
        except Exception as err:
            # Best effort: report the failure and carry on with the next file.
            print(err)
# Python: bulk download of the Spring XSD schema files