First, download a file called SWFToImage.dll.
Then create a .bat file with the following contents and run it:
The batch commands are as follows:
rem Copy the SWFToImage library into the Windows system directory and
rem register it with regsvr32. Paths are quoted so that a %windir%
rem containing spaces is still handled as a single argument.
copy SWFToImage.dll "%windir%\system32"
regsvr32 "%windir%\system32\swftoimage.dll"
The Python code is as follows:
# Download a Baidu Wenku document with Python. Adapt it as needed; hints are
# given in the comments below.
# http://www.cnblogs.com/dearplain/
# code by Plain
#
# Usage: python <this script> <document url>
#
# The script targets Python 2 (urllib2) on Windows with pywin32 installed and
# the SWFToImage COM library registered. Every page is fetched as an SWF file,
# rendered to "<page>.jpg" through SWFToImage, and kept as "<page>.swf".
import urllib2
import win32com.client
import os
import sys

# Folder the downloaded documents are saved into (one sub-folder per title).
SAVE_DIR = r'D:\my project\pywenku'
# COM class id passed to Dispatch to create the SWFToImage object.
SWFTOIMAGE_CLSID = "{479a1aac-c148-40bb-9868-a9773da66af9}"
# Pieces of the per-page download URL: <PLAY_URL><doc id>?pn=<page>&rn=1
PLAY_URL = 'http://wenku.baidu.com' + '/play/'
PAGE_FROM = '?pn='
DOWN_NUM = '&rn='
# Number of leading bytes of every page response that are not part of the SWF
# payload and are stripped before saving.
HEADER_SIZE = 106
# The page count is searched for within this many leading bytes of page 1.
HEAD_PROBE = 45
# Sanity limit on the reported page count.
MAX_PAGES = 2000


def _fail(message):
    """Print ``message`` and terminate the script with a non-zero status."""
    print(message)
    sys.exit(1)


def _fetch(url, referer=None):
    """Return the body of ``url``, optionally sending a Referer header.

    The response object is always closed, even if reading fails.
    """
    req = urllib2.Request(url)
    if referer is not None:
        req.add_header("Referer", referer)
    res = urllib2.urlopen(req)
    try:
        return res.read()
    finally:
        res.close()


def _page_url(doc_id, page):
    """Build the download URL for one page of document ``doc_id``."""
    return PLAY_URL + doc_id + PAGE_FROM + str(page) + DOWN_NUM + '1'


def _parse_document_url(url):
    """Split a ".../view/<id>.html..." URL into ``(referer, doc_id)``.

    ``referer`` is everything up to and including the slash before "view";
    ``doc_id`` is the text between "/view/" and the first ".htm".
    Raises ValueError when either marker is missing.
    """
    view_at = url.index('/view/')
    referer = url[:view_at + 1]
    doc_id = url[view_at + len('/view/'):url.index('.htm')]
    return referer, doc_id


def _extract_title(html):
    """Return the text of <title>...</title> up to its last underscore.

    Raises ValueError when the tags or the underscore are missing.
    """
    start = html.index('<title>') + len('<title>')
    end = html.index('</title>', start)
    title = html[start:end]
    return title[:title.rindex('_')]


def _parse_page_count(data):
    """Read the page count from the head of the first page response.

    The count is the quoted value that follows the first '":"' within the
    first HEAD_PROBE bytes. Raises ValueError when the marker is missing or
    the value is not an integer.
    """
    head = data[0:HEAD_PROBE]
    start = head.index('":"') + len('":"')
    end = head.index('"', start)
    return int(head[start:end])


def _convert_to_jpg(converter, page):
    """Render frame 1 of "<page>.pywenku" to "<page>.jpg" via SWFToImage."""
    converter.InputSWFFileName = "%d.pywenku" % page
    # NOTE(review): output type 1 is presumably JPEG, since the result is
    # saved with a .jpg extension -- confirm against the SWFToImage docs.
    converter.ImageOutputType = 1
    converter.ImageWidth = 1048
    converter.ImageHeight = 1478
    converter.Execute_Begin()
    converter.FrameIndex = 1
    converter.Execute_GetImage()
    converter.SaveToFile("%d.jpg" % page)
    converter.Execute_End()


def _save_page(converter, page, data):
    """Store one page response as SWF and render its JPG.

    The payload is first written under a temporary ".pywenku" name and only
    renamed to ".swf" after the conversion finished, so an existing
    "<page>.swf" marks a fully processed page.
    """
    tmp_name = "%d.pywenku" % page
    with open(tmp_name, 'wb') as swf:
        swf.write(data[HEADER_SIZE:])
    _convert_to_jpg(converter, page)
    os.rename(tmp_name, "%d.swf" % page)


def main():
    """Download every page of the document whose URL is in sys.argv[1]."""
    # Example URLs:
    # url = "http://wenku.baidu.com/view/8d3ed840be1e650e52ea9938.html?from=rec&pos=1&weight=2&lastweight=2&count=5"
    # url = "http://wenku.baidu.com/view/f2fe7a3987c24028915fc37a.html?from=related&hasrec=1"
    # url is the address of the document you want to download.
    if len(sys.argv) < 2:
        _fail("Usage: %s <document url>" % sys.argv[0])
    url = sys.argv[1]
    if url.find("http://") != 0:
        _fail("error! The URL is not correct")

    os.chdir(SAVE_DIR)
    converter = win32com.client.Dispatch(SWFTOIMAGE_CLSID)

    print('Downloading %s' % url)
    try:
        referer, doc_id = _parse_document_url(url)
    except ValueError:
        _fail("Parse URL error")
    print(referer)

    # Try to get the title and make a directory named after it.
    # NOTE(review): the title is used exactly as received from the page;
    # this assumes its encoding and characters are acceptable as a directory
    # name on this system -- confirm.
    try:
        title = _extract_title(_fetch(url))
    except ValueError:
        _fail("Get title error")
    print('Downloading ' + title)
    if not os.path.isdir(title):
        os.mkdir(title)
    os.chdir(title)

    # The first page is always requested because its head carries the total
    # page count.
    first_page = _fetch(_page_url(doc_id, 1), referer)
    try:
        page_count = _parse_page_count(first_page)
    except ValueError:
        _fail("Get page count error")
    print('Pagenum: ' + str(page_count))
    if page_count <= 0 or page_count > MAX_PAGES:
        _fail("Error!!! pagenum<=0 or pagenum>2000")

    # Pages that already have an .swf file were completed by an earlier run
    # and are skipped. This includes page 1, whose rename would otherwise
    # fail on Windows because the destination already exists.
    finished = set(os.listdir("."))
    for page in range(1, page_count + 1):
        if '%d.swf' % page in finished:
            continue
        if page == 1:
            data = first_page
        else:
            data = _fetch(_page_url(doc_id, page))
        _save_page(converter, page, data)
    print('Task complete')


if __name__ == '__main__':
    main()