Python code for downloading documents from Baidu Library (Baidu Wenku)

Source: Internet
Author: User
First, download the COM component SWFToImage.dll.
Then create a .bat file with the commands below and run it to register the component.
The batch file is as follows:

rem Copy the SWFToImage COM component into the system directory and register it.
COPY SWFToImage.dll "%windir%\system32"
regsvr32 "%windir%\system32\swftoimage.dll"

The Python code is as follows:

# Download a Baidu Wenku (Baidu Library) document with Python 2. Each page is
# fetched as an SWF file and rendered to a JPEG through the SWFToImage COM
# component. Adjust the constants below to your needs.
# http://www.cnblogs.com/dearplain/
# code by Plain
#
# Usage:   python <this script> <document url>
# Example: http://wenku.baidu.com/view/f2fe7a3987c24028915fc37a.html?from=related&hasrec=1
import urllib2
import win32com.client
import os
import sys

# Folder in which a sub-folder named after the document title is created.
# Raw string so the backslashes are never read as escape sequences.
OUTPUT_DIR = r'D:\my project\pywenku'
# Class id passed to COM to create the converter; presumably the id under which
# SWFToImage.dll was registered with regsvr32 -- confirm against your install.
SWFTOIMAGE_CLSID = "{479a1aac-c148-40bb-9868-a9773da66af9}"
# Endpoint that serves one page of a document: <PLAY_URL><doc id>?pn=<page>&rn=1
PLAY_URL = 'http://wenku.baidu.com/play/'
# The page count is looked up in the first HEAD_LENGTH bytes of a page response.
HEAD_LENGTH = 45
# Bytes dropped from the front of every page response before it is saved as SWF.
# NOTE(review): presumably the length of a header preceding the SWF -- confirm.
SWF_OFFSET = 106
# Sanity bound on the reported page count.
MAX_PAGES = 2000
# Output image size (width, height); only set explicitly for the first page.
IMAGE_SIZE = (1048, 1478)


def _parse_url(url):
    """Return ``(referer, doc_id)`` extracted from a document URL.

    ``referer`` is the text from 'http' up to the first 'v' (the site root for
    a ``.../view/<id>.html`` URL); ``doc_id`` is the text between 'ew/' and
    '.htm'. Raises ValueError when one of the markers is missing.
    """
    referer = url[url.index('http'):url.index('v')]
    doc_id = url[url.index('ew/') + 3:url.index('.htm')]
    return referer, doc_id


def _extract_title(html):
    """Return the <title> text of ``html`` up to its last underscore.

    Raises ValueError when there is no <title> element or no underscore in it.
    """
    start = html.index('<title>') + len('<title>')
    end = html.index('</title>', start)
    title = html[start:end]
    return title[:title.rindex('_')]


def _parse_page_count(head):
    """Return the integer found between the first '":"' and the next '"'.

    Raises ValueError when the markers are missing or the text between them is
    not an integer.
    """
    start = head.index('":"') + len('":"')
    end = head.index('"', start)
    return int(head[start:end])


def _page_url(doc_id, page):
    """Build the URL that serves page number ``page`` of document ``doc_id``."""
    return '%s%s?pn=%d&rn=1' % (PLAY_URL, doc_id, page)


def _fetch(url, referer=None):
    """Return the body of ``url``; the response is closed even if reading fails."""
    request = urllib2.Request(url)
    if referer is not None:
        request.add_header("Referer", referer)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        response.close()


def _save_page(converter, page, swf_data, size=None):
    """Write one page as ``<page>.swf`` and render it to ``<page>.jpg``.

    The SWF is written under a temporary ``.pywenku`` name and renamed only
    after the image was saved, so an interrupted page never leaves behind the
    ``<page>.swf`` file that the resume check in main() looks for.
    """
    tmp_name = "%d.pywenku" % page
    with open(tmp_name, 'wb') as swf:
        swf.write(swf_data)
    # NOTE(review): member names use the casing of the SWFToImage API as far as
    # it could be restored from the lower-cased listing -- confirm.
    converter.InputSWFFileName = tmp_name
    converter.ImageOutputType = 1  # presumably JPEG, matching the .jpg output
    if size is not None:
        converter.ImageWidth, converter.ImageHeight = size
    converter.Execute_Begin()
    converter.FrameIndex = 1
    converter.Execute_GetImage()
    converter.SaveToFile("%d.jpg" % page)
    converter.Execute_End()
    os.rename(tmp_name, "%d.swf" % page)


def main():
    """Download the document whose URL is given as the first argument.

    Exits with an error message and a non-zero status when the argument is
    missing or malformed, or when the title or page count cannot be parsed.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <document url>' % sys.argv[0])
    url = sys.argv[1]  # address of the document to download
    if not url.startswith("http://"):
        sys.exit("Error! The URL is not correct")
    print('Downloading %s' % url)
    try:
        referer, doc_id = _parse_url(url)
    except ValueError:
        sys.exit("Parse URL error")
    print(referer)

    os.chdir(OUTPUT_DIR)
    converter = win32com.client.Dispatch(SWFTOIMAGE_CLSID)

    # Read the title from the document page and work inside a folder named after it.
    try:
        title = _extract_title(_fetch(url))
    except ValueError:
        sys.exit("Get title error")
    print('Downloading ' + title)
    if title not in os.listdir("."):
        os.mkdir(title)
    os.chdir('./' + title)

    # The first page response also carries the total page count in its head.
    first_page = _fetch(_page_url(doc_id, 1), referer)
    try:
        page_count = _parse_page_count(first_page[:HEAD_LENGTH])
    except ValueError:
        sys.exit("Parse page count error")
    print('Pagenum: ' + str(page_count))
    if page_count <= 0 or page_count > MAX_PAGES:
        sys.exit("Error!!! pagenum<=0 or pagenum>2000")
    _save_page(converter, 1, first_page[SWF_OFFSET:], IMAGE_SIZE)

    # Download from the second page to the last, skipping pages that already
    # exist as .swf from an earlier run.
    existing = set(os.listdir("."))
    for page in range(2, page_count + 1):
        if '%d.swf' % page in existing:
            continue
        # As in the original script, only the first request sends a Referer.
        _save_page(converter, page, _fetch(_page_url(doc_id, page))[SWF_OFFSET:])
    print('Task complete')


if __name__ == '__main__':
    main()

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.