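# Supports zip, rar, 7z, tar.gz and other archive formats.
# Input: an archive file name; output: an XML description of the archive's file structure.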
#!/usr/bin/python
#coding=utf-8
import uuid
import os
import zipfile
import re
import sys
import officetoimage
import rarfile
import tarfile
import subprocess
########SET CODE TO DEAL CHINESE############
# Make UTF-8 the interpreter-wide default encoding so that implicit
# str/unicode conversions of non-ASCII (e.g. Chinese) names do not fail.
default_encoding = 'utf-8'
if default_encoding != sys.getdefaultencoding():
    # sys is reloaded before setdefaultencoding is called on it.
    reload(sys)
    sys.setdefaultencoding(default_encoding)
#######TO GENERATE XML USING LIST##########
#######DIR IS LIST OF FILE STRUCT##########
def xml_generate(dir):
    """Render a flat list of archive member names as nested XML elements.

    ``dir`` is a list of '/'-separated names relative to the current level.
    A name ending in '/' is a directory and becomes
    ``<directory name="...">...</directory>``; any other name without a '/'
    becomes ``<file>...</file>``. The children of a directory are the other
    entries that start with the directory's name; they are rendered
    recursively with that prefix removed.

    Entries that live below this level are only emitted through the
    recursive call for their directory entry, so a nested name whose
    directory has no entry of its own in ``dir`` is not emitted.

    Returns the concatenated XML fragment (no root element); '' for an
    empty list.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    parts = []
    for entry in dir:
        name, _, rest = entry.partition('/')
        if rest != '':
            # Belongs to a deeper level; handled by the directory's recursion.
            continue
        if entry.endswith('/'):
            # Strip the prefix by slicing rather than with a regex, so names
            # containing regex metacharacters ('+', '(', '[' ...) work.
            children = [other[len(entry):] for other in dir
                        if other.startswith(entry) and other != entry]
            # Escape markup characters so the generated XML stays well formed.
            parts.append('<directory name="%s">'
                         % escape(name, {'"': '&quot;'}))
            if children:
                parts.append(xml_generate(children))
            parts.append('</directory>')
        else:
            parts.append('<file>%s</file>' % escape(name))
    return ''.join(parts)
#######THIS IS A START TO GENERATE XML##########
#######XML WILL BE WRITTEN TO THE FILE NAMED BY OUTNAME##########
def xml_start(dir,outname):
    """Write the XML description of an archive listing to ``outname``.

    ``dir`` is the list of member names handed to xml_generate; its output
    is wrapped in an XML declaration and a ``<source>`` root element.
    The parent directory of ``outname`` is created (mode 0755) if missing.
    Returns None.
    """
    body = xml_generate(dir)
    head = """<?xml version="1.0" encoding="UTF-8" ?>
<source>%s</source>""" %body
    # Use the real parent directory: taking the first path component breaks
    # for nested paths, absolute paths and bare file names.
    parent = os.path.dirname(outname)
    if parent and not os.path.exists(parent):
        os.makedirs(parent, 0o755)
    # 'with' closes the file even if the write fails.
    with open(outname, 'w') as out:
        out.write(head)
#######TO START DEAL ZIP##########
def zip_deal(filename,readname,pwd):
    """Read one member of a zip archive.

    filename -- path of the zip archive
    readname -- name of the member inside the archive
    pwd      -- password handed to ZipFile.setpassword

    Returns the member's content as read by zipfile. If zipfile cannot read
    the member, extraction is delegated to z7_deal (the external ``7z``
    tool) and the value z7_deal returns is passed through.

    NOTE(review): z7_deal returns the *path* of the extracted file, not its
    content, so the two return paths differ in meaning. A caller that writes
    the result to a file would write the path text in the fallback case;
    confirm which behaviour callers expect.

    Prints 'error zip' and exits the process (status 0) when the archive
    cannot be opened.
    """
    try:
        file_to_deal = zipfile.ZipFile(filename, 'r')
    except (IOError, zipfile.BadZipfile):
        # BadZipfile covers files that exist but are not valid zip archives.
        print('error zip')
        sys.exit(0)
    try:
        file_to_deal.setpassword(pwd)
        try:
            document = file_to_deal.read(readname)
        except Exception:
            # Deliberate fallback: let 7z try what zipfile could not read.
            document = None
    finally:
        # Close the archive on every path, including the fallback.
        file_to_deal.close()
    if document is None:
        return z7_deal(filename, readname, pwd)
    return document
#######TO START DEAL RAR##########
def rar_deal(filename,readname,pwd):
    """Read one member of a rar archive and return its content.

    filename -- path of the rar archive
    readname -- name of the member inside the archive
    pwd      -- password, applied only when the archive reports it needs one

    Prints 'error rar open' / 'error rar read' and exits the process
    (status 0) when the archive cannot be opened or the member cannot be
    read.
    """
    try:
        file_to_deal = rarfile.RarFile(filename)
    except (IOError, rarfile.Error):
        # rarfile.Error covers files that exist but are not valid rar archives.
        print('error rar open')
        sys.exit(0)
    try:
        if file_to_deal.needs_password():
            file_to_deal.setpassword(pwd)
        try:
            return file_to_deal.read(readname)
        except Exception:
            print('error rar read')
            sys.exit(0)
    finally:
        # Runs on success and on the exit path, so the archive never leaks.
        file_to_deal.close()
#######TO START DEAL TAR##########
def tar_deal(filename,readname):
    """Read one member of a tar archive and return its content.

    filename -- path of the tar archive (compression is auto-detected by
                tarfile.open)
    readname -- name of the member inside the archive

    Prints 'error' and exits the process (status 0) when the archive cannot
    be opened, the member does not exist, or the member is not a regular
    file (e.g. a directory).
    """
    try:
        file_to_deal = tarfile.open(filename)
    except (IOError, tarfile.TarError):
        # TarError covers files that exist but are not valid tar archives.
        print('error')
        sys.exit(0)
    try:
        try:
            member = file_to_deal.extractfile(readname)
        except Exception:
            member = None
        # extractfile returns None for members without file content
        # (directories, devices ...); treat that like a failed read.
        if member is None:
            print('error')
            sys.exit(0)
        return member.read()
    finally:
        file_to_deal.close()
#######TO START DEAL 7Z##########
def z7_deal(filename,readname,pwd):
    """Extract one member of an archive with the external ``7z`` tool.

    filename -- path of the archive
    readname -- name of the member inside the archive
    pwd      -- archive password ('' or None for no password)

    The member is extracted flat (``7z e``) into a fresh ``tmp/<uuid>``
    directory. Returns the path where the extracted file is expected,
    ``tmp/<uuid>/<basename of readname>``; the path is returned whether or
    not the extraction succeeded.
    """
    out_dir = 'tmp/' + str(uuid.uuid1())
    # An argument list (no shell) keeps spaces and shell metacharacters in
    # the arguments from being interpreted; '--' stops 7z switch parsing so
    # a name starting with '-' is not taken as an option.
    command = ['7z', 'e', '-o' + out_dir, '-p' + (pwd or ''), '--',
               filename, readname]
    try:
        # Discard 7z's own report so it does not mix with this script's output.
        with open(os.devnull, 'w') as devnull:
            subprocess.call(command, stdout=devnull)
    except OSError as exc:
        # 7z is missing or not executable: report it, keep the best-effort contract.
        sys.stderr.write('error 7z: %s\n' % exc)
    return out_dir + '/' + readname.split('/')[-1]
#######FILE TO ZIP AND GET THE FILE STRUCT##########
#######IF IS FILE RETURN PDF##########
def file_to_zip(filename,pwd):
    """List a zip archive as XML, or extract one member and convert it.

    filename -- either ``archive.zip`` or ``archive.zip/path/inside``; the
                text before the first '/' is taken as the archive path
    pwd      -- archive password

    When ``filename`` ends with '.zip' the member list is written to
    ``tmp/<uuid>.xml`` via xml_start and ``(xml_path, 'success')`` is
    returned. Otherwise the member is obtained with zip_deal, written to
    ``tmp/<uuid>/<basename>``, passed to officetoimage.file_to_pdf, and
    ``(extracted_path, <file_to_pdf result>)`` is returned.

    Prints 'error' and exits the process (status 0) when the archive cannot
    be opened.
    """
    archive, _, member = filename.partition('/')
    try:
        file_to_deal = zipfile.ZipFile(archive, 'r')
    except (IOError, zipfile.BadZipfile):
        print('error')
        sys.exit(0)
    try:
        search_list = file_to_deal.namelist()
    finally:
        file_to_deal.close()

    if filename.endswith('.zip'):
        out = 'tmp/' + str(uuid.uuid1()) + '.xml'
        xml_start(search_list, out)
        return out, 'success'

    out_dir = 'tmp/' + str(uuid.uuid1())
    outname = out_dir + '/' + member.split('/')[-1]
    if not os.path.exists(out_dir):
        os.makedirs(out_dir, 0o755)
    # NOTE(review): zip_deal's 7z fallback returns a path instead of the
    # member content; in that case the path text is what gets written here.
    file_content = zip_deal(archive, member.decode('utf-8'), pwd)
    # Member content is raw bytes: write in binary mode so no newline
    # translation can corrupt it, and close the file even on error.
    with open(outname, 'wb') as out_file:
        out_file.write(file_content)
    name_pdf = officetoimage.file_to_pdf(outname)
    return outname, name_pdf
def file_to_rar(filename,pwd):
    """List a rar archive as XML, or extract one member and convert it.

    filename -- either ``archive.rar`` or ``archive.rar/path/inside``; the
                text before the first '/' is taken as the archive path
    pwd      -- password, applied only when the archive reports it needs one

    When ``filename`` ends with '.rar' the member list (backslashes turned
    into '/', directories marked with a trailing '/') is written to
    ``tmp/<uuid>.xml`` via xml_start and ``(xml_path, 'success')`` is
    returned. Otherwise the member is read with rar_deal, written to
    ``tmp/<uuid>/<basename>``, passed to officetoimage.file_to_pdf, and
    ``(extracted_path, <file_to_pdf result>)`` is returned.

    Prints 'error rar' and exits the process (status 0) when the archive
    cannot be opened.
    """
    archive, _, member = filename.partition('/')
    try:
        file_to_deal = rarfile.RarFile(archive, 'r')
    except (IOError, rarfile.Error):
        print('error rar')
        sys.exit(0)
    try:
        if file_to_deal.needs_password():
            file_to_deal.setpassword(pwd)
        names = file_to_deal.namelist()
        dir_names = set(info.filename for info in file_to_deal.infolist()
                        if info.isdir())
    finally:
        file_to_deal.close()

    search_list = []
    for name in names:
        is_dir = name in dir_names
        name = name.replace('\\', '/')
        # xml_generate recognises directories by a trailing '/'. Add it only
        # when missing -- NOTE(review): some rarfile versions may already
        # report directory names with the slash; verify.
        if is_dir and not name.endswith('/'):
            name += '/'
        search_list.append(name)

    if filename.endswith('.rar'):
        out = 'tmp/' + str(uuid.uuid1()) + '.xml'
        xml_start(search_list, out)
        return out, 'success'

    out_dir = 'tmp/' + str(uuid.uuid1())
    outname = out_dir + '/' + member.split('/')[-1]
    if not os.path.exists(out_dir):
        os.makedirs(out_dir, 0o755)
    file_content = rar_deal(archive, member.decode('utf-8'), pwd)
    # Member content is raw bytes: write in binary mode, close even on error.
    with open(outname, 'wb') as out_file:
        out_file.write(file_content)
    name_pdf = officetoimage.file_to_pdf(outname)
    return outname, name_pdf
def file_to_tar(filename,pwd):
    """List a tar archive as XML, or extract one member and convert it.

    filename -- either ``archive.tar[.gz]`` or ``archive.tar[.gz]/path/inside``;
                the text before the first '/' is taken as the archive path
    pwd      -- unused; accepted so all file_to_* functions share a signature

    When ``filename`` ends with '.tar' or '.tar.gz' the member list
    (directories marked with a trailing '/') is written to ``tmp/<uuid>.xml``
    via xml_start and ``(xml_path, 'success')`` is returned. Otherwise the
    member is read with tar_deal, written to ``tmp/<uuid>/<basename>``,
    passed to officetoimage.file_to_pdf, and
    ``(extracted_path, <file_to_pdf result>)`` is returned.

    Prints 'error' and exits the process (status 0) when the archive cannot
    be opened.
    """
    archive, _, member = filename.partition('/')
    try:
        file_to_deal = tarfile.open(archive, 'r')
    except (IOError, tarfile.TarError):
        print('error')
        sys.exit(0)
    try:
        # One pass over the members instead of a getmember()/index() lookup
        # per name; xml_generate recognises directories by a trailing '/'.
        search_list = [info.name + '/' if info.isdir() else info.name
                       for info in file_to_deal.getmembers()]
    finally:
        file_to_deal.close()

    if filename.endswith(('.tar', '.tar.gz')):
        out = 'tmp/' + str(uuid.uuid1()) + '.xml'
        xml_start(search_list, out)
        return out, 'success'

    out_dir = 'tmp/' + str(uuid.uuid1())
    outname = out_dir + '/' + member.split('/')[-1]
    if not os.path.exists(out_dir):
        os.makedirs(out_dir, 0o755)
    file_content = tar_deal(archive, member.decode('utf-8'))
    # Member content is raw bytes: write in binary mode, close even on error.
    with open(outname, 'wb') as out_file:
        out_file.write(file_content)
    name_pdf = officetoimage.file_to_pdf(outname)
    return outname, name_pdf
def _parse_7z_listing(lines):
    """Return the member names found in the output lines of ``7z l``.

    Directories get a trailing '/'. Returns None when the output holds no
    listing table or reports an error before it.
    """
    rulers = [idx for idx, line in enumerate(lines)
              if line.startswith('-----')]
    if not rulers:
        return None
    first = rulers[0]
    if any(line.startswith('Error') for line in lines[:first]):
        return None
    # The dashed ruler marks where each column starts. Reading the name by
    # column position keeps names that contain spaces intact.
    # NOTE(review): assumes the Attr column is the second dash group and the
    # Name column the last one -- confirm against the 7z version in use.
    columns = [m.start() for m in re.finditer(r'-+', lines[first])]
    if len(columns) < 2:
        return None
    attr_col, name_col = columns[1], columns[-1]
    # Rows end at the closing ruler; the totals line after it is skipped.
    last = rulers[1] if len(rulers) > 1 else len(lines)
    names = []
    for line in lines[first + 1:last]:
        name = line[name_col:]
        if not name:
            continue
        if line[attr_col:attr_col + 1] == 'D':
            name += '/'
        names.append(name)
    return names


def file_to_7z(filename,pwd):
    """List a 7z archive as XML, or extract one member and convert it.

    filename -- either ``archive.7z`` or ``archive.7z/path/inside``; the
                text before the first '/' is taken as the archive path
    pwd      -- archive password; None is treated as ''

    When ``filename`` ends with '.7z' the archive is listed with ``7z l``,
    the listing is written to ``tmp/<uuid>.xml`` via xml_start and
    ``(xml_path, 'success')`` is returned. If 7z cannot be run or its output
    has no usable listing, ``('', 'error 7z')`` is returned -- a 2-tuple
    like every other path, so callers that unpack two values keep working.
    Otherwise the member is extracted with z7_deal, passed to
    officetoimage.file_to_pdf, and ``(extracted_path, <file_to_pdf result>)``
    is returned.
    """
    if pwd is None:
        pwd = ''
    archive, _, member = filename.partition('/')

    if not filename.endswith('.7z'):
        file_path = z7_deal(archive, member.decode('utf-8'), pwd)
        name_pdf = officetoimage.file_to_pdf(file_path)
        return file_path, name_pdf

    try:
        # Argument list (no shell): spaces and shell metacharacters in the
        # file name or password are passed through literally.
        proc = subprocess.Popen(['7z', 'l', '-p' + pwd, '--', filename],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        content = proc.communicate()[0].splitlines()
    except OSError:
        return '', 'error 7z'

    search_list = _parse_7z_listing(content)
    if search_list is None:
        return '', 'error 7z'
    out = 'tmp/' + str(uuid.uuid1()) + '.xml'
    xml_start(search_list, out)
    return out, 'success'
##### FILENAME IS FILE.ZIP OR FILE.ZIP/FILE#######
# Usage: <script> ARCHIVE[/MEMBER] [PASSWORD]
# The handler is chosen from the extension of the first path component and
# the two values it returns are printed separated by a space.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s ARCHIVE[/MEMBER] [PASSWORD]\n' % sys.argv[0])
        sys.exit(1)
    filename = sys.argv[1]
    pwd = sys.argv[2] if len(sys.argv) > 2 else ''
    archive = filename.split('/')[0]
    # Defined up front so the final print also works for unsupported types.
    file_path = ''
    if archive.endswith('.zip'):
        file_path, dir_content = file_to_zip(filename, pwd)
    elif archive.endswith('.rar'):
        file_path, dir_content = file_to_rar(filename, pwd)
    elif archive.endswith(('.tar.gz', '.tar')):
        file_path, dir_content = file_to_tar(filename, pwd)
    elif archive.endswith('.7z'):
        file_path, dir_content = file_to_7z(filename, pwd)
    else:
        dir_content = 'error code'
    print('%s %s' % (file_path, dir_content))