Common methods for splitting files in Python

Source: Internet
Author: User
Tags in python

This article collects several useful methods for splitting files in Python. The methods are simple and practical, and are shared here for your reference. The details are as follows:

Example 1: split files into pieces of a specified size

Configuration file Config.ini:

The configuration is as follows:
[Global]
# Directory containing the original files
Dir1=f:\work\python\3595\pyserver\test
# Directory in which the new (split) files are stored
Dir2=f:\work\python\3595\pyserver\test1

The Python code is as follows:

The code is as follows:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Split every file of a configured directory into fixed-size pieces.

Restored from a listing whose capitalisation and indentation had been lost,
and ported to Python 3 (``configparser``, ``input()``, ``print()``).
"""
import configparser
import os
import sys


class file_openate(object):
    """Split the files found in ``dir1`` and store the pieces in ``dir2``.

    Both directories are read from the ``[Global]`` section (options ``Dir1``
    and ``Dir2``) of an INI file.
    """

    def __init__(self, config_path='Config.ini'):
        """Read the source and target directories from the configuration.

        Args:
            config_path: INI file to read. Defaults to ``Config.ini`` in the
                current working directory.

        Raises:
            OSError: if the configuration file cannot be opened.
            configparser.Error: if the section or one of the options is
                missing.
        """
        dir_config = configparser.ConfigParser()
        # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise end up in front of the first section header.
        with open(config_path, encoding='utf-8-sig') as file_config:
            dir_config.read_file(file_config)
        self.dir1 = dir_config.get("Global", "Dir1")
        self.dir2 = dir_config.get("Global", "Dir2")

    def file_list(self):
        """Ask for a chunk size in bytes and split every file in ``dir1``.

        The prompt is repeated until a positive whole number is entered.
        Each regular file ``<name><ext>`` is then written to ``dir2`` as
        ``<name>-1<ext>``, ``<name>-2<ext>``, ... and the method returns
        once all files have been processed.
        """
        print("Software is not confirmed, the early use of the best backup to avoid data loss, confirm the backup, please enter the size of the bytes to be divided, according to B to calculate.")
        while True:
            input_name = input("Number:")
            # Zero is rejected as well: a zero-byte chunk never makes progress.
            if not (input_name.isdecimal() and int(input_name) > 0):
                print("Please enter the correct number, please re-enter")
                continue
            chunk_size = int(input_name)
            for filename in os.listdir(self.dir1):
                source_path = os.path.join(self.dir1, filename)
                if not os.path.isfile(source_path):
                    continue  # sub-directories cannot be split
                self._split_one(source_path, chunk_size)
                print("split success" + filename)
            # Stop after one successful run instead of prompting forever.
            break

    def _split_one(self, source_path, chunk_size):
        """Write ``source_path`` to ``dir2`` in ``chunk_size``-byte pieces.

        Returns the number of pieces written (0 for an empty file).
        """
        name, ext = os.path.splitext(os.path.basename(source_path))
        chunk_number = 0
        # Binary mode keeps the bytes intact on every platform.
        with open(source_path, 'rb') as source:
            while True:
                data = source.read(chunk_size)
                if not data:
                    break
                chunk_number += 1
                part_path = os.path.join(
                    self.dir2, name + '-' + str(chunk_number) + ext)
                # 'wb' replaces a stale piece from an earlier run rather than
                # appending to it.
                with open(part_path, 'wb') as new_f:
                    new_f.write(data)
        return chunk_number


if __name__ == "__main__":
    file_name = file_openate()
    file_name.file_list()

Example 2: split a file by number of lines

The code is as follows:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Split a text file into parts that each hold a fixed number of lines."""
import os


class Splitfiles():
    """Split a file by line count."""

    def __init__(self, file_name, line_count=200):
        """Store the source file name and the number of lines per part.

        Args:
            file_name: path of the file to split.
            line_count: maximum number of lines written to each part.
        """
        self.file_name = file_name
        self.line_count = line_count

    def split_file(self):
        """Write the source file out as consecutive numbered part files.

        Prints a message and returns if the source file does not exist.
        I/O errors are printed rather than raised.
        """
        if not (self.file_name and os.path.exists(self.file_name)):
            print("%s is not a validate file" % self.file_name)
            return
        try:
            with open(self.file_name) as f:
                temp_content = []
                part_num = 1
                for line in f:
                    # Flush once the buffer already holds a full part, then
                    # start the next part with the current line.
                    if len(temp_content) >= self.line_count:
                        self.write_file(part_num, temp_content)
                        part_num += 1
                        temp_content = []
                    temp_content.append(line)
                # Whatever is left after the loop becomes the last part.
                self.write_file(part_num, temp_content)
        except IOError as err:
            print(err)

    def get_part_file_name(self, part_num):
        """Return the path of part ``part_num``.

        Parts live in a ``temp_part_file`` folder next to the source file;
        the folder is created if it does not exist yet.
        """
        temp_path = os.path.dirname(self.file_name)
        # os.path.join inserts the separator the plain concatenation lacked
        # and also copes with an empty dirname (file in the current dir).
        part_dir = os.path.join(temp_path, "temp_part_file")
        if not os.path.exists(part_dir):
            os.makedirs(part_dir)
        return os.path.join(part_dir, "temp_file_" + str(part_num) + ".part")

    def write_file(self, part_num, *line_content):
        """Write the given lines to part ``part_num``.

        Args:
            part_num: number of the part file to write.
            *line_content: one or more iterables of lines, written in the
                order given.

        I/O errors are printed rather than raised.
        """
        try:
            part_file_name = self.get_part_file_name(part_num)
            with open(part_file_name, "w") as part_file:
                for lines in line_content:
                    part_file.writelines(lines)
        except IOError as err:
            print(err)


if __name__ == "__main__":
    sf = Splitfiles(r"F:\multiple_thread_read_file.txt")
    sf.split_file()

So far we have only split files; what if we need to merge the parts back together? The following example implements both splitting and merging. Let's take a look.

Example 3: split a file and merge the parts again

split.py, the code is as follows:
#!/usr/bin/python
##########################################################################
# Split a file into a set of parts; join.py puts them back together.
# This is a customizable version of the standard Unix split command-line
# utility; because it is written in Python, it also works on Windows and
# can be easily modified; because it exports a function, its logic can
# also be imported and reused in other applications.
# Ported to Python 3 (print() and input()).
##########################################################################

import os
import sys

kilobytes = 1024
megabytes = kilobytes * 1000
chunksize = int(1.4 * megabytes)                   # default: roughly a floppy


def split(fromfile, todir, chunksize=chunksize):
    """Split ``fromfile`` into numbered part files stored in ``todir``.

    Args:
        fromfile: path of the file to split.
        todir: directory for the parts. It is created if missing; otherwise
            every file already in it is deleted first.
        chunksize: maximum size of each part in bytes.

    Returns:
        The number of parts written, named ``part0001``, ``part0002``, ...

    Raises:
        OSError: on any file-system failure (the caller handles errors).
        AssertionError: if more than 9999 parts would be needed, because
            joining relies on the sort order of same-length names.
    """
    if not os.path.exists(todir):
        os.mkdir(todir)                            # make dir, read/write parts
    else:
        for fname in os.listdir(todir):            # delete any existing files
            os.remove(os.path.join(todir, fname))
    partnum = 0
    with open(fromfile, 'rb') as source:           # binary mode on Windows
        while True:
            chunk = source.read(chunksize)         # get next part <= chunksize
            if not chunk:                          # EOF = empty bytes from read
                break
            partnum += 1
            # Raised explicitly (not via assert) so the check survives
            # "python -O", and before the offending part is written.
            if partnum > 9999:
                raise AssertionError('join sort fails if 5 digits')
            filename = os.path.join(todir, 'part%04d' % partnum)
            with open(filename, 'wb') as fileobj:
                fileobj.write(chunk)
    return partnum


if __name__ == '__main__':
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('Use: split.py [file-to-split target-dir [chunksize]]')
    else:
        if len(sys.argv) < 3:
            interactive = 1
            fromfile = input('File to be split? ')         # input if clicked
            todir = input('Directory to store part files? ')
        else:
            interactive = 0
            fromfile, todir = sys.argv[1:3]                # args in cmdline
            if len(sys.argv) == 4:
                chunksize = int(sys.argv[3])
        absfrom, absto = map(os.path.abspath, [fromfile, todir])
        print('splitting', absfrom, 'to', absto, 'by', chunksize)

        try:
            parts = split(fromfile, todir, chunksize)
        except Exception:
            # Top-level boundary: report the failure instead of a traceback.
            print('Error during split:')
            print(sys.exc_info()[0], sys.exc_info()[1])
        else:
            print('Split finished:', parts, 'parts are in', absto)
        if interactive:
            input('Press Enter key')                       # pause if clicked

join_file.py

The code is as follows:
#!/usr/bin/python
##########################################################################
# Join all part files in a dir created by split.py, to recreate the file.
# This is roughly like a 'cat fromdir/* > tofile' command on Unix, but is
# more portable and configurable, and exports the join operation as a
# reusable function. Relies on sort order of file names: must be same
# length. Could extend split/join to popup Tkinter file selectors.
# Ported to Python 3 (print() and input()).
##########################################################################

import os
import shutil
import sys

readsize = 1024


def join(fromdir, tofile):
    """Concatenate every file in ``fromdir`` into ``tofile``.

    Args:
        fromdir: directory holding the part files; they are joined in the
            sorted order of their names.
        tofile: path of the file to (re)create.

    Raises:
        OSError: if the directory cannot be listed or a file cannot be
            opened, read or written.
    """
    # List the parts before opening the output, so that a bad ``fromdir``
    # does not create or truncate ``tofile``.
    parts = sorted(os.listdir(fromdir))
    with open(tofile, 'wb') as output:
        for filename in parts:
            filepath = os.path.join(fromdir, filename)
            with open(filepath, 'rb') as fileobj:
                # Copies in readsize-byte blocks until EOF.
                shutil.copyfileobj(fileobj, output, readsize)


if __name__ == '__main__':
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('Use: join.py [from-dir-name to-file-name]')
    else:
        if len(sys.argv) != 3:
            interactive = 1
            fromdir = input('Directory containing part files? ')
            tofile = input('Name of file to be recreated? ')
        else:
            interactive = 0
            fromdir, tofile = sys.argv[1:]
        absfrom, absto = map(os.path.abspath, [fromdir, tofile])
        print('Joining', absfrom, 'to make', absto)

        try:
            join(fromdir, tofile)
        except Exception:
            # Top-level boundary: report the failure instead of a traceback.
            print('Error Joining files:')
            print(sys.exc_info()[0], sys.exc_info()[1])
        else:
            print('Join complete: see', absto)
        if interactive:
            input('Press Enter key')                       # pause if clicked

I hope this article will help you with your Python programming.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.