First, install the chardet library. There are several ways to do this; the one I used is: sudo pip install chardet
The code is as follows:
#!/usr/bin/env python
# coding: utf-8
import os
import sys

import chardet
def print_usage():
    """Print the command-line usage summary to standard output.

    The positional arguments described here mirror how the script entry
    point reads ``sys.argv``: a file or directory, an optional target
    charset, and an optional output file.

    A single parenthesised argument keeps the ``print`` call valid on both
    Python 2 and Python 3.
    """
    print('''usage:
    change_charset [file|directory] [charset] [output file]

for example:
    change_charset 1.txt UTF-8 n1.txt
    change_charset 1.txt UTF-8
    change_charset . UTF-8
    change_charset 1.txt
''')
def get_charset(s):
    """Return the name of the encoding that chardet guesses for ``s``.

    Args:
        s: The raw, still-encoded content to inspect.

    Returns:
        The ``'encoding'`` entry of the dictionary returned by
        ``chardet.detect``. According to the chardet documentation this
        entry is ``None`` when no guess could be made, so callers must be
        prepared for that value.
    """
    return chardet.detect(s)['encoding']
def remove(file_name):
    """Delete the file at ``file_name``.

    Thin wrapper around ``os.remove``; whatever error that call raises
    (for example when the path does not exist) propagates to the caller.
    """
    os.remove(file_name)
def change_file_charset(file_name, output_file_name, charset):
    """Re-encode one file from its detected charset into ``charset``.

    Args:
        file_name: Path of the file to read.
        output_file_name: Path the converted content is written to. An
            empty string means "convert in place", i.e. ``file_name`` is
            overwritten.
        charset: Name of the target encoding, e.g. ``"UTF-8"``.

    Raises:
        ValueError: If the source encoding cannot be detected. This also
            covers ``UnicodeError`` (a ``ValueError`` subclass) raised when
            the content cannot be decoded with the detected encoding or
            encoded into ``charset``.
        LookupError: If ``charset`` (or the detected name) is not a codec
            known to Python.
        IOError / OSError: If the file cannot be read or written.
    """
    # Binary mode: detection and decoding need the untouched bytes, and no
    # newline translation must happen on the way in or out.
    with open(file_name, 'rb') as source:
        raw = source.read()

    if raw:
        old_charset = get_charset(raw)
        if old_charset is None:
            raise ValueError(
                "cannot detect the charset of %r" % (file_name,))
        converted = raw.decode(old_charset).encode(charset)
    else:
        # Nothing to detect or convert in an empty file.
        converted = raw

    if output_file_name == "":
        output_file_name = file_name

    # The destination is only opened (and thereby truncated) after the
    # conversion succeeded, so a detection or decoding failure can no
    # longer destroy the source file.
    with open(output_file_name, 'wb') as target:
        target.write(converted)
def do(file_name, output_file_name, charset):
    """Convert a single file, or every file below a directory, to ``charset``.

    Args:
        file_name: Path of a file or a directory. Directories are walked
            recursively and each file in them is converted in place.
        output_file_name: Destination path; only used when ``file_name`` is
            not a directory. An empty string requests in-place conversion.
        charset: Name of the target encoding.

    Errors raised while handling an entry of a directory are printed and
    the walk continues with the next entry. Errors for a single,
    explicitly named file propagate to the caller.
    """
    if not os.path.isdir(file_name):
        change_file_charset(file_name, output_file_name, charset)
        return

    for item in os.listdir(file_name):
        path = os.path.join(file_name, item)
        try:
            if os.path.isdir(path):
                do(path, "", charset)
            else:
                change_file_charset(path, "", charset)
        except (OSError, IOError, ValueError) as e:
            # IOError is listed for Python 2, where it is not an OSError.
            # ValueError covers UnicodeError, so one undecodable (e.g.
            # binary) file does not abort the rest of the directory.
            print("%s: %s" % (path, e))
if __name__ == '__main__':
    # Command line layout: script, path, [charset], [output file].
    argc = len(sys.argv)
    if argc < 2 or argc > 4:
        print_usage()
    else:
        # The charset defaults to UTF-8; an empty output name asks for an
        # in-place conversion.
        target_charset = sys.argv[2] if argc >= 3 else "UTF-8"
        output_file_name = sys.argv[3] if argc == 4 else ""
        do(sys.argv[1], output_file_name, target_charset)