The byte 0x92 is 10010010 in binary.
The first byte (start byte) of a UTF-8 encoded character can only have one of the forms 0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx, and every following (continuation) byte must have the form 10xxxxxx.
0x92 matches only the continuation pattern, so the error means that a continuation byte appeared where a start byte was expected.
In most cases this problem occurs because the file is not UTF-8 encoded (for example, it may be GBK encoded) while the system decodes it as UTF-8 by default. The fix is to decode the file with its actual encoding.
In rare cases the file itself is corrupted or has some non-UTF-8 bytes mixed in; then either repair the file or decode it with an error handler such as errors="replace".
Python 2 does not hit this problem because it reads files as a byte stream by default (the equivalent of Python 3 bytes), whereas Python 3 decodes the file to unicode (str) by default.
Opening the file in "rb" mode gives the same effect in Python 3, but Python 3 bytes bring plenty of problems of their own that then have to be dealt with.
Opening the file this way raises the error UnicodeDecodeError: 'utf-8' codec can't decode byte 0xea in position 3171: invalid continuation byte
with open(childrenfile + "\\" + childrenfile_00, "r", "utf-8") as f:
Opening the file in binary mode instead, as explained above, avoids the error:
with open(childrenfile + "\\" + childrenfile_00, "rb") as f:
Python 3: reading the files and writing them into the database
/*******************************************************************************************************/
# coding: utf-8
"""Load ``email:password`` lines from a directory tree into the MySQL table ``40G``.

Expected layout: ``path/<subdirectory>/<file>``. Every file is opened in
binary mode and decoded line by line with ``errors="replace"``, so a byte
sequence that is not valid UTF-8 (for example a stray 0x92 or 0xEA) is turned
into U+FFFD instead of aborting the import with ``UnicodeDecodeError``.
"""
import codecs
import datetime
import getopt
import json
import os
import random
import sys
import time
import urllib
from importlib import reload  # ``imp`` is deprecated and was removed in Python 3.12

import pymysql
import requests
import xlwt

# Python 2 holdover (it was used there to re-expose sys.setdefaultencoding).
# Kept for parity with the original script; Python 3 does not need it.
reload(sys)

os.makedirs("logs", exist_ok=True)

config = {
    "host": "127.0.0.1",
    "user": "root",
    "password": "root",
    "port": 3306,
    "database": "test",
    "charset": "utf8",
}

# Raw string: the backslashes are path separators, not escape sequences.
path = r"F:\BreachCompilation\data"

# Placeholders are bound by the driver, so values are escaped for us and never
# formatted into the SQL text by hand. The table name starts with a digit,
# hence the backticks.
insert_sql = "INSERT INTO `40G` (uname, pwd) VALUES (%s, %s)"


def split_credential(raw_line):
    """Decode one raw line and split it into ``(email, password)``.

    Args:
        raw_line: One line of the input file as ``bytes``.

    Returns:
        A ``(email, password)`` tuple, or ``None`` when the line contains no
        ``":"`` separator. Only the first ``":"`` splits the line, so a
        password that itself contains ``":"`` is kept whole, and the trailing
        line terminator is not stored as part of the password.
    """
    text = raw_line.decode("utf-8", errors="replace").rstrip("\r\n")
    email, separator, password = text.partition(":")
    if not separator:
        return None
    return email, password


conn = pymysql.connect(**config)
cur = conn.cursor()
try:
    for subdir_name in os.listdir(path):
        childrenfile = os.path.join(path, subdir_name)
        for childrenfile_00 in os.listdir(childrenfile):
            with open(os.path.join(childrenfile, childrenfile_00), "rb") as f:
                # Iterate the file object directly instead of readlines():
                # the dumps are large and do not need to be held in memory.
                for start, line in enumerate(f, start=1):
                    print("-------index------" + str(start))
                    element = split_credential(line)
                    if element is None:
                        continue
                    print("Mailbox" + element[0] + "Password:" + element[1])
                    cur.execute(insert_sql, element)
            # One commit per input file rather than one per row.
            conn.commit()
finally:
    # Release the cursor and the connection even if the import fails midway.
    cur.close()
    conn.close()

print("Over")
/********************** In Python 3, a backslash followed by x in a path string must be escaped as \\x (or the path written as a raw string) **********************/
# coding: utf-8
"""Load ``email:password`` lines from a directory tree into the MySQL table ``40G``.

Expected layout: ``path/<subdirectory>/<file>``. Rows are written with
``REPLACE INTO``. Every file is opened in binary mode and decoded line by
line with ``errors="replace"``, so a byte sequence that is not valid UTF-8 is
turned into U+FFFD instead of aborting the import with ``UnicodeDecodeError``.
"""
import codecs
import datetime
import getopt
import json
import os
import random
import sys
import time
import urllib
from importlib import reload  # ``imp`` is deprecated and was removed in Python 3.12

import pymysql
import requests
import xlwt

# Python 2 holdover (it was used there to re-expose sys.setdefaultencoding).
# Kept for parity with the original script; Python 3 does not need it.
reload(sys)

os.makedirs("logs", exist_ok=True)

config = {
    "host": "127.0.0.1",
    "user": "root",
    "password": "root",
    "port": 3306,
    "database": "test",
    "charset": "utf8",
}

# Raw string on purpose: in a normal literal "\x" starts a hex escape (a
# SyntaxError here) and "\b" would become a backspace character.
path = r"E:\xunleixiazai\breachcompilation\data"

# Placeholders are bound by the driver, so values are escaped for us and never
# formatted into the SQL text by hand. The table name starts with a digit,
# hence the backticks.
replace_sql = "REPLACE INTO `40G` (uname, pwd) VALUES (%s, %s)"


def split_credential(raw_line):
    """Decode one raw line and split it into ``(email, password)``.

    Args:
        raw_line: One line of the input file as ``bytes``.

    Returns:
        A ``(email, password)`` tuple, or ``None`` when the line contains no
        ``":"`` separator. Only the first ``":"`` splits the line, so a
        password that itself contains ``":"`` is kept whole, and the trailing
        line terminator is not stored as part of the password.
    """
    text = raw_line.decode("utf-8", errors="replace").rstrip("\r\n")
    email, separator, password = text.partition(":")
    if not separator:
        return None
    return email, password


conn = pymysql.connect(**config)
cur = conn.cursor()
try:
    for subdir_name in os.listdir(path):
        childrenfile = os.path.join(path, subdir_name)
        for childrenfile_00 in os.listdir(childrenfile):
            with open(os.path.join(childrenfile, childrenfile_00), "rb") as f:
                # Iterate the file object directly instead of readlines():
                # the dumps are large and do not need to be held in memory.
                for start, line in enumerate(f, start=1):
                    print("-------index------" + str(start))
                    element = split_credential(line)
                    if element is None:
                        continue
                    print("Mailbox" + element[0] + "Password:" + element[1])
                    cur.execute(replace_sql, element)
            # One commit per input file rather than one per row.
            conn.commit()
finally:
    # Release the cursor and the connection even if the import fails midway.
    cur.close()
    conn.close()

print("Over")
Python3 Writing file problems