MSSQL Chinese character processing (character set encoding and collation)
SQL Server Chinese processing involves character set coding and sorting rules, which is a very tangled problem.
SQL code
--ascii character
Select N,x=cast (n as binary (2)), U=nchar (n) from Nums where n inclusive and 126
--unicode Chinese character
Select N,x=cast (n as binary (2)), U=nchar (n) from Nums where n inclusive 19968 and 40869
19968 0x4e00
40869 0x9fa5
--The following two conditions are used to determine whether a string contains Chinese characters
like n '%[-]% ' collate chinese_prc_ci_as
like n '%[a-]% ' collate chinese_prc_bin
-This is because the order of Chinese characters is different in the above two kinds of sorting rules.
--Chinese full-angle punctuation mark
Select N,x=cast (n as binary (2)), Uq=nchar (n), Ub=nchar (n-65248) from Nums where n inclusive 65281 and 65374
Select NCHAR (12288), nchar (32)
65281 0xff01!!
65374 0xff5e ~ ~
--The following conditions are used to determine whether a string contains full-width punctuation
like n '%[! -~]% ' Collate Chinese_prc_bin
Conversion of Full-width punctuation:
SQL code
--full2half
create function [dbo]. [Full2half] (
@string nvarchar (max)
)
returns nvarchar (max)
as
/*
Full Angle (fullwidth) converted to Half-width (Halfwidth)
*/
begin
DECLARE @chr nchar (1)
declare @i int
Set @string = replace (@string, n ', n ')
Set @i = Patindex (n '%[! -~]% ' collate Latin1_General_BIN, @string)
while @i > 0
begin
Set @chr = substring (@string, @i,1)
Set @string = replace (@string, @chr, nchar (Unicode (@chr)-65248))
Set @i = Patindex (n '%[! -~]% ' collate Latin1_General_BIN, @string)
End
return @string
End
Go
create function [dbo]. [Half2full] (
@string nvarchar (max)
)
returns nvarchar (max)
as
/*
half Angle (Halfwidth) is converted to full angle (Fullwidth)
*/
begin
DECLARE @chr nchar (1)
declare @i int
Set @string = replace (@string, n ', n ')
Set @i = Patindex (n '%[!-~]% ' collate Latin1_General_BIN, @string)
while @i > 0
begin
Set @chr = substring (@string, @i,1)
Set @string = replace (@string, @chr, nchar (Unicode (@chr) +65248))
Set @i = Patindex (n '%[!-~]% ' collate Latin1_General_BIN, @string)
End
return @string
End
Go