MSSQL Chinese character processing (character set encoding and sorting rules)
SQL Server Chinese processing involves character set coding and sorting rules, which is a very tangled problem.
SQL code
--ascii characters
Select N,x=cast (n as binary (2)), U=nchar (n) from Nums where n between and 126
--unicode Chinese Characters
Select N,x=cast (n as binary (2)), U=nchar (n) from Nums where n between 19968 and 40869
19968 0x4e00 A
40869 0x9fa5
--The following two conditions are used to determine whether a string contains Chinese characters
Like n '%[-]% ' collate chinese_prc_ci_as
Like N '%[a-]% ' collate chinese_prc_bin
This is because the order of Chinese characters is different in the above two kinds of sorting rules.
--Chinese full angle punctuation mark
Select N,x=cast (n as binary (2)), Uq=nchar (n), Ub=nchar (n-65248) from Nums where n between 65281 and 65374
Select NCHAR (12288), nchar (32)
65281 0xff01!!
65374 0xff5e ~ ~
--The following conditions are used to determine whether a string contains full-width punctuation
Like N '%[! -~]% ' Collate Chinese_prc_bin
Conversion of full-angle half-width punctuation:
SQL code
--full2half
Create function [dbo]. [Full2half] (
@string nvarchar (max)
)
Returns nvarchar (max)
As
/*
Full Angle (Fullwidth) converted to half angle (Halfwidth)
*/
Begin
DECLARE @chr nchar (1)
DECLARE @i int
Set @string = replace (@string, n ', n ')
Set @i = Patindex (n '%[! -~]% ' collate Latin1_General_BIN, @string)
While @i > 0
Begin
Set @chr = substring (@string, @i,1)
Set @string = replace (@string, @chr, nchar (Unicode (@chr)-65248)
Set @i = Patindex (n '%[! -~]% ' collate Latin1_General_BIN, @string)
End
return @string
End
Go
Create function [dbo]. [Half2full] (
@string nvarchar (max)
)
Returns nvarchar (max)
As
/*
Half Angle (Halfwidth) converted to full angle (Fullwidth)
*/
Begin
DECLARE @chr nchar (1)
DECLARE @i int
Set @string = replace (@string, n ', n ')
Set @i = Patindex (n '%[!-~]% ' collate Latin1_General_BIN, @string)
While @i > 0
Begin
Set @chr = substring (@string, @i,1)
Set @string = replace (@string, @chr, nchar (Unicode (@chr) +65248))
Set @i = Patindex (n '%[!-~]% ' collate Latin1_General_BIN, @string)
End
return @string
End
Go