This article describes a way to keep text intact during display and transmission by encoding strings as Unicode escapes. No matter how the client browser changes its encoding setting, the text on the page will not be garbled.
For HTML/XML, each character is written as &# + its decimal Unicode code + ; (a numeric character reference, for example &#20013;).
For JavaScript, each character is written as \u + its 4-digit hexadecimal Unicode code (for example \u4e2d).
The examples are written in C# and use Chinese, Russian, Korean, and Japanese text. PHP is covered at the end of the article.
First, a string extension class.
The code is as follows:
using System.Text.RegularExpressions;

namespace Xxoo
{
    /// <summary>
    /// Extension methods that re-encode a string so that it survives any page
    /// encoding: as HTML numeric character references (<c>&amp;#NNNN;</c>) or as
    /// JavaScript <c>\uXXXX</c> escapes. The result of either method is pure ASCII.
    /// </summary>
    public static class Stringextension
    {
        // What HtmlEncode replaces: a surrogate pair (matched as a unit so it becomes
        // ONE reference), any other non-ASCII UTF-16 unit, or an HTML metacharacter.
        // The ASCII range is spelled with \u escapes on purpose: a three-digit form
        // such as \127 is read by the regex engine as octal (U+0057, 'W'), not 127.
        private static readonly Regex HtmlUnsafe = new Regex(
            @"[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\u0000-\u007F]|[&""<>']",
            RegexOptions.Compiled | RegexOptions.CultureInvariant);

        // What Scriptencode replaces: everything outside printable ASCII (control
        // characters such as CR/LF/TAB and all non-ASCII units), plus the characters
        // that could end or corrupt a quoted script string: & " ' \ < >.
        private static readonly Regex ScriptUnsafe = new Regex(
            @"[^\u0020-\u007E]|[&""'\\<>]",
            RegexOptions.Compiled | RegexOptions.CultureInvariant);

        // Turns one match into a decimal numeric character reference.
        private static string ToHtmlReference(Match m)
        {
            string matched = m.Value;

            // A two-unit match is always a valid high/low surrogate pair (see the
            // pattern); it must be combined, because references to individual
            // surrogates are not valid HTML.
            int codePoint = matched.Length == 2
                ? char.ConvertToUtf32(matched[0], matched[1])
                : matched[0];

            return "&#"
                + codePoint.ToString(System.Globalization.CultureInfo.InvariantCulture)
                + ";";
        }

        /// <summary>
        /// Converts a string to HTML numeric character references.
        /// ASCII characters other than <c>&amp; " &lt; &gt; '</c> are left unchanged.
        /// </summary>
        /// <param name="text">The string to encode.</param>
        /// <returns>The encoded string, e.g. <c>&amp;#20013;&amp;#25991;</c> for U+4E2D U+6587.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is null.</exception>
        public static string HtmlEncode(this string text)
        {
            return HtmlUnsafe.Replace(text, new MatchEvaluator(ToHtmlReference));
        }

        // Turns one matched UTF-16 unit into a \uXXXX escape (lower-case hex, 4 digits).
        private static string ToScriptEscape(Match m)
        {
            int unit = m.Value[0];
            return "\\u" + unit.ToString("x4", System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Encodes a string as JavaScript Unicode escapes of the form <c>\uXXXX</c>,
        /// suitable for placing between quotes in a script string literal.
        /// Printable ASCII other than <c>&amp; " ' \ &lt; &gt;</c> is left unchanged.
        /// </summary>
        /// <param name="text">The string to encode.</param>
        /// <returns>The encoded string, e.g. <c>\u4e2d\u6587\u4e30\u539a\u8b66\u65b9</c>.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is null.</exception>
        public static string Scriptencode(this string text)
        {
            return ScriptUnsafe.Replace(text, new MatchEvaluator(ToScriptEscape));
        }
    }
}
The class provides two methods, which the test page below uses.
Test page (ASP.NET)
The code is as follows:
<%@ Page Language="C#" AutoEventWireup="true" CodeBehind="WebForm1.aspx.cs" Inherits="WebApplication1.WebForm1" %>
<%-- Test page: the code-behind fills the <pre> with HTML-encoded text and adds a
     <script> element to the placeholder. Both controls are server-side (runat="server")
     so the code-behind can reach them by id. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title></title>
</head>
<body>
<form id="Form1" runat="server">
<pre runat="server" id="Pre"></pre>
<asp:PlaceHolder runat="server" id="placeholder"></asp:PlaceHolder>
</form>
</body>
</html>
The code-behind of the test page:
The code is as follows:
using System;
using System.Web.UI.HtmlControls;
using Xxoo;

namespace WebApplication1
{
    /// <summary>
    /// Code-behind of the test page: shows the same multilingual sample text once as
    /// HTML (inside the <c>Pre</c> element) and once through a script <c>alert</c>.
    /// </summary>
    public partial class WebForm1 : System.Web.UI.Page
    {
        /// <summary>
        /// Encodes the sample text with both extension methods and injects the results
        /// into the page.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // Sample text in Chinese, Russian, Korean and Japanese, followed by ASCII
            // punctuation. Verbatim string: line breaks are part of the value and ""
            // stands for a single double quote.
            var str = @"中文丰厚警方过后发达看见发的话该快发动机后购房贷款好
Китайское посольство в Ираке и Багдаде отделение 
이라크에서 중국 대사관과 알의 바그다드 지사-만수르 호텔
イラクでの大使館やアルのバグダッド支局-マンスールホテル
1234567890!@#$%^&*()<>""'\|}{][:;
";
            Pre.InnerHtml = str.HtmlEncode();

            HtmlGenericControl control = new HtmlGenericControl("script");
            control.Attributes["language"] = "javascript";
            control.Attributes["type"] = "text/javascript";
            // The encoded text contains no raw quote, backslash or line break, so it
            // can be placed directly between the double quotes of the alert argument.
            control.InnerHtml = string.Format("alert(\"{0}\");", str.Scriptencode());
            placeholder.Controls.Add(control);
        }
    }
}
The HTML produced when the page runs:
The code is as follows:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>
</title></head>
<body>
<!-- The non-ASCII text inside <pre> is shown decoded for readability; in the actual
     page source it is written as numeric character references such as &#20013;. -->
<form name="Form1" method="post" action="WebForm1.aspx" id="Form1">
<pre id="Pre">中文丰厚警方过后发达看见发的话该快发动机后购房贷款好
Китайское посольство в Ираке и Багдаде отделение
이라크에서 중국 대사관과 알의 바그다드 지사-만수르 호텔
イラクでの大使館やアルのバグダッド支局-マンスールホテル
1234567890!@#$%^&#38;*()&#60;&#62;&#34;&#39;\|}{][:;
</pre>
<script language="javascript" type="text/javascript"><!--
alert("\u4e2d\u6587\u4e30\u539a\u8b66\u65b9\u8fc7\u540e\u53d1\u8fbe\u770b\u89c1\u53d1\u7684\u8bdd\u8be5\u5feb\u53d1\u52a8\u673a\u540e\u8d2d\u623f\u8d37\u6b3e\u597d\u000d\u000a\u041a\u0438\u0442\u0430\u0439\u0441\u043a\u043e\u0435 \u043f\u043e\u0441\u043e\u043b\u044c\u0441\u0442\u0432\u043e \u0432 \u0418\u0440\u0430\u043a\u0435 \u0438 \u0411\u0430\u0433\u0434\u0430\u0434\u0435 \u043e\u0442\u0434\u0435\u043b\u0435\u043d\u0438\u0435 \u000d\u000a\uc774\ub77c\ud06c\uc5d0\uc11c \uc911\uad6d \ub300\uc0ac\uad00\uacfc \uc54c\uc758 \ubc14\uadf8\ub2e4\ub4dc \uc9c0\uc0ac-\ub9cc\uc218\ub974 \ud638\ud154\u000d\u000a\u30a4\u30e9\u30af\u3067\u306e\u5927\u4f7f\u9928\u3084\u30a2\u30eb\u306e\u30d0\u30b0\u30c0\u30c3\u30c9\u652f\u5c40-\u30de\u30f3\u30b9\u30fc\u30eb\u30db\u30c6\u30eb\u000d\u000a1234567890!@#$%\u005e\u0026*()\u003c\u003e\u0022\u0027\u005c\u007c\u007d\u007b\u005d\u005b:;\u000d\u000a");
--></script>
</form>
</body>
</html>
This way, no matter which encoding the browser uses, the page will not appear garbled.
==============================================
PHP
PHP is a bit more complicated, because MySQL's character encoding also has to be taken into account; that is set aside here.
Here is a sample that HTML-encodes GBK text, for reference only:
The code is as follows:
/**
 * Converts a GBK-encoded byte string into pure-ASCII HTML.
 *
 * ASCII bytes are copied through, except for " & ' < > which become decimal
 * numeric character references. Every byte above 127 is treated as the lead
 * byte of a two-byte GBK character, which is converted to UTF-16 and emitted
 * as a decimal numeric character reference.
 *
 * @param string $text GBK-encoded input.
 * @return string ASCII-only HTML text.
 */
function HtmlEncode($text)
{
    $encoded = "";
    $length = strlen($text);
    for ($index = 0; $index < $length; $index++)
    {
        $byte = ord($text[$index]);
        if ($byte <= 127)
        {
            switch ($byte)
            {
                case 34: // "
                case 38: // &
                case 39: // '
                case 60: // <
                case 62: // >
                    $encoded .= "&#" . $byte . ";";
                    break;
                default:
                    $encoded .= $text[$index];
            }
        }
        else
        {
            if ($index + 1 >= $length)
            {
                // Truncated input: a lead byte with no trail byte. Emit U+FFFD
                // instead of reading past the end of the string.
                $encoded .= "&#65533;";
                break;
            }
            $char = $text[$index] . $text[$index + 1];
            // Big-endian is requested explicitly because the arithmetic below
            // reads the high byte first.
            $char = mb_convert_encoding($char, "UTF-16BE", "GBK");
            if (strlen($char) < 2)
            {
                // The conversion produced no usable code unit for this pair.
                $encoded .= "&#65533;";
            }
            else
            {
                $encoded .= "&#" . (ord($char[0]) * 256 + ord($char[1])) . ";";
            }
            $index++; // skip the trail byte that was just consumed
        }
    }
    return $encoded;
}
The result is then returned as XML, which eliminates garbled text completely.
The code is as follows:
// Emit the result as an XML document. Every text field goes through HtmlEncode(),
// so those values are written as ASCII with numeric character references.
echo "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
echo "<result>";
echo "<success>" . ($success ? 1 : 0) . "</success>";
echo "<message>" . HtmlEncode($message) . "</message>";
if ($success)
{
    echo "<nickname>" . HtmlEncode($nickname) . "</nickname>";
    // NOTE(review): $userId and $siteId are written unescaped; presumably they are
    // numeric ids. Confirm against the code that sets them.
    echo "<userId>" . $userId . "</userId>";
    echo "<siteId>" . $siteId . "</siteId>";
    echo "<isTeacher>" . ($isTeacher ? 1 : 0) . "</isTeacher>";
    echo "<ipAddress>" . HtmlEncode($ipAddress) . "</ipAddress>";
}
echo "</result>";
This article is from the CSDN blog; when reproducing it, please cite the source: http://blog.csdn.net/wangjia184/archive/2009/10/26/4728318.aspx