Solution to Busybox's support for Chinese Characters

Source: Internet
Author: User

In embedded Linux systems, BusyBox is the most common tool used to build a file system. However, starting with BusyBox 1.17.0, Chinese characters cannot be displayed by the ls command unless the source is modified. Even if the kernel is configured to support Chinese characters, the ls command in the shell cannot display them, because support for non-ASCII characters is limited in BusyBox 1.17.0 and later. This article describes how to modify BusyBox 1.17.0 and later versions so that they support Chinese. Two files need to be modified: printable_string.c and unicode.c.
First, here is why the ls command cannot display Chinese characters. See the unmodified code in printable_string.c:

/*
 * Return a version of str that is safe to print (stock BusyBox code).
 *
 * If every byte of str is in the range ' '..0x7e, str itself is returned.
 * Otherwise a converted copy is returned: with ENABLE_UNICODE_SUPPORT it
 * comes from unicode_conv_to_printable(); without it, every byte below ' '
 * or at/above 0x7f is replaced by '?'.
 *
 * stats: if non-NULL, receives the byte count, character count and width
 *        of the result (all three equal the byte length on the paths
 *        handled directly in this function).
 *
 * Converted copies are kept in a static ring of four slots; the slot being
 * reused is freed here, so the caller must not free the returned pointer.
 *
 * Line numbers quoted in the surrounding text count from the signature line.
 */
const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
{
	static char *saved[4];
	static unsigned cur_saved; /* = 0 */

	char *dst;
	const char *s;

	s = str;
	while (1) {
		unsigned char c = *s;
		if (c == '\0') {
			/* 99+% of inputs do not need conversion */
			if (stats) {
				stats->byte_count = (s - str);
				stats->unicode_count = (s - str);
				stats->unicode_width = (s - str);
			}
			return str;
		}
		if (c < ' ')
			break;
		if (c >= 0x7f) /* DEL and every byte with the high bit set, e.g. UTF-8 Chinese text */
			break;
		s++;
	}

#if ENABLE_UNICODE_SUPPORT
	dst = unicode_conv_to_printable(stats, str);
#else
	{
		char *d = dst = xstrdup(str);
		while (1) {
			unsigned char c = *d;
			if (c == '\0')
				break;
			if (c < ' ' || c >= 0x7f) /* non-ASCII bytes become '?' here */
				*d = '?';
			d++;
		}
		if (stats) {
			stats->byte_count = (d - dst);
			stats->unicode_count = (d - dst);
			stats->unicode_width = (d - dst);
		}
	}
#endif

	/* Recycle the oldest slot; the mask wraps the index because the ring size is a power of two */
	free(saved[cur_saved]);
	saved[cur_saved] = dst;
	cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);
	return dst;
}

From lines 23–24 of the function above (`if (c >= 0x7f) break;`) and lines 37–38 (`if (c < ' ' || c >= 0x7f) *d = '?';`), we can see that any byte greater than or equal to 0x7F makes the scan stop via break, and that such bytes are then replaced with "?". Therefore, even if the Linux kernel supports Chinese characters, they cannot be displayed. The modified code (the changed lines were shown in red bold in the original post) is as follows:

/*
 * Return a version of str that is safe to print (patched for Chinese text).
 *
 * Differs from the stock BusyBox function in two tests only: bytes
 * 0x80..0xff are now treated as printable, so multi-byte file names pass
 * through unchanged. DEL (0x7f) is still treated as a control character;
 * simply commenting out the "c >= 0x7f" tests would let it through as well.
 *
 * If str contains no byte below ' ' and no DEL, str itself is returned.
 * Otherwise a converted copy is returned: with ENABLE_UNICODE_SUPPORT it
 * comes from unicode_conv_to_printable(); without it, control bytes are
 * replaced by '?'.
 *
 * stats: if non-NULL, receives the byte count, character count and width
 *        of the result.
 *        NOTE(review): on the paths handled here all three are set to the
 *        byte length, so the reported width of a multi-byte name is its
 *        byte count, not its column count - confirm callers that align
 *        columns can live with that.
 *
 * Converted copies are kept in a static ring of four slots; the slot being
 * reused is freed here, so the caller must not free the returned pointer.
 *
 * Line numbers quoted in the surrounding text count from the signature line.
 */
const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
{
	static char *saved[4];
	static unsigned cur_saved; /* = 0 */

	char *dst;
	const char *s;

	s = str;
	while (1) {
		unsigned char c = *s;
		if (c == '\0') {
			/* 99+% of inputs do not need conversion */
			if (stats) {
				stats->byte_count = (s - str);
				stats->unicode_count = (s - str);
				stats->unicode_width = (s - str);
			}
			return str;
		}
		if (c < ' ')
			break;
		if (c == 0x7f) /* was: c >= 0x7f; bytes 0x80..0xff now stay on the fast path */
			break;
		s++;
	}

#if ENABLE_UNICODE_SUPPORT
	dst = unicode_conv_to_printable(stats, str);
#else
	{
		char *d = dst = xstrdup(str);
		while (1) {
			unsigned char c = *d;
			if (c == '\0')
				break;
			if (c < ' ' || c == 0x7f) /* was: || c >= 0x7f */
				*d = '?';
			d++;
		}
		if (stats) {
			stats->byte_count = (d - dst);
			stats->unicode_count = (d - dst);
			stats->unicode_width = (d - dst);
		}
	}
#endif

	/* Recycle the oldest slot; the mask wraps the index because the ring size is a power of two */
	free(saved[cur_saved]);
	saved[cur_saved] = dst;
	cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);
	return dst;
}

After the preceding modifications, if Unicode support is not selected in the BusyBox configuration ([ ] Support Unicode), you can see Chinese characters using the ls command. I have personally tested this. However, there is another case: Unicode support is selected when BusyBox 1.17.0 is configured ([*] Support Unicode), as shown below:

In the configuration menu, choose Busybox Settings > General Configuration:

    [ ] Enable locale support (system needs locale for this to work)
    [*] Support Unicode
    [*] Support for --long-options

In this case, one more file needs to be modified: unicode.c. If this file is not modified, the ls command still cannot display Chinese characters. See the unmodified code below:

/*
 * Build a newly allocated, printable copy of src limited to `width`
 * (stock BusyBox code).
 *
 * stats: if non-NULL, receives byte count, character count and width of
 *        the result.
 * src:   NUL-terminated input string.
 * width: maximum width of the result; with UNI_FLAG_PAD the result is
 *        padded with spaces up to this width.
 * flags: UNI_FLAG_PAD requests the padding described above.
 *
 * When unicode_status is not UNICODE_ON the string is handled byte by
 * byte and every byte outside ' '..0x7e is replaced by '?'. Otherwise the
 * input is decoded one wide character at a time; invalid sequences and
 * characters that are rejected by the checks in the loop are replaced by
 * CONFIG_SUBST_WCHAR with width 1.
 *
 * Line numbers quoted in the surrounding text count from the signature line.
 */
static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
{
	char *dst;
	unsigned dst_len;
	unsigned uni_count;
	unsigned uni_width;

	if (unicode_status != UNICODE_ON) {
		char *d;
		if (flags & UNI_FLAG_PAD) {
			d = dst = xmalloc(width + 1);
			while ((int)--width >= 0) { /* width is unsigned: the cast detects the step below zero */
				unsigned char c = *src;
				if (c == '\0') {
					do
						*d++ = ' ';
					while ((int)--width >= 0);
					break;
				}
				*d++ = (c >= ' ' && c < 0x7f) ? c : '?'; /* non-ASCII bytes become '?' */
				src++;
			}
			*d = '\0';
		} else {
			d = dst = xstrndup(src, width);
			while (*d) {
				unsigned char c = *d;
				if (c < ' ' || c >= 0x7f) /* non-ASCII bytes become '?' */
					*d = '?';
				d++;
			}
		}
		if (stats) {
			stats->byte_count = (d - dst);
			stats->unicode_count = (d - dst);
			stats->unicode_width = (d - dst);
		}
		return dst;
	}

	dst = NULL;
	uni_count = uni_width = 0;
	dst_len = 0;
	while (1) {
		int w;
		wchar_t wc;

#if ENABLE_UNICODE_USING_LOCALE
		{
			mbstate_t mbst = { 0 };
			ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
			/* If invalid sequence is seen: -1 is returned,
			 * src points to the invalid sequence, errno = EILSEQ.
			 * Else number of wchars (excluding terminating L'\0')
			 * written to dest is returned.
			 * If len (here: 1) non-L'\0' wchars stored at dest,
			 * src points to the next char to be converted.
			 * If string is completely converted: src = NULL.
			 */
			if (rc == 0) /* end-of-string */
				break;
			if (rc < 0) { /* error */
				src++;
				goto subst;
			}
			if (!iswprint(wc))
				goto subst;
		}
#else
		src = mbstowc_internal(&wc, src);
		/* src is advanced to next mb char
		 * wc == ERROR_WCHAR: invalid sequence is seen
		 * else: wc is set
		 */
		if (wc == ERROR_WCHAR) /* error */
			goto subst;
		if (wc == 0) /* end-of-string */
			break;
#endif
		if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
			goto subst;
		w = wcwidth(wc);
		if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
		 || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
		 || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
		) {
 subst:
			wc = CONFIG_SUBST_WCHAR;
			w = 1;
		}
		width -= w;
		/* Note: if width == 0, we still may add more chars,
		 * they may be zero-width or combining ones */
		if ((int)width < 0) {
			/* can't add this wc, string would become longer than width */
			width += w;
			break;
		}

		uni_count++;
		uni_width += w;
		dst = xrealloc(dst, dst_len + MB_CUR_MAX);
#if ENABLE_UNICODE_USING_LOCALE
		{
			mbstate_t mbst = { 0 };
			dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
		}
#else
		dst_len += wcrtomb_internal(&dst[dst_len], wc);
#endif
	}

	/* Pad to remaining width */
	if (flags & UNI_FLAG_PAD) {
		dst = xrealloc(dst, dst_len + width + 1);
		uni_count += width;
		uni_width += width;
		while ((int)--width >= 0) {
			dst[dst_len++] = ' ';
		}
	}
	/* NOTE(review): dst is still NULL here if the loop accepted no character
	 * and UNI_FLAG_PAD is not set (e.g. empty src) - confirm callers avoid this */
	dst[dst_len] = '\0';
	if (stats) {
		stats->byte_count = dst_len;
		stats->unicode_count = uni_count;
		stats->unicode_width = uni_width;
	}

	return dst;
}

 
See lines 20 and 28 of the function above (the two tests against 0x7f in the `unicode_status != UNICODE_ON` branch). They need to be modified. The modified code is as follows:

/*
 * Build a newly allocated, printable copy of src limited to `width`
 * (patched for Chinese text).
 *
 * Differs from the stock BusyBox function in two tests only, both in the
 * branch taken when unicode_status is not UNICODE_ON: bytes 0x80..0xff are
 * now copied through instead of being replaced by '?'. DEL (0x7f) is still
 * replaced; simply commenting out the "0x7f" half of each test would let
 * that control character through as well.
 *
 * stats: if non-NULL, receives byte count, character count and width of
 *        the result.
 * src:   NUL-terminated input string.
 * width: maximum width of the result; with UNI_FLAG_PAD the result is
 *        padded with spaces up to this width.
 * flags: UNI_FLAG_PAD requests the padding described above.
 *
 * NOTE(review): the patched branch counts and truncates by bytes, so a
 * multi-byte character may be cut in the middle when src is longer than
 * width - confirm this is acceptable for the callers.
 *
 * When unicode_status is UNICODE_ON the input is decoded one wide
 * character at a time; invalid sequences and characters rejected by the
 * checks in the loop are replaced by CONFIG_SUBST_WCHAR with width 1.
 *
 * Line numbers quoted in the surrounding text count from the signature line.
 */
static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
{
	char *dst;
	unsigned dst_len;
	unsigned uni_count;
	unsigned uni_width;

	if (unicode_status != UNICODE_ON) {
		char *d;
		if (flags & UNI_FLAG_PAD) {
			d = dst = xmalloc(width + 1);
			while ((int)--width >= 0) { /* width is unsigned: the cast detects the step below zero */
				unsigned char c = *src;
				if (c == '\0') {
					do
						*d++ = ' ';
					while ((int)--width >= 0);
					break;
				}
				*d++ = (c >= ' ' && c != 0x7f) ? c : '?'; /* was: && c < 0x7f */
				src++;
			}
			*d = '\0';
		} else {
			d = dst = xstrndup(src, width);
			while (*d) {
				unsigned char c = *d;
				if (c < ' ' || c == 0x7f) /* was: || c >= 0x7f */
					*d = '?';
				d++;
			}
		}
		if (stats) {
			stats->byte_count = (d - dst);
			stats->unicode_count = (d - dst);
			stats->unicode_width = (d - dst);
		}
		return dst;
	}

	dst = NULL;
	uni_count = uni_width = 0;
	dst_len = 0;
	while (1) {
		int w;
		wchar_t wc;

#if ENABLE_UNICODE_USING_LOCALE
		{
			mbstate_t mbst = { 0 };
			ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
			/* If invalid sequence is seen: -1 is returned,
			 * src points to the invalid sequence, errno = EILSEQ.
			 * Else number of wchars (excluding terminating L'\0')
			 * written to dest is returned.
			 * If len (here: 1) non-L'\0' wchars stored at dest,
			 * src points to the next char to be converted.
			 * If string is completely converted: src = NULL.
			 */
			if (rc == 0) /* end-of-string */
				break;
			if (rc < 0) { /* error */
				src++;
				goto subst;
			}
			if (!iswprint(wc))
				goto subst;
		}
#else
		src = mbstowc_internal(&wc, src);
		/* src is advanced to next mb char
		 * wc == ERROR_WCHAR: invalid sequence is seen
		 * else: wc is set
		 */
		if (wc == ERROR_WCHAR) /* error */
			goto subst;
		if (wc == 0) /* end-of-string */
			break;
#endif
		if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
			goto subst;
		w = wcwidth(wc);
		if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
		 || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
		 || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
		) {
 subst:
			wc = CONFIG_SUBST_WCHAR;
			w = 1;
		}
		width -= w;
		/* Note: if width == 0, we still may add more chars,
		 * they may be zero-width or combining ones */
		if ((int)width < 0) {
			/* can't add this wc, string would become longer than width */
			width += w;
			break;
		}

		uni_count++;
		uni_width += w;
		dst = xrealloc(dst, dst_len + MB_CUR_MAX);
#if ENABLE_UNICODE_USING_LOCALE
		{
			mbstate_t mbst = { 0 };
			dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
		}
#else
		dst_len += wcrtomb_internal(&dst[dst_len], wc);
#endif
	}

	/* Pad to remaining width */
	if (flags & UNI_FLAG_PAD) {
		dst = xrealloc(dst, dst_len + width + 1);
		uni_count += width;
		uni_width += width;
		while ((int)--width >= 0) {
			dst[dst_len++] = ' ';
		}
	}
	/* NOTE(review): dst is still NULL here if the loop accepted no character
	 * and UNI_FLAG_PAD is not set (e.g. empty src) - confirm callers avoid this */
	dst[dst_len] = '\0';
	if (stats) {
		stats->byte_count = dst_len;
		stats->unicode_count = uni_count;
		stats->unicode_width = uni_width;
	}

	return dst;
}

After the preceding modifications, the ls command displays Chinese characters even when Unicode support is enabled in the configuration. You can also cd into directories whose names contain Chinese characters.

 
Reposted from: CSDN blog of wavemcu

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email; we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.