git: c4c562eadf3b - main - libc: locale: fix EUC shift check

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Sun, 20 Apr 2025 18:29:50 UTC
The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=c4c562eadf3b790fa221e220d6a442f0cb84ca35

commit c4c562eadf3b790fa221e220d6a442f0cb84ca35
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2025-04-20 18:29:45 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2025-04-20 18:29:45 +0000

    libc: locale: fix EUC shift check
    
    wchar_t is unsigned on ARM platforms, and signed pretty much everywhere
    else.  On signed platforms, right-shifting a wide character whose MSB is
    set (as it is for a valid CS2 or CS3) sign-extends, so `nm` ends up with
    bogus upper bits set.  Mask off just the low byte to discard them.
    
    Add a bare-bones test that converts a CS2 wide character in eucCN, which
    previously failed with EILSEQ.
    
    Reviewed by:    bapt, rew
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D43262
---
 lib/libc/locale/euc.c               |  2 +-
 lib/libc/tests/locale/wctomb_test.c | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/lib/libc/locale/euc.c b/lib/libc/locale/euc.c
index c54a6ceceff2..c110fc61b375 100644
--- a/lib/libc/locale/euc.c
+++ b/lib/libc/locale/euc.c
@@ -426,7 +426,7 @@ _EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
 	/* This first check excludes CS1, which is implicitly valid. */
 	if ((wc < 0xa100) || (wc > 0xffff)) {
 		/* Check for valid CS2 or CS3 */
-		nm = (wc >> ((len - 1) * 8));
+		nm = (wc >> ((len - 1) * 8)) & 0xff;
 		if (nm == cs2) {
 			if (len != cs2width) {
 				errno = EILSEQ;
diff --git a/lib/libc/tests/locale/wctomb_test.c b/lib/libc/tests/locale/wctomb_test.c
index 1e142ed74c48..ef2a6dcbe1e3 100644
--- a/lib/libc/tests/locale/wctomb_test.c
+++ b/lib/libc/tests/locale/wctomb_test.c
@@ -41,6 +41,18 @@
 
 #include <atf-c.h>
 
+ATF_TC_WITHOUT_HEAD(euccs1_test);
+ATF_TC_BODY(euccs1_test, tc)
+{
+	wchar_t wc = 0x8e000000;
+	char buf[MB_LEN_MAX];
+
+	ATF_REQUIRE(strcmp(setlocale(LC_CTYPE, "zh_CN.eucCN"),
+	    "zh_CN.eucCN") == 0);
+
+	ATF_REQUIRE(wctomb(&buf[0], wc) == 4);
+}
+
 ATF_TC_WITHOUT_HEAD(wctomb_test);
 ATF_TC_BODY(wctomb_test, tc)
 {
@@ -104,6 +116,7 @@ ATF_TC_BODY(wctomb_test, tc)
 ATF_TP_ADD_TCS(tp)
 {
 
+	ATF_TP_ADD_TC(tp, euccs1_test);
 	ATF_TP_ADD_TC(tp, wctomb_test);
 
 	return (atf_no_error());