git: 269dea90d6eb - main - libc: use separate collate objects for C, POSIX, and C.UTF-8

From: Yuri Pankov <yuripv_at_FreeBSD.org>
Date: Tue, 28 Mar 2023 15:17:41 UTC
The branch main has been updated by yuripv:

URL: https://cgit.FreeBSD.org/src/commit/?id=269dea90d6eb32690730b0f6a17fb41170483104

commit 269dea90d6eb32690730b0f6a17fb41170483104
Author:     Yuri Pankov <yuripv@FreeBSD.org>
AuthorDate: 2023-03-28 15:10:47 +0000
Commit:     Yuri Pankov <yuripv@FreeBSD.org>
CommitDate: 2023-03-28 15:16:30 +0000

    libc: use separate collate objects for C, POSIX, and C.UTF-8
    
    Fix newlocale() overwriting the locale name in the collate object
    when the same instance was used for those locales, and querylocale()
    reporting an unexpected value for LC_COLLATE_MASK.
    
    PR:             255646, 269375
    Reviewed by:    markj, bapt (previous version)
    Differential Revision:  https://reviews.freebsd.org/D30146
---
 lib/libc/locale/collate.c              |  17 ++++-
 lib/libc/tests/locale/Makefile         |   1 +
 lib/libc/tests/locale/newlocale_test.c | 111 +++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/lib/libc/locale/collate.c b/lib/libc/locale/collate.c
index 4a5512998438..bc2f4edccd09 100644
--- a/lib/libc/locale/collate.c
+++ b/lib/libc/locale/collate.c
@@ -68,6 +68,14 @@ struct xlocale_collate __xlocale_C_collate = {
 	{{0}, "C"}, 1, 0, 0, 0
 };
 
+struct xlocale_collate __xlocale_POSIX_collate = {
+	{{0}, "POSIX"}, 1, 0, 0, 0
+};
+
+struct xlocale_collate __xlocale_CUTF8_collate = {
+	{{0}, "C.UTF-8"}, 1, 0, 0, 0
+};
+
 static int
 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table);
 
@@ -84,10 +92,13 @@ destruct_collate(void *t)
 void *
 __collate_load(const char *encoding, __unused locale_t unused)
 {
-	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 ||
-	    strncmp(encoding, "C.", 2) == 0) {
+	if (strcmp(encoding, "C") == 0)
 		return (&__xlocale_C_collate);
-	}
+	else if (strcmp(encoding, "POSIX") == 0)
+		return (&__xlocale_POSIX_collate);
+	else if (strcmp(encoding, "C.UTF-8") == 0)
+		return (&__xlocale_CUTF8_collate);
+
 	struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate),
 	    1);
 	if (table == NULL)
diff --git a/lib/libc/tests/locale/Makefile b/lib/libc/tests/locale/Makefile
index 83caf8cb3bd3..86ff742b2c87 100644
--- a/lib/libc/tests/locale/Makefile
+++ b/lib/libc/tests/locale/Makefile
@@ -13,6 +13,7 @@ ATF_TESTS_C+=		mbsnrtowcs_2_test
 ATF_TESTS_C+=		mbsrtowcs_test
 ATF_TESTS_C+=		mbstowcs_2_test
 ATF_TESTS_C+=		mbtowc_2_test
+ATF_TESTS_C+=		newlocale_test
 ATF_TESTS_C+=		towctrans_test
 ATF_TESTS_C+=		wcrtomb_test
 ATF_TESTS_C+=		wcsnrtombs_test
diff --git a/lib/libc/tests/locale/newlocale_test.c b/lib/libc/tests/locale/newlocale_test.c
new file mode 100644
index 000000000000..cb0ebb323a42
--- /dev/null
+++ b/lib/libc/tests/locale/newlocale_test.c
@@ -0,0 +1,111 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2023 Yuri Pankov <yuripv@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+
+#include <locale.h>
+
+#include <atf-c.h>
+
+struct {
+	int		lpmask;
+	const char	*lpname;
+} lparts[] = {
+	{ LC_COLLATE_MASK,	"LC_COLLATE" },
+	{ LC_CTYPE_MASK,	"LC_CTYPE" },
+	{ LC_MONETARY_MASK,	"LC_MONETARY" },
+	{ LC_NUMERIC_MASK,	"LC_NUMERIC" },
+	{ LC_TIME_MASK,		"LC_TIME" },
+	{ LC_MESSAGES_MASK,	"LC_MESSAGES" },
+};
+
+static void
+check_lparts(const char *expected)
+{
+	int i;
+	/* Each category listed in lparts must report `expected` as its name. */
+	for (i = 0; i < nitems(lparts); i++) {
+		const char *actual;
+		/* Query the locale returned by uselocale(NULL). */
+		actual = querylocale(lparts[i].lpmask, uselocale(NULL));
+		ATF_CHECK_STREQ_MSG(expected, actual, "wrong value for %s",
+		    lparts[i].lpname);
+	}
+}
+
+static void
+do_locale_switch(const char *loc1, const char *loc2)
+{
+	locale_t l1, l2;
+
+	/* Create loc1, install it, and check every category reports it. */
+	l1 = newlocale(LC_ALL_MASK, loc1, NULL);
+	ATF_REQUIRE(l1 != NULL);
+	ATF_REQUIRE(uselocale(l1) != NULL);
+	check_lparts(loc1);
+	/*
+	 * Create and use the second locale.  It is deliberately created only
+	 * after the first check, as the newlocale() call would previously
+	 * clobber the first locale's contents.
+	 */
+	l2 = newlocale(LC_ALL_MASK, loc2, NULL);
+	ATF_REQUIRE(l2 != NULL);
+	ATF_REQUIRE(uselocale(l2) != NULL);
+	check_lparts(loc2);
+	/* Switch back to the first locale; it must still report loc1. */
+	ATF_REQUIRE(uselocale(l1) != NULL);
+	check_lparts(loc1);
+	/* NOTE(review): l1 is still the thread locale here; verify. */
+	freelocale(l1);
+	freelocale(l2);
+}
+
+/*
+ * PR 255646, 269375: Check that using newlocale()/uselocale() to switch
+ * between the C, POSIX, and C.UTF-8 locales (and only these) does not stomp
+ * on the contents of the other locales (the collate part specifically).
+ * The issue is cosmetic only, as all three have empty collate parts, but we
+ * still need to correctly report the one in use.
+ */
+
+ATF_TC_WITHOUT_HEAD(newlocale_c_posix_cu8_test);
+ATF_TC_BODY(newlocale_c_posix_cu8_test, tc)
+{
+	do_locale_switch("C", "POSIX");
+	do_locale_switch("C", "C.UTF-8");
+	do_locale_switch("POSIX", "C");
+	do_locale_switch("POSIX", "C.UTF-8");
+	do_locale_switch("C.UTF-8", "C");
+	do_locale_switch("C.UTF-8", "POSIX");
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	ATF_TP_ADD_TC(tp, newlocale_c_posix_cu8_test);
+
+	return (atf_no_error());
+}