git: 2f489a509e61 - main - libc: fix some overflow scenarios in vis(3)

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Tue, 08 Aug 2023 17:02:06 UTC
The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=2f489a509e615c46be6f7c6aa7cea161f50f18af

commit 2f489a509e615c46be6f7c6aa7cea161f50f18af
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2023-08-08 17:01:28 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2023-08-08 17:01:52 +0000

    libc: fix some overflow scenarios in vis(3)
    
    The previous incarnation of this would call wcrtomb() on the destination
    buffer, and only check for overflow *after* it's happened.
    Additionally, the conversion error / VIS_NOLOCALE path also didn't check
    for overflow, and the overflow check at the end didn't account for the
    fact that we still need to write a NUL terminator afterward.
    
    Start by only doing the multibyte conversion into mbdst directly if we
    have enough buffer space to guarantee it'll fit.  An additional
    MB_CUR_MAX buffer has been stashed on the stack to write into if we're
    cutting it close at the end of the buffer, since we don't really have a
    good way to determine the length of the wchar_t without just doing the
    conversion.  We'll do the conversion into the buffer that's guaranteed
    to fit, then copy it over if the copy won't overflow.
    
    The byte-for-byte overflow is a little bit easier, as we simply check
    for overflow with each byte written and make sure we can still NUL
    terminate after.
    
    Tests added to exercise these edge cases.
    
    Reviewed by:    des
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D41328
---
 contrib/libc-vis/vis.c                    | 54 +++++++++++++++++++++----
 contrib/netbsd-tests/lib/libc/gen/t_vis.c | 66 +++++++++++++++++++++++++++++++
 2 files changed, 112 insertions(+), 8 deletions(-)

diff --git a/contrib/libc-vis/vis.c b/contrib/libc-vis/vis.c
index c43186a44b51..fe3939448087 100644
--- a/contrib/libc-vis/vis.c
+++ b/contrib/libc-vis/vis.c
@@ -395,14 +395,16 @@ static int
 istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength,
     int flags, const char *mbextra, int *cerr_ptr)
 {
+	char mbbuf[MB_CUR_MAX];
 	wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
 	size_t len, olen;
 	uint64_t bmsk, wmsk;
 	wint_t c;
 	visfun_t f;
 	int clen = 0, cerr, error = -1, i, shft;
-	char *mbdst, *mdst;
-	ssize_t mbslength, maxolen;
+	char *mbdst, *mbwrite, *mdst;
+	ssize_t mbslength;
+	size_t maxolen;
 	mbstate_t mbstate;
 
 	_DIAGASSERT(mbdstp != NULL);
@@ -541,8 +543,33 @@ istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength,
 	olen = 0;
 	bzero(&mbstate, sizeof(mbstate));
 	for (dst = start; len > 0; len--) {
-		if (!cerr)
-			clen = wcrtomb(mbdst, *dst, &mbstate);
+		if (!cerr) {
+			/*
+			 * If we have at least MB_CUR_MAX bytes in the buffer,
+			 * we'll just do the conversion in-place into mbdst.  We
+			 * need to be a little more conservative when we get to
+			 * the end of the buffer, as we may not have MB_CUR_MAX
+			 * bytes but we may not need it.
+			 */
+			if (maxolen - olen > MB_CUR_MAX)
+				mbwrite = mbdst;
+			else
+				mbwrite = mbbuf;
+			clen = wcrtomb(mbwrite, *dst, &mbstate);
+			if (clen > 0 && mbwrite != mbdst) {
+				/*
+				 * Don't break past our output limit, noting
+				 * that maxolen includes the nul terminator so
+				 * we can't write past maxolen - 1 here.
+				 */
+				if (olen + clen >= maxolen) {
+					errno = ENOSPC;
+					goto out;
+				}
+
+				memcpy(mbdst, mbwrite, clen);
+			}
+		}
 		if (cerr || clen < 0) {
 			/*
 			 * Conversion error, process as a byte(s) instead.
@@ -557,16 +584,27 @@ istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength,
 				shft = i * NBBY;
 				bmsk = (uint64_t)0xffLL << shft;
 				wmsk |= bmsk;
-				if ((*dst & wmsk) || i == 0)
+				if ((*dst & wmsk) || i == 0) {
+					if (olen + clen + 1 >= maxolen) {
+						errno = ENOSPC;
+						goto out;
+					}
+
 					mbdst[clen++] = (char)(
 					    (uint64_t)(*dst & bmsk) >>
 					    shft);
+				}
 			}
 			cerr = 1;
 		}
-		/* If this character would exceed our output limit, stop. */
-		if (olen + clen > (size_t)maxolen)
-			break;
+
+		/*
+		 * We'll be dereferencing mbdst[clen] after this to write the
+		 * nul terminator; the above paths should have checked for a
+		 * possible overflow already.
+		 */
+		assert(olen + clen < maxolen);
+
 		/* Advance output pointer by number of bytes written. */
 		mbdst += clen;
 		/* Advance buffer character pointer. */
diff --git a/contrib/netbsd-tests/lib/libc/gen/t_vis.c b/contrib/netbsd-tests/lib/libc/gen/t_vis.c
index adb0930a300a..80800bf8b31f 100644
--- a/contrib/netbsd-tests/lib/libc/gen/t_vis.c
+++ b/contrib/netbsd-tests/lib/libc/gen/t_vis.c
@@ -175,6 +175,68 @@ ATF_TC_BODY(strvis_locale, tc)
 }
 #endif /* VIS_NOLOCALE */
 
+#ifdef __FreeBSD__
+#define	STRVIS_OVERFLOW_MARKER	0xff	/* Arbitrary */
+
+ATF_TC(strvis_overflow_mb);
+ATF_TC_HEAD(strvis_overflow_mb, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test strvis(3) multi-byte overflow");
+}
+
+ATF_TC_BODY(strvis_overflow_mb, tc)
+{
+	const char src[] = "\xf0\x9f\xa5\x91";
+	/* Extra byte to detect overflow */
+	char dst[sizeof(src) + 1];
+	int n;
+
+	setlocale(LC_CTYPE, "en_US.UTF-8");
+
+	/* Arbitrary */
+	memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+
+	/*
+	 * If we only provide four bytes of buffer, we shouldn't be able encode
+	 * a full 4-byte sequence.
+	 */
+	n = strnvis(dst, 4, src, VIS_SAFE);
+	ATF_REQUIRE(dst[4] == STRVIS_OVERFLOW_MARKER);
+	ATF_REQUIRE(n == -1);
+
+	n = strnvis(dst, sizeof(src), src, VIS_SAFE);
+	ATF_REQUIRE(n == sizeof(src) - 1);
+}
+
+ATF_TC(strvis_overflow_c);
+ATF_TC_HEAD(strvis_overflow_c, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test strvis(3) C locale overflow");
+}
+
+ATF_TC_BODY(strvis_overflow_c, tc)
+{
+	const char src[] = "AAAA";
+	/* Extra byte to detect overflow */
+	char dst[sizeof(src) + 1];
+	int n;
+
+	/* Arbitrary */
+	memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+
+	/*
+	 * If we only provide four bytes of buffer, we shouldn't be able encode
+	 * 4 bytes of input.
+	 */
+	n = strnvis(dst, 4, src, VIS_SAFE | VIS_NOLOCALE);
+	ATF_REQUIRE(dst[4] == STRVIS_OVERFLOW_MARKER);
+	ATF_REQUIRE(n == -1);
+
+	n = strnvis(dst, sizeof(src), src, VIS_SAFE | VIS_NOLOCALE);
+	ATF_REQUIRE(n == sizeof(src) - 1);
+}
+#endif /* __FreeBSD__ */
+
 ATF_TP_ADD_TCS(tp)
 {
 
@@ -185,6 +247,10 @@ ATF_TP_ADD_TCS(tp)
 #ifdef VIS_NOLOCALE
 	ATF_TP_ADD_TC(tp, strvis_locale);
 #endif /* VIS_NOLOCALE */
+#ifdef __FreeBSD__
+	ATF_TP_ADD_TC(tp, strvis_overflow_mb);
+	ATF_TP_ADD_TC(tp, strvis_overflow_c);
+#endif
 
 	return atf_no_error();
 }