git: 9317242469f1 - main - diff: Fix integer overflow.

From: Dag-Erling Smørgrav <des_at_FreeBSD.org>
Date: Mon, 29 Jul 2024 14:02:55 UTC
The branch main has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=9317242469f1ca682626d9806f8caf65d143c09a

commit 9317242469f1ca682626d9806f8caf65d143c09a
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2024-07-29 14:02:29 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2024-07-29 14:02:29 +0000

    diff: Fix integer overflow.
    
    The legacy Stone algorithm uses `int` to represent line numbers, array
    indices, and array lengths.  If given inputs approaching `INT_MAX` lines,
    it would overflow and attempt to allocate ridiculously large amounts of
    memory.  To avoid this without penalizing non-pathological inputs,
    switch a few variables to `size_t` and add checks both while reading
    and immediately after reading the two inputs.
    
    MFC after:      3 days
    PR:             280371
    Sponsored by:   Klara, Inc.
    Reviewed by:    allanjude
    Differential Revision:  https://reviews.freebsd.org/D46169
---
 usr.bin/diff/diffreg.c | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c
index 6073feebf5fb..ffa5568bf442 100644
--- a/usr.bin/diff/diffreg.c
+++ b/usr.bin/diff/diffreg.c
@@ -203,9 +203,9 @@ static int	 *klist;		/* will be overlaid on file[0] after class */
 static int	 *member;		/* will be overlaid on file[1] */
 static int	 clen;
 static int	 inifdef;		/* whether or not we are in a #ifdef block */
-static int	 len[2];
-static int	 pref, suff;	/* length of prefix and suffix */
-static int	 slen[2];
+static size_t	 len[2];		/* lengths of files in lines */
+static size_t	 pref, suff;		/* lengths of prefix and suffix */
+static size_t	 slen[2];		/* lengths of files minus pref / suff */
 static int	 anychange;
 static int	 hw, lpad,rpad;		/* half width and padding */
 static int	 edoffset;
@@ -425,6 +425,10 @@ diffreg_stone(char *file1, char *file2, int flags, int capsicum)
 		status |= 1;
 		goto closem;
 	}
+	if (len[0] > INT_MAX - 2)
+		errc(1, EFBIG, "%s", file1);
+	if (len[1] > INT_MAX - 2)
+		errc(1, EFBIG, "%s", file2);
 
 	prune();
 	sort(sfile[0], slen[0]);
@@ -550,18 +554,17 @@ prepare(int i, FILE *fd, size_t filesize, int flags)
 		sz = 100;
 
 	p = xcalloc(sz + 3, sizeof(*p));
-	while ((r = readhash(fd, flags, &h)) != RH_EOF)
-		switch (r) {
-		case RH_EOF: /* otherwise clang complains */
-		case RH_BINARY:
+	while ((r = readhash(fd, flags, &h)) != RH_EOF) {
+		if (r == RH_BINARY)
 			return (false);
-		case RH_OK:
-			if (j == sz) {
-				sz = sz * 3 / 2;
-				p = xreallocarray(p, sz + 3, sizeof(*p));
-			}
-			p[++j].value = h;
+		if (j == SIZE_MAX)
+			break;
+		if (j == sz) {
+			sz = sz * 3 / 2;
+			p = xreallocarray(p, sz + 3, sizeof(*p));
 		}
+		p[++j].value = h;
+	}
 
 	len[i] = j;
 	file[i] = p;
@@ -572,7 +575,7 @@ prepare(int i, FILE *fd, size_t filesize, int flags)
 static void
 prune(void)
 {
-	int i, j;
+	size_t i, j;
 
 	for (pref = 0; pref < len[0] && pref < len[1] &&
 	    file[0][pref + 1].value == file[1][pref + 1].value;
@@ -710,7 +713,7 @@ static void
 unravel(int p)
 {
 	struct cand *q;
-	int i;
+	size_t i;
 
 	for (i = 0; i <= len[0]; i++)
 		J[i] = i <= pref ? i :
@@ -737,7 +740,7 @@ check(FILE *f1, FILE *f2, int flags)
 	ixold[0] = ixnew[0] = 0;
 	/* jackpot = 0; */
 	ctold = ctnew = 0;
-	for (i = 1; i <= len[0]; i++) {
+	for (i = 1; i <= (int)len[0]; i++) {
 		if (J[i] == 0) {
 			ixold[i] = ctold += skipline(f1);
 			continue;
@@ -837,7 +840,7 @@ check(FILE *f1, FILE *f2, int flags)
 		ixnew[j] = ctnew;
 		j++;
 	}
-	for (; j <= len[1]; j++) {
+	for (; j <= (int)len[1]; j++) {
 		ixnew[j] = ctnew += skipline(f2);
 	}
 	/*
@@ -1504,9 +1507,9 @@ dump_context_vec(FILE *f1, FILE *f2, int flags)
 
 	b = d = 0;		/* gcc */
 	lowa = MAX(1, cvp->a - diff_context);
-	upb = MIN(len[0], context_vec_ptr->b + diff_context);
+	upb = MIN((int)len[0], context_vec_ptr->b + diff_context);
 	lowc = MAX(1, cvp->c - diff_context);
-	upd = MIN(len[1], context_vec_ptr->d + diff_context);
+	upd = MIN((int)len[1], context_vec_ptr->d + diff_context);
 
 	printf("***************");
 	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
@@ -1607,9 +1610,9 @@ dump_unified_vec(FILE *f1, FILE *f2, int flags)
 
 	b = d = 0;		/* gcc */
 	lowa = MAX(1, cvp->a - diff_context);
-	upb = MIN(len[0], context_vec_ptr->b + diff_context);
+	upb = MIN((int)len[0], context_vec_ptr->b + diff_context);
 	lowc = MAX(1, cvp->c - diff_context);
-	upd = MIN(len[1], context_vec_ptr->d + diff_context);
+	upd = MIN((int)len[1], context_vec_ptr->d + diff_context);
 
 	printf("@@ -");
 	uni_range(lowa, upb);