git: 8bf187f35b62 - main - diff: fix side-by-side output with tabbed input

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Wed, 14 Dec 2022 01:33:30 UTC
The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=8bf187f35b6298b7848c5ecf45b0b714327090d9

commit 8bf187f35b6298b7848c5ecf45b0b714327090d9
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2022-12-14 01:31:21 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2022-12-14 01:31:21 +0000

    diff: fix side-by-side output with tabbed input
    
    The previous logic conflated some things... in this block:
    - j: input characters rendered so far
    - nc: number of characters in the line
    - col: columns rendered so far
    - hw: column width ((h)ard (w)idth?)
    
    Comparing j to hw or col to nc is naturally wrong, as nc and hw are the
    limits on their respective counters (j and col), and nc is already
    brought down to hw if the input line should be truncated to start with.
    
    Right now, we end up easily truncating lines with tabs in them as we
    count each tab for $tabwidth characters in the input line, but we really
    should only be accounting for them in the column count.  The problem is
    most easily demonstrated by the two input files added for the tests:
    the two tabbed lines lose at least a word or two even though there's
    plenty of space left in the row for each side.
    
    Reviewed by:    bapt, pstef
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D37676
---
 usr.bin/diff/diffreg.c                      | 14 ++++++--------
 usr.bin/diff/tests/Makefile                 |  2 ++
 usr.bin/diff/tests/diff_test.sh             | 19 +++++++++++++++++++
 usr.bin/diff/tests/side_by_side_tabbed_a.in |  4 ++++
 usr.bin/diff/tests/side_by_side_tabbed_b.in |  4 ++++
 5 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c
index def8a4e05974..1247317951d3 100644
--- a/usr.bin/diff/diffreg.c
+++ b/usr.bin/diff/diffreg.c
@@ -1247,7 +1247,8 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
 				printf(" ");
 		}
 		col = 0;
-		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
+		for (j = 0, lastc = '\0'; j < nc && (hw == 0 || col < hw);
+		    j++, lastc = c) {
 			c = getc(lb);
 			if (flags & D_STRIPCR && c == '\r') {
 				if ((c = getc(lb)) == '\n')
@@ -1274,19 +1275,16 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
 				if (flags & D_EXPANDTABS) {
 					newcol = ((col / tabsize) + 1) * tabsize;
 					do {
-						if (diff_format == D_SIDEBYSIDE)
-							j++;
 						printf(" ");
-					} while (++col < newcol && j < nc);
+					} while (++col < newcol && col < hw);
 				} else {
 					if (diff_format == D_SIDEBYSIDE) {
-						if ((j + tabsize) > nc) {
-							printf("%*s", nc - j, "");
-							j = col = nc;
+						if ((col + tabsize) > hw) {
+							printf("%*s", hw - col, "");
+							col = hw;
 						} else {
 							printf("\t");
 							col += tabsize - 1;
-							j += tabsize - 1;
 						}
 					} else {
 						printf("\t");
diff --git a/usr.bin/diff/tests/Makefile b/usr.bin/diff/tests/Makefile
index fbfac0c8372d..9c4639fe9a98 100644
--- a/usr.bin/diff/tests/Makefile
+++ b/usr.bin/diff/tests/Makefile
@@ -12,6 +12,8 @@ ${PACKAGE}FILES+=	\
 	input2.in \
 	input_c1.in \
 	input_c2.in \
+	side_by_side_tabbed_a.in \
+	side_by_side_tabbed_b.in \
 	simple.out \
 	simple_e.out \
 	simple_n.out \
diff --git a/usr.bin/diff/tests/diff_test.sh b/usr.bin/diff/tests/diff_test.sh
index e0968706acfb..fc82d1431a8d 100755
--- a/usr.bin/diff/tests/diff_test.sh
+++ b/usr.bin/diff/tests/diff_test.sh
@@ -7,6 +7,7 @@ atf_test_case header_ns
 atf_test_case ifdef
 atf_test_case group_format
 atf_test_case side_by_side
+atf_test_case side_by_side_tabbed
 atf_test_case brief_format
 atf_test_case b230049
 atf_test_case stripcr_o
@@ -145,6 +146,23 @@ side_by_side_body()
 	    diff -W 65 -y --suppress-common-lines A B
 }
 
+side_by_side_tabbed_body()
+{
+	file_a=$(atf_get_srcdir)/side_by_side_tabbed_a.in
+	file_b=$(atf_get_srcdir)/side_by_side_tabbed_b.in
+
+	atf_check -o save:diffout -s not-exit:0 \
+	    diff -y ${file_a} ${file_b}
+	atf_check -o save:diffout_expanded -s not-exit:0 \
+	    diff -yt ${file_a} ${file_b}
+
+	atf_check -o not-empty grep -Ee 'file A.+file B' diffout
+	atf_check -o not-empty grep -Ee 'file A.+file B' diffout_expanded
+
+	atf_check -o not-empty grep -Ee 'tabs.+tabs' diffout
+	atf_check -o not-empty grep -Ee 'tabs.+tabs' diffout_expanded
+}
+
 brief_format_body()
 {
 	atf_check mkdir A B
@@ -343,6 +361,7 @@ atf_init_test_cases()
 	atf_add_test_case ifdef
 	atf_add_test_case group_format
 	atf_add_test_case side_by_side
+	atf_add_test_case side_by_side_tabbed
 	atf_add_test_case brief_format
 	atf_add_test_case b230049
 	atf_add_test_case stripcr_o
diff --git a/usr.bin/diff/tests/side_by_side_tabbed_a.in b/usr.bin/diff/tests/side_by_side_tabbed_a.in
new file mode 100644
index 000000000000..3198d29f9203
--- /dev/null
+++ b/usr.bin/diff/tests/side_by_side_tabbed_a.in
@@ -0,0 +1,4 @@
+This
+	is my test file A
+	it has tabs
+Thanks
diff --git a/usr.bin/diff/tests/side_by_side_tabbed_b.in b/usr.bin/diff/tests/side_by_side_tabbed_b.in
new file mode 100644
index 000000000000..6698182a8c5d
--- /dev/null
+++ b/usr.bin/diff/tests/side_by_side_tabbed_b.in
@@ -0,0 +1,4 @@
+This
+	is my test file B
+	it has tabs
+Thanks