git: a420e1b1cee6 - stable/14 - grep: avoid duplicated lines when we're coloring output
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 17 Apr 2025 01:05:55 UTC
The branch stable/14 has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=a420e1b1cee696f6f0fdeacdc04fd4f1e992234b commit a420e1b1cee696f6f0fdeacdc04fd4f1e992234b Author: Kyle Evans <kevans@FreeBSD.org> AuthorDate: 2025-03-20 04:34:13 +0000 Commit: Kyle Evans <kevans@FreeBSD.org> CommitDate: 2025-04-17 01:01:29 +0000 grep: avoid duplicated lines when we're coloring output For the default uncolored output, we'll just output a line once and then move on. For colored output, we'll output multiple matches per line with context from the line interspersed and may end up writing out some match context multiple times as we don't persist which part of the lines have already been printed. Fix it by tracking the length of line printed thus far in printline() and retaining it across successive calls to printline() in the same line. printline() should indicate whether it terminated the line or not to avoid tracking the logic for that in multiple places: -o lines are always terminated, so it's generally only some --color contexts where we wouldn't have terminated. Add a test to make sure that we're only printing one line going forward. Reported and tested by: Jamie Landeg-Jones <jamie catflap org> Reviewed by: emaste (cherry picked from commit 4c9ffb13dd74159bd3ed7e1c4c706dbd15a70df2) --- usr.bin/grep/tests/grep_freebsd_test.sh | 15 +++++++ usr.bin/grep/util.c | 72 +++++++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 13 deletions(-) diff --git a/usr.bin/grep/tests/grep_freebsd_test.sh b/usr.bin/grep/tests/grep_freebsd_test.sh index 77017529843e..906b70645151 100755 --- a/usr.bin/grep/tests/grep_freebsd_test.sh +++ b/usr.bin/grep/tests/grep_freebsd_test.sh @@ -103,10 +103,25 @@ zflag_body() atf_check grep -qz "foo.*bar" in } +atf_test_case color_dupe +color_dupe_body() +{ + + # This assumes a MAX_MATCHES of exactly 32. Previously buggy procline() + # calls would terminate the line premature every MAX_MATCHES matches, + # meaning we'd see the line be output again for the next MAX_MATCHES + # number of matches. + jot -nb 'A' -s '' 33 > in + + atf_check -o save:color.out grep --color=always . in + atf_check -o match:"^ +1 color.out" wc -l color.out +} + atf_init_test_cases() { atf_add_test_case grep_r_implied atf_add_test_case rgrep atf_add_test_case gnuext atf_add_test_case zflag + atf_add_test_case color_dupe } diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c index 4e1c44b442f2..ed87e56956f6 100644 --- a/usr.bin/grep/util.c +++ b/usr.bin/grep/util.c @@ -72,7 +72,7 @@ static int litexec(const struct pat *pat, const char *string, size_t nmatch, regmatch_t pmatch[]); #endif static bool procline(struct parsec *pc); -static void printline(struct parsec *pc, int sep); +static bool printline(struct parsec *pc, int sep, size_t *last_out); static void printline_metadata(struct str *line, int sep); bool @@ -214,15 +214,29 @@ procmatch_match(struct mprintc *mc, struct parsec *pc) /* Print the matching line, but only if not quiet/binary */ if (mc->printmatch) { - printline(pc, ':'); + size_t last_out; + bool terminated; + + last_out = 0; + terminated = printline(pc, ':', &last_out); while (pc->matchidx >= MAX_MATCHES) { /* Reset matchidx and try again */ pc->matchidx = 0; if (procline(pc) == !vflag) - printline(pc, ':'); + terminated = printline(pc, ':', &last_out); else break; } + + /* + * The above loop processes the entire line as long as we keep + * hitting the maximum match count. At this point, we know + * that there's nothing left to be printed and can terminate the + * line. + */ + if (!terminated) + printline(pc, ':', &last_out); + first_match = false; mc->same_file = true; mc->last_outed = 0; @@ -748,26 +762,39 @@ printline_metadata(struct str *line, int sep) } /* - * Prints a matching line according to the command line options. + * Prints a matching line according to the command line options. We need + * *last_out to be populated on entry in case this is just a continuation of + * matches within the same line. + * + * Returns true if the line was terminated, false if it was not. */ -static void -printline(struct parsec *pc, int sep) +static bool +printline(struct parsec *pc, int sep, size_t *last_out) { - size_t a = 0; + size_t a = *last_out; size_t i, matchidx; regmatch_t match; + bool terminated; + + /* + * Nearly all paths below will terminate the line by default, but it is + * avoided in some circumstances in case we don't have the full context + * available here. + */ + terminated = true; /* If matchall, everything matches but don't actually print for -o */ if (oflag && matchall) - return; + return (terminated); matchidx = pc->matchidx; /* --color and -o */ - if ((oflag || color) && matchidx > 0) { + if ((oflag || color) && (pc->printed > 0 || matchidx > 0)) { /* Only print metadata once per line if --color */ - if (!oflag && pc->printed == 0) + if (!oflag && pc->printed == 0) { printline_metadata(&pc->ln, sep); + } for (i = 0; i < matchidx; i++) { match = pc->matches[i]; /* Don't output zero length matches */ @@ -780,9 +807,10 @@ printline(struct parsec *pc, int sep) if (oflag) { pc->ln.boff = match.rm_so; printline_metadata(&pc->ln, sep); - } else + } else { fwrite(pc->ln.dat + a, match.rm_so - a, 1, stdout); + } if (color) fprintf(stdout, "\33[%sm\33[K", color); fwrite(pc->ln.dat + match.rm_so, @@ -793,13 +821,31 @@ printline(struct parsec *pc, int sep) if (oflag) putchar('\n'); } - if (!oflag) { - if (pc->ln.len - a > 0) + + /* + * Don't terminate if we reached the match limit; we may have + * other matches on this line to process. + */ + *last_out = a; + if (!oflag && matchidx != MAX_MATCHES) { + if (pc->ln.len - a > 0) { fwrite(pc->ln.dat + a, pc->ln.len - a, 1, stdout); + *last_out = pc->ln.len; + } putchar('\n'); + } else if (!oflag) { + /* + * -o is terminated on every match output, so this + * branch is only designed to capture MAX_MATCHES in a + * line which may be a signal to us for a lack of + * context. The caller will know more and call us again + * to terminate if it needs to. + */ + terminated = false; } } else grep_printline(&pc->ln, sep); pc->printed++; + return (terminated); }