svn commit: r318571 - in head: contrib/netbsd-tests/usr.bin/grep usr.bin/grep
Ed Maste
emaste at FreeBSD.org
Sat May 20 03:51:33 UTC 2017
Author: emaste
Date: Sat May 20 03:51:31 2017
New Revision: 318571
URL: https://svnweb.freebsd.org/changeset/base/318571
Log:
bsdgrep: emit more than MAX_LINE_MATCHES per line
We should not set an arbitrary cap on the number of matches on a line,
and in any case MAX_LINE_MATCHES of 32 is much too low. Instead, if we
match more than MAX_LINE_MATCHES, keep processing and matching from the
last match until all are found.
For the regression test, we produce 4096 matches (more than we ever expect
to set MAX_LINE_MATCHES to) and make sure we actually get 4096
lines of output with the -o flag.
We'll also make sure that every line of output gets its own line
number, so that we detect line metadata not being printed as appropriate
along the way.
PR: 218811
Submitted by: Kyle Evans <kevans91 at ksu.edu>
Reported by: jbeich
Reviewed by: cem
Differential Revision: https://reviews.freebsd.org/D10577
Modified:
head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
head/usr.bin/grep/util.c
Modified: head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
==============================================================================
--- head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh Sat May 20 01:04:47 2017 (r318570)
+++ head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh Sat May 20 03:51:31 2017 (r318571)
@@ -413,6 +413,26 @@ wflag_emptypat_body()
atf_check -o file:test4 grep -w -e "" test4
}
+atf_test_case excessive_matches
+excessive_matches_head()
+{
+ atf_set "descr" "Check for proper handling of lines with excessive matches (PR 218811)"
+}
+excessive_matches_body()
+{
+ grep_type
+ if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then
+ atf_expect_fail "this test does not pass with GNU grep in base"
+ fi
+
+ for i in $(jot 4096); do
+ printf "x" >> test.in
+ done
+
+ atf_check -s exit:0 -x '[ $(grep -o x test.in | wc -l) -eq 4096 ]'
+ #atf_check -s exit:1 -x 'grep -on x test.in | grep -v "1:x"'
+}
+
atf_test_case fgrep_sanity
fgrep_sanity_head()
{
@@ -603,6 +623,7 @@ atf_init_test_cases()
atf_add_test_case egrep_empty_invalid
atf_add_test_case zerolen
atf_add_test_case wflag_emptypat
+ atf_add_test_case excessive_matches
atf_add_test_case wv_combo_break
atf_add_test_case fgrep_sanity
atf_add_test_case egrep_sanity
Modified: head/usr.bin/grep/util.c
==============================================================================
--- head/usr.bin/grep/util.c Sat May 20 01:04:47 2017 (r318570)
+++ head/usr.bin/grep/util.c Sat May 20 03:51:31 2017 (r318571)
@@ -63,6 +63,7 @@ static bool first_match = true;
struct parsec {
regmatch_t matches[MAX_LINE_MATCHES]; /* Matches made */
struct str ln; /* Current line */
+ size_t lnstart; /* Start of line processing */
size_t matchidx; /* Latest used match index */
bool binary; /* Binary file? */
};
@@ -247,8 +248,9 @@ procfile(const char *fn)
mcount = mlimit;
for (c = 0; c == 0 || !(lflag || qflag); ) {
- /* Reset match count for every line processed */
+ /* Reset match count and line start for every line processed */
pc.matchidx = 0;
+ pc.lnstart = 0;
pc.ln.off += pc.ln.len + 1;
if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL ||
pc.ln.len == 0) {
@@ -288,6 +290,14 @@ procfile(const char *fn)
/* Print the matching line, but only if not quiet/binary */
if (t == 0 && printmatch) {
printline(&pc, ':');
+ while (pc.matchidx >= MAX_LINE_MATCHES) {
+ /* Reset matchidx and try again */
+ pc.matchidx = 0;
+ if (procline(&pc) == 0)
+ printline(&pc, ':');
+ else
+ break;
+ }
first_match = false;
same_file = true;
last_outed = 0;
@@ -356,11 +366,11 @@ procline(struct parsec *pc)
{
regmatch_t pmatch, lastmatch, chkmatch;
wchar_t wbegin, wend;
- size_t st = 0, nst = 0;
+ size_t st, nst;
unsigned int i;
int c = 0, r = 0, lastmatches = 0, leflags = eflags;
size_t startm = 0, matchidx;
- int retry;
+ unsigned int retry;
matchidx = pc->matchidx;
@@ -376,6 +386,8 @@ procline(struct parsec *pc)
} else if (matchall)
return (0);
+ st = pc->lnstart;
+ nst = 0;
/* Initialize to avoid a false positive warning from GCC. */
lastmatch.rm_so = lastmatch.rm_eo = 0;
@@ -432,12 +444,12 @@ procline(struct parsec *pc)
* still match a whole word.
*/
if (r == REG_NOMATCH &&
- (retry == 0 || pmatch.rm_so + 1 < retry))
+ (retry == pc->lnstart ||
+ pmatch.rm_so + 1 < retry))
retry = pmatch.rm_so + 1;
if (r == REG_NOMATCH)
continue;
}
-
lastmatches++;
lastmatch = pmatch;
@@ -466,8 +478,11 @@ procline(struct parsec *pc)
}
/* avoid excessive matching - skip further patterns */
if ((color == NULL && !oflag) || qflag || lflag ||
- matchidx >= MAX_LINE_MATCHES)
+ matchidx >= MAX_LINE_MATCHES) {
+ pc->lnstart = nst;
+ lastmatches = 0;
break;
+ }
}
/*
@@ -475,7 +490,7 @@ procline(struct parsec *pc)
* again just in case we still have a chance to match later in
* the string.
*/
- if (lastmatches == 0 && retry > 0) {
+ if (lastmatches == 0 && retry > pc->lnstart) {
st = retry;
continue;
}
@@ -497,6 +512,7 @@ procline(struct parsec *pc)
/* Advance st based on previous matches */
st = nst;
+ pc->lnstart = st;
}
/* Reflect the new matchidx in the context */
More information about the svn-src-head
mailing list