git: 574d0dfae501 - stable/13 - grep: fix null pattern and empty pattern file behavior
Kyle Evans
kevans at FreeBSD.org
Thu Feb 11 02:50:33 UTC 2021
The branch stable/13 has been updated by kevans:
URL: https://cgit.FreeBSD.org/src/commit/?id=574d0dfae5011a766aa967f1d1675ddf7b535936
commit 574d0dfae5011a766aa967f1d1675ddf7b535936
Author: Kyle Evans <kevans at FreeBSD.org>
AuthorDate: 2021-02-04 21:26:45 +0000
Commit: Kyle Evans <kevans at FreeBSD.org>
CommitDate: 2021-02-11 02:49:44 +0000
grep: fix null pattern and empty pattern file behavior
The null pattern semantics were terrible because I tried to match gnugrep,
but I got it wrong. Let's unwind that:
- The null pattern should match every line if neither -w nor -x.
- The null pattern should match empty lines if -x.
- The null pattern should not match any lines if -w.
The first two will stop processing (shortcut) even if additional patterns
are specified. In any other case, we will continue processing other
patterns. If no other patterns are specified besides a null pattern, then
we match if neither -w nor -x are set and do not match if either of those
are specified.
The justification for -w is that it should match on a whole word, but the
null pattern does not have a whole word to match on.
Empty pattern files should never match anything, and more importantly, -v
should cause everything to be written.
PR: 253209
(cherry picked from commit f823c6dc730b0dd08b54a53be1d8fd587eee7021)
---
contrib/netbsd-tests/usr.bin/grep/t_grep.sh | 22 +++++++++++++++---
usr.bin/grep/grep.c | 11 ---------
usr.bin/grep/util.c | 35 +++++++++++++----------------
3 files changed, 35 insertions(+), 33 deletions(-)
diff --git a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
index e094b15c6d67..ef3f0617465e 100755
--- a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
+++ b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
@@ -489,11 +489,11 @@ wflag_emptypat_body()
atf_check -s exit:1 -o empty grep -w -e "" test1
- atf_check -o file:test2 grep -w -e "" test2
+ atf_check -o file:test2 grep -vw -e "" test2
atf_check -s exit:1 -o empty grep -w -e "" test3
- atf_check -o file:test4 grep -w -e "" test4
+ atf_check -o file:test4 grep -vw -e "" test4
}
atf_test_case xflag_emptypat
@@ -504,7 +504,6 @@ xflag_emptypat_body()
printf "qaz" > test3
printf " qaz\n" > test4
- # -x is whole-line, more strict than -w.
atf_check -s exit:1 -o empty grep -x -e "" test1
atf_check -o file:test2 grep -x -e "" test2
@@ -550,6 +549,22 @@ xflag_emptypat_plus_body()
atf_check -o file:spacelines grep -Fxvf patlist1 target_spacelines
}
+atf_test_case emptyfile
+emptyfile_descr()
+{
+ atf_set "descr" "Check for proper handling of empty pattern files (PR 253209)"
+}
+emptyfile_body()
+{
+ :> epatfile
+ echo "blubb" > subj
+
+ # From PR 253209, bsdgrep was short-circuiting completely on an empty
+ # file, but we should have still been processing lines.
+ atf_check -s exit:1 -o empty fgrep -f epatfile subj
+ atf_check -o file:subj fgrep -vf epatfile subj
+}
+
atf_test_case excessive_matches
excessive_matches_head()
{
@@ -946,6 +961,7 @@ atf_init_test_cases()
atf_add_test_case wflag_emptypat
atf_add_test_case xflag_emptypat
atf_add_test_case xflag_emptypat_plus
+ atf_add_test_case emptyfile
atf_add_test_case excessive_matches
atf_add_test_case wv_combo_break
atf_add_test_case fgrep_sanity
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index 307a91353b66..33541e4fe734 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -69,13 +69,6 @@ const char *errstr[] = {
int cflags = REG_NOSUB | REG_NEWLINE;
int eflags = REG_STARTEND;
-/* XXX TODO: Get rid of this flag.
- * matchall is a gross hack that means that an empty pattern was passed to us.
- * It is a necessary evil at the moment because our regex(3) implementation
- * does not allow for empty patterns, as supported by POSIX's definition of
- * grammar for BREs/EREs. When libregex becomes available, it would be wise
- * to remove this and let regex(3) handle the dirty details of empty patterns.
- */
bool matchall;
/* Searching patterns */
@@ -637,10 +630,6 @@ main(int argc, char *argv[])
aargc -= optind;
aargv += optind;
- /* Empty pattern file matches nothing */
- if (!needpattern && (patterns == 0) && !matchall)
- exit(1);
-
/* Fail if we don't have any pattern */
if (aargc == 0 && needpattern)
usage();
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
index e517e4eaee6d..f22b7abd79ef 100644
--- a/usr.bin/grep/util.c
+++ b/usr.bin/grep/util.c
@@ -471,31 +471,28 @@ procline(struct parsec *pc)
matchidx = pc->matchidx;
- /*
- * With matchall (empty pattern), we can try to take some shortcuts.
- * Emtpy patterns trivially match every line except in the -w and -x
- * cases. For -w (whole-word) cases, we only match if the first
- * character isn't a word-character. For -x (whole-line) cases, we only
- * match if the line is empty.
- */
+ /* Null pattern shortcuts. */
if (matchall) {
- if (pc->ln.len == 0)
+ if (xflag && pc->ln.len == 0) {
+ /* Matches empty lines (-x). */
return (true);
- if (wflag) {
- wend = L' ';
- if (sscanf(&pc->ln.dat[0], "%lc", &wend) == 1 &&
- !iswword(wend))
- return (true);
- } else if (!xflag)
+ } else if (!wflag && !xflag) {
+ /* Matches every line (no -w or -x). */
return (true);
+ }
/*
- * If we don't have any other patterns, we really don't match.
- * If we do have other patterns, we must fall through and check
- * them.
+ * If we only have the NULL pattern, whether we match or not
+ * depends on if we got here with -w or -x. If either is set,
+ * the answer is no. If we have other patterns, we'll defer
+ * to them.
*/
- if (patterns == 0)
- return (false);
+ if (patterns == 0) {
+ return (!(wflag || xflag));
+ }
+ } else if (patterns == 0) {
+ /* Pattern file with no patterns. */
+ return (false);
}
matched = false;
More information about the dev-commits-src-all
mailing list