git: 3610bffd2888 - main - pkill: use an ARG_MAX size buffer for argument matching

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Mon, 20 Mar 2023 19:20:15 UTC
The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=3610bffd2888b65389a46e8d075ce8e1fc83af4c

commit 3610bffd2888b65389a46e8d075ce8e1fc83af4c
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2023-03-20 19:19:35 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2023-03-20 19:19:36 +0000

    pkill: use an ARG_MAX size buffer for argument matching
    
    Right now pkill/pgrep cut off at _POSIX2_LINE_MAX (2048), but argument
    strings can be much larger (ARG_MAX is 256K/512K).  Stop arbitrarily
    cutting the search off at 2K, rather than documenting the limit.
    
    Reviewed by:    allanjude (earlier version), des
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D38663
---
 bin/pkill/pkill.c               |  38 +++++++++++--
 bin/pkill/tests/Makefile        |   6 ++
 bin/pkill/tests/pgrep-f_test.sh |  58 +++++++++++++++++++
 bin/pkill/tests/spin_helper.c   | 123 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 220 insertions(+), 5 deletions(-)

diff --git a/bin/pkill/pkill.c b/bin/pkill/pkill.c
index 0ffa5ababaac..6d8f29147a04 100644
--- a/bin/pkill/pkill.c
+++ b/bin/pkill/pkill.c
@@ -133,8 +133,9 @@ static int	takepid(const char *, int);
 int
 main(int argc, char **argv)
 {
-	char buf[_POSIX2_LINE_MAX], *mstr, **pargv, *p, *q, *pidfile;
+	char *buf, *mstr, **pargv, *p, *q, *pidfile;
 	const char *execf, *coref;
+	size_t bufsz;
 	int ancestors, debug_opt, did_action;
 	int i, ch, bestidx, rv, criteria, pidfromfile, pidfilelock;
 	size_t jsz;
@@ -177,6 +178,7 @@ main(int argc, char **argv)
 		}
 	}
 
+	buf = NULL;
 	ancestors = 0;
 	criteria = 0;
 	debug_opt = 0;
@@ -311,6 +313,31 @@ main(int argc, char **argv)
 
 	mypid = getpid();
 
+	/*
+	 * If we're not matching args, we only need a buffer large enough to
+	 * hold some relatively short error strings.  Otherwise, we have to
+	 * assume we'll need up to ARG_MAX bytes for arguments.
+	 */
+	bufsz = _POSIX2_LINE_MAX;
+	if (matchargs) {
+		long arg_max;
+
+		arg_max = sysconf(_SC_ARG_MAX);
+		if (arg_max == -1)
+			arg_max = ARG_MAX;
+
+		/*
+		 * The absolute worst case scenario is ARG_MAX single-byte
+		 * arguments which we'll then separate with spaces and NUL
+		 * terminate.
+		 */
+		bufsz = (arg_max * 2) + 1;
+	}
+
+	buf = malloc(bufsz);
+	if (buf == NULL)
+		err(STATUS_ERROR, "malloc");
+
 	/*
 	 * Retrieve the list of running processes from the kernel.
 	 */
@@ -346,7 +373,7 @@ main(int argc, char **argv)
 	 */
 	for (; *argv != NULL; argv++) {
 		if ((rv = regcomp(&reg, *argv, cflags)) != 0) {
-			regerror(rv, &reg, buf, sizeof(buf));
+			regerror(rv, &reg, buf, bufsz);
 			errx(STATUS_BADUSAGE,
 			    "Cannot compile regular expression `%s' (%s)",
 			    *argv, buf);
@@ -363,9 +390,9 @@ main(int argc, char **argv)
 			if (matchargs &&
 			    (pargv = kvm_getargv(kd, kp, 0)) != NULL) {
 				jsz = 0;
-				while (jsz < sizeof(buf) && *pargv != NULL) {
+				while (jsz < bufsz && *pargv != NULL) {
 					jsz += snprintf(buf + jsz,
-					    sizeof(buf) - jsz,
+					    bufsz - jsz,
 					    pargv[1] != NULL ? "%s " : "%s",
 					    pargv[0]);
 					pargv++;
@@ -384,7 +411,7 @@ main(int argc, char **argv)
 				} else
 					selected[i] = 1;
 			} else if (rv != REG_NOMATCH) {
-				regerror(rv, &reg, buf, sizeof(buf));
+				regerror(rv, &reg, buf, bufsz);
 				errx(STATUS_ERROR,
 				    "Regular expression evaluation error (%s)",
 				    buf);
@@ -576,6 +603,7 @@ main(int argc, char **argv)
 		fprintf(stderr,
 		    "No matching processes belonging to you were found\n");
 
+	free(buf);
 	exit(rv ? STATUS_MATCH : STATUS_NOMATCH);
 }
 
diff --git a/bin/pkill/tests/Makefile b/bin/pkill/tests/Makefile
index be467074651f..9bf448bb2460 100644
--- a/bin/pkill/tests/Makefile
+++ b/bin/pkill/tests/Makefile
@@ -2,6 +2,11 @@
 
 .include <bsd.own.mk>
 
+PACKAGE=	tests
+
+PROGS+=		spin_helper
+BINDIR=		${TESTSDIR}
+
 TAP_TESTS_SH=			pgrep-F_test
 TAP_TESTS_SH+=			pgrep-LF_test
 TAP_TESTS_SH+=			pgrep-P_test
@@ -10,6 +15,7 @@ TAP_TESTS_SH+=			pgrep-_g_test
 TAP_TESTS_SH+=			pgrep-_s_test
 TAP_TESTS_SH+=			pgrep-g_test
 TAP_TESTS_SH+=			pgrep-i_test
+TAP_TESTS_SH+=			pgrep-f_test
 TAP_TESTS_SH+=			pgrep-j_test
 TEST_METADATA.pgrep-j_test+=	required_user="root"
 TEST_METADATA.pgrep-j_test+=	required_programs="jail jls"
diff --git a/bin/pkill/tests/pgrep-f_test.sh b/bin/pkill/tests/pgrep-f_test.sh
new file mode 100644
index 000000000000..85b1878b97a6
--- /dev/null
+++ b/bin/pkill/tests/pgrep-f_test.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+# $FreeBSD$
+
+: ${ARG_MAX:=524288}
+base=$(dirname $(realpath "$0"))
+
+echo "1..2"
+
+waitfor() {
+	flagfile=$1
+
+	iter=0
+
+	while [ ! -f ${flagfile} ] && [ ${iter} -lt 50 ]; do
+		sleep 0.10
+		iter=$((iter + 1))
+	done
+
+	if [ ! -f ${flagfile} ]; then
+		return 1
+	fi
+}
+
+sentinel="findme=test-$$"
+sentinelsz=$(printf "${sentinel}" | wc -c | tr -d '[[:space:]]')
+name="pgrep -f"
+spin="${base}/spin_helper"
+flagfile="pgrep_f_short.flag"
+
+${spin} --short ${flagfile} ${sentinel} &
+chpid=$!
+if ! waitfor ${flagfile}; then
+	echo "not ok - $name"
+else
+	pid=$(pgrep -f ${sentinel})
+	if [ "$pid" = "$chpid" ]; then
+		echo "ok - $name"
+	else
+		echo "not ok - $name"
+	fi
+fi
+kill $chpid
+
+name="pgrep -f long args"
+flagfile="pgrep_f_long.flag"
+${spin} --long ${flagfile} ${sentinel} &
+chpid=$!
+if ! waitfor ${flagfile}; then
+	echo "not ok - $name"
+else
+	pid=$(pgrep -f ${sentinel})
+	if [ "$pid" = "$chpid" ]; then
+		echo "ok - $name"
+	else
+		echo "not ok - $name"
+	fi
+fi
+kill $chpid
diff --git a/bin/pkill/tests/spin_helper.c b/bin/pkill/tests/spin_helper.c
new file mode 100644
index 000000000000..10541ad12516
--- /dev/null
+++ b/bin/pkill/tests/spin_helper.c
@@ -0,0 +1,123 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static int
+exec_shortargs(char *argv[])
+{
+	char *flag_arg = argv[2];
+	char *sentinel = argv[3];
+	char * nargv[] = { argv[0], __DECONST(char *, "--spin"), flag_arg,
+	    sentinel, NULL };
+	char * const nenvp[] = { NULL };
+
+	execve(argv[0], nargv, nenvp);
+	err(1, "execve");
+}
+
+static int
+exec_largeargs(char *argv[])
+{
+	char *flag_arg = argv[2];
+	char *sentinel = argv[3];
+	/*
+	 * Account for each argument and their NUL terminator, as well as an
+	 * extra NUL terminator.
+	 */
+	size_t bufsz = ARG_MAX -
+	    ((strlen(argv[0]) + 1) + sizeof("--spin") + (strlen(flag_arg) + 1) +
+	    (strlen(sentinel) + 1) + 1);
+	char *s = NULL;
+	char * nargv[] = { argv[0], __DECONST(char *, "--spin"), flag_arg, NULL,
+	    sentinel, NULL };
+	char * const nenvp[] = { NULL };
+
+	/*
+	 * Our heuristic may or may not be accurate, we'll keep trying with
+	 * smaller argument sizes as needed until we stop getting E2BIG.
+	 */
+	do {
+		if (s == NULL)
+			s = malloc(bufsz + 1);
+		else
+			s = realloc(s, bufsz + 1);
+		if (s == NULL)
+			abort();
+		memset(s, 'x', bufsz);
+		s[bufsz] = '\0';
+		nargv[3] = s;
+
+		execve(argv[0], nargv, nenvp);
+		bufsz--;
+	} while (errno == E2BIG);
+	err(1, "execve");
+}
+
+int
+main(int argc, char *argv[])
+{
+
+	if (argc > 1 && strcmp(argv[1], "--spin") == 0) {
+		int fd;
+
+		if (argc < 4) {
+			fprintf(stderr, "usage: %s --spin flagfile ...\n", argv[0]);
+			return (1);
+		}
+
+		fd = open(argv[2], O_RDWR | O_CREAT, 0755);
+		if (fd < 0)
+			err(1, "%s", argv[2]);
+		close(fd);
+
+		for (;;) {
+			sleep(1);
+		}
+
+		return (1);
+	}
+
+	if (argc != 4) {
+		fprintf(stderr, "usage: %s [--short | --long] flagfile sentinel\n",
+		    argv[0]);
+		return (1);
+	}
+
+	if (strcmp(argv[1], "--short") == 0)
+		exec_shortargs(argv);
+	else
+		exec_largeargs(argv);
+}