git: aaa924138a31 - main - Revert "killpg(): close a race with fork(), part 2"

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Wed, 26 Jul 2023 15:22:06 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=aaa924138a31078a1742029ee2d3489aaaa11299

commit aaa924138a31078a1742029ee2d3489aaaa11299
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-07-20 20:59:41 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-07-26 15:12:55 +0000

    Revert "killpg(): close a race with fork(), part 2"
    
    This reverts commits 81a37995c757b4e3ad8a5c699864197fd1ebdcf5 and
    565a343ae3a30bc2973182ff8dfd2fa37d7f615f.
    
    There is still a leakage of the p_killpg_cnt, some but not all sources
    of which were identified.
    
    Second, and more important, is that there is a fundamental issue with
    blocked signals having KSI_KILLPG flag set.  Queueing of such signal
    increments p_killpg_cnt, but it cannot be decremented until the signal
    is delivered.  If, for instance, a single-threaded process with blocked
    signal receives killpg-kill and executes fork(2), the fork enter check
    returns with ERESTART.  And since signal is blocked, the condition
    cannot be cleared.
    
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D41128
---
 sys/kern/kern_exit.c   | 13 +------------
 sys/kern/kern_fork.c   |  3 +--
 sys/kern/kern_sig.c    | 39 ++++++---------------------------------
 sys/kern/kern_thread.c |  6 +++---
 sys/sys/proc.h         |  2 --
 sys/sys/signalvar.h    |  3 +--
 6 files changed, 12 insertions(+), 54 deletions(-)

diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index a6f3ca2a2d66..a92d5cc0f642 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -220,19 +220,13 @@ proc_set_p2_wexit(struct proc *p)
 	p->p_flag2 |= P2_WEXIT;
 }
 
-void
-exit1(struct thread *td, int rval, int signo)
-{
-	exit2(td, rval, signo, false);
-}
-
 /*
  * Exit: deallocate address space and other resources, change proc state to
  * zombie, and unlink proc from allproc and parent's lists.  Save exit status
  * and rusage for wait().  Check for child processes and orphan them.
  */
 void
-exit2(struct thread *td, int rval, int signo, bool dec_killpg_cnt)
+exit1(struct thread *td, int rval, int signo)
 {
 	struct proc *p, *nq, *q, *t;
 	struct thread *tdt;
@@ -310,11 +304,6 @@ exit2(struct thread *td, int rval, int signo, bool dec_killpg_cnt)
 	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
 	racct_sub(p, RACCT_NTHR, 1);
 
-	if (dec_killpg_cnt) {
-		MPASS(atomic_load_int(&p->p_killpg_cnt) > 0);
-		atomic_add_int(&p->p_killpg_cnt, -1);
-	}
-
 	/* Let event handler change exit status */
 	p->p_xexit = rval;
 	p->p_xsig = signo;
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 180c96ae33ef..81bee99fa1ca 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -957,8 +957,7 @@ fork1(struct thread *td, struct fork_req *fr)
 	if (sx_slock_sig(&pg->pg_killsx) != 0) {
 		error = ERESTART;
 		goto fail2;
-	} else if (__predict_false(p1->p_pgrp != pg || sig_intr() != 0 ||
-	    atomic_load_int(&p1->p_killpg_cnt) != 0)) {
+	} else if (__predict_false(p1->p_pgrp != pg || sig_intr() != 0)) {
 		/*
 		 * Either the process was moved to other process
 		 * group, or there is pending signal.  sx_slock_sig()
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index b15ad12724f8..de42255017d8 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -120,7 +120,6 @@ static int	filt_signal(struct knote *kn, long hint);
 static struct thread *sigtd(struct proc *p, int sig, bool fast_sigblock);
 static void	sigqueue_start(void);
 static void	sigfastblock_setpend(struct thread *td, bool resched);
-static void	sigexit1(struct thread *td, int sig, ksiginfo_t *ksi) __dead2;
 
 static uma_zone_t	ksiginfo_zone = NULL;
 struct filterops sig_filtops = {
@@ -371,15 +370,6 @@ sigqueue_start(void)
 	    TDP_OLDMASK, ast_sigsuspend);
 }
 
-static void
-sig_handle_killpg(struct proc *p, ksiginfo_t *ksi)
-{
-	if ((ksi->ksi_flags & KSI_KILLPG) != 0 && p != NULL) {
-		MPASS(atomic_load_int(&p->p_killpg_cnt) > 0);
-		atomic_add_int(&p->p_killpg_cnt, -1);
-	}
-}
-
 ksiginfo_t *
 ksiginfo_alloc(int mwait)
 {
@@ -479,7 +469,6 @@ sigqueue_take(ksiginfo_t *ksi)
 	p = sq->sq_proc;
 	TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 	ksi->ksi_sigq = NULL;
-	sig_handle_killpg(p, ksi);
 	if (!(ksi->ksi_flags & KSI_EXT) && p != NULL)
 		p->p_pendingcnt--;
 
@@ -577,7 +566,6 @@ sigqueue_flush(sigqueue_t *sq)
 	while ((ksi = TAILQ_FIRST(&sq->sq_list)) != NULL) {
 		TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 		ksi->ksi_sigq = NULL;
-		sig_handle_killpg(p, ksi);
 		if (ksiginfo_tryfree(ksi) && p != NULL)
 			p->p_pendingcnt--;
 	}
@@ -653,7 +641,6 @@ sigqueue_delete_set(sigqueue_t *sq, const sigset_t *set)
 		if (SIGISMEMBER(*set, ksi->ksi_signo)) {
 			TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 			ksi->ksi_sigq = NULL;
-			sig_handle_killpg(p, ksi);
 			if (ksiginfo_tryfree(ksi) && p != NULL)
 				p->p_pendingcnt--;
 		}
@@ -682,7 +669,7 @@ sigqueue_delete_set_proc(struct proc *p, const sigset_t *set)
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
-	sigqueue_init(&worklist, p);
+	sigqueue_init(&worklist, NULL);
 	sigqueue_move_set(&p->p_sigqueue, &worklist, set);
 
 	FOREACH_THREAD_IN_PROC(p, td0)
@@ -1470,7 +1457,7 @@ kern_sigtimedwait(struct thread *td, sigset_t waitset, ksiginfo_t *ksi,
 #endif
 		if (sig == SIGKILL) {
 			proc_td_siginfo_capture(td, &ksi->ksi_info);
-			sigexit1(td, sig, ksi);
+			sigexit(td, sig);
 		}
 	}
 	PROC_UNLOCK(p);
@@ -1948,10 +1935,8 @@ kern_kill(struct thread *td, pid_t pid, int signum)
 	case -1:		/* broadcast signal */
 		return (killpg1(td, signum, 0, 1, &ksi));
 	case 0:			/* signal own process group */
-		ksi.ksi_flags |= KSI_KILLPG;
 		return (killpg1(td, signum, 0, 0, &ksi));
 	default:		/* negative explicit process group */
-		ksi.ksi_flags |= KSI_KILLPG;
 		return (killpg1(td, signum, -pid, 0, &ksi));
 	}
 	/* NOTREACHED */
@@ -2002,7 +1987,6 @@ okillpg(struct thread *td, struct okillpg_args *uap)
 	ksi.ksi_code = SI_USER;
 	ksi.ksi_pid = td->td_proc->p_pid;
 	ksi.ksi_uid = td->td_ucred->cr_ruid;
-	ksi.ksi_flags |= KSI_KILLPG;
 	return (killpg1(td, uap->signum, uap->pgid, 0, &ksi));
 }
 #endif /* COMPAT_43 */
@@ -2371,10 +2355,6 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
 	ret = sigqueue_add(sigqueue, sig, ksi);
 	if (ret != 0)
 		return (ret);
-	if ((ksi->ksi_flags & KSI_KILLPG) != 0) {
-		sx_assert(&p->p_pgrp->pg_killsx, SX_XLOCKED);
-		atomic_add_int(&p->p_killpg_cnt, 1);
-	}
 	signotify(td);
 	/*
 	 * Defer further processing for signals which are held,
@@ -3425,7 +3405,7 @@ postsig(int sig)
 		 */
 		mtx_unlock(&ps->ps_mtx);
 		proc_td_siginfo_capture(td, &ksi.ksi_info);
-		sigexit1(td, sig, &ksi);
+		sigexit(td, sig);
 		/* NOTREACHED */
 	} else {
 		/*
@@ -3453,7 +3433,6 @@ postsig(int sig)
 		if (p->p_sig == sig) {
 			p->p_sig = 0;
 		}
-		sig_handle_killpg(p, &ksi);
 		(*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask);
 		postsig_done(sig, td, ps);
 	}
@@ -3611,8 +3590,8 @@ killproc(struct proc *p, const char *why)
  * If dumping core, save the signal number for the debugger.  Calls exit and
  * does not return.
  */
-static void
-sigexit1(struct thread *td, int sig, ksiginfo_t *ksi)
+void
+sigexit(struct thread *td, int sig)
 {
 	struct proc *p = td->td_proc;
 
@@ -3651,16 +3630,10 @@ sigexit1(struct thread *td, int sig, ksiginfo_t *ksi)
 			    sig & WCOREFLAG ? " (core dumped)" : "");
 	} else
 		PROC_UNLOCK(p);
-	exit2(td, 0, sig, ksi != NULL && (ksi->ksi_flags & KSI_KILLPG) != 0);
+	exit1(td, 0, sig);
 	/* NOTREACHED */
 }
 
-void
-sigexit(struct thread *td, int sig)
-{
-	sigexit1(td, sig, NULL);
-}
-
 /*
  * Send queued SIGCHLD to parent when child process's state
  * is changed.
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index fad1abd1be6c..67712450c128 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -99,7 +99,7 @@ _Static_assert(offsetof(struct proc, p_pid) == 0xc4,
     "struct proc KBI p_pid");
 _Static_assert(offsetof(struct proc, p_filemon) == 0x3c8,
     "struct proc KBI p_filemon");
-_Static_assert(offsetof(struct proc, p_comm) == 0x3e4,
+_Static_assert(offsetof(struct proc, p_comm) == 0x3e0,
     "struct proc KBI p_comm");
 _Static_assert(offsetof(struct proc, p_emuldata) == 0x4d0,
     "struct proc KBI p_emuldata");
@@ -119,9 +119,9 @@ _Static_assert(offsetof(struct proc, p_pid) == 0x78,
     "struct proc KBI p_pid");
 _Static_assert(offsetof(struct proc, p_filemon) == 0x270,
     "struct proc KBI p_filemon");
-_Static_assert(offsetof(struct proc, p_comm) == 0x288,
+_Static_assert(offsetof(struct proc, p_comm) == 0x284,
     "struct proc KBI p_comm");
-_Static_assert(offsetof(struct proc, p_emuldata) == 0x31c,
+_Static_assert(offsetof(struct proc, p_emuldata) == 0x318,
     "struct proc KBI p_emuldata");
 #endif
 
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 5c77c2d683c1..d79b7a440168 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -722,7 +722,6 @@ struct proc {
 	int		p_pendingexits; /* (c) Count of pending thread exits. */
 	struct filemon	*p_filemon;	/* (c) filemon-specific data. */
 	int		p_pdeathsig;	/* (c) Signal from parent on exit. */
-	int		p_killpg_cnt;
 /* End area that is zeroed on creation. */
 #define	p_endzero	p_magic
 
@@ -1237,7 +1236,6 @@ void	userret(struct thread *, struct trapframe *);
 
 void	cpu_exit(struct thread *);
 void	exit1(struct thread *, int, int) __dead2;
-void	exit2(struct thread *, int, int, bool) __dead2;
 void	cpu_copy_thread(struct thread *td, struct thread *td0);
 bool	cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map);
 int	cpu_fetch_syscall_args(struct thread *td);
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 1db8813b6bf0..251775c37259 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -240,8 +240,7 @@ typedef struct ksiginfo {
 #define	KSI_SIGQ	0x08	/* Generated by sigqueue, might ret EAGAIN. */
 #define	KSI_HEAD	0x10	/* Insert into head, not tail. */
 #define	KSI_PTRACE	0x20	/* Generated by ptrace. */
-#define	KSI_KILLPG	0x40	/* killpg - update p_killpg_cnt */
-#define	KSI_COPYMASK	(KSI_TRAP | KSI_SIGQ | KSI_PTRACE | KSI_KILLPG)
+#define	KSI_COPYMASK	(KSI_TRAP | KSI_SIGQ | KSI_PTRACE)
 
 #define	KSI_ONQ(ksi)	((ksi)->ksi_sigq != NULL)