git: 232b922cb363 - main - killpg(): close a race with fork(), part 2

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Wed, 26 Jul 2023 15:22:08 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=232b922cb363e01ac0dd2a277d93cf74d8485e79

commit 232b922cb363e01ac0dd2a277d93cf74d8485e79
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-07-20 21:19:03 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-07-26 15:13:02 +0000

    killpg(): close a race with fork(), part 2
    
    When we are sending terminating signal to the group, killpg() needs
    to guarantee that all group members are to be terminated (it does not
    need to ensure that they are terminated on return from killpg()).  The
    pg_killsx change eliminates the largest window there, but still, if
    a multithreaded process is signalled, the following could happen:
    - thread 1 is selected for the signal delivery and gets descheduled
    - thread 2 waits for pg_killsx lock, obtains it and forks
    - thread 1 continue executing and terminates the process
    This scenario allows the child to escape still.
    
    Fix it by single-threading forking parent if a conflict with pg_killsx
    is noted.  We try to lock pg_killsx without sleeping, and failure to
    acquire it means that a parallel killpg(2) is executed.  Then, stop
    other threads for running and in particular, receive signals, to avoid
    the situation explained above.
    
    Reviewed by:    markj
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D41128
---
 sys/kern/kern_fork.c | 47 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 81bee99fa1ca..e64a91ea5f80 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -862,7 +862,7 @@ fork1(struct thread *td, struct fork_req *fr)
 	static int curfail;
 	static struct timeval lastfail;
 	int flags, pages;
-	bool killsx_locked;
+	bool killsx_locked, singlethreaded;
 
 	flags = fr->fr_flags;
 	pages = fr->fr_pages;
@@ -920,6 +920,7 @@ fork1(struct thread *td, struct fork_req *fr)
 	newproc = NULL;
 	vm2 = NULL;
 	killsx_locked = false;
+	singlethreaded = false;
 
 	/*
 	 * Increment the nprocs resource before allocations occur.
@@ -950,14 +951,37 @@ fork1(struct thread *td, struct fork_req *fr)
 	}
 
 	/*
-	 * Atomically check for signals and block threads from sending
-	 * a signal to our process group until the child is visible.
+	 * If we are possibly multi-threaded, and there is a process
+	 * sending a signal to our group right now, ensure that our
+	 * other threads cannot be chosen for the signal queueing.
+	 * Otherwise, this might delay signal action, and make the new
+	 * child escape the signaling.
 	 */
 	pg = p1->p_pgrp;
-	if (sx_slock_sig(&pg->pg_killsx) != 0) {
+	if (p1->p_numthreads > 1) {
+		if (sx_try_slock(&pg->pg_killsx) != 0) {
+			killsx_locked = true;
+		} else {
+			PROC_LOCK(p1);
+			if (thread_single(p1, SINGLE_BOUNDARY)) {
+				PROC_UNLOCK(p1);
+				error = ERESTART;
+				goto fail2;
+			}
+			PROC_UNLOCK(p1);
+			singlethreaded = true;
+		}
+	}
+
+	/*
+	 * Atomically check for signals and block processes from sending
+	 * a signal to our process group until the child is visible.
+	 */
+	if (!killsx_locked && sx_slock_sig(&pg->pg_killsx) != 0) {
 		error = ERESTART;
 		goto fail2;
-	} else if (__predict_false(p1->p_pgrp != pg || sig_intr() != 0)) {
+	}
+	if (__predict_false(p1->p_pgrp != pg || sig_intr() != 0)) {
 		/*
 		 * Either the process was moved to other process
 		 * group, or there is pending signal.  sx_slock_sig()
@@ -1062,8 +1086,8 @@ fork1(struct thread *td, struct fork_req *fr)
 	}
 
 	do_fork(td, fr, newproc, td2, vm2, fp_procdesc);
-	sx_sunlock(&pg->pg_killsx);
-	return (0);
+	error = 0;
+	goto cleanup;
 fail0:
 	error = EAGAIN;
 #ifdef MAC
@@ -1081,9 +1105,16 @@ fail2:
 		fdrop(fp_procdesc, td);
 	}
 	atomic_add_int(&nprocs, -1);
+cleanup:
 	if (killsx_locked)
 		sx_sunlock(&pg->pg_killsx);
-	pause("fork", hz / 2);
+	if (singlethreaded) {
+		PROC_LOCK(p1);
+		thread_single_end(p1, SINGLE_BOUNDARY);
+		PROC_UNLOCK(p1);
+	}
+	if (error != 0)
+		pause("fork", hz / 2);
 	return (error);
 }