git: 81a37995c757 - main - killpg(): close a race with fork(), part 2
Date: Tue, 04 Jul 2023 03:44:11 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=81a37995c757b4e3ad8a5c699864197fd1ebdcf5

commit 81a37995c757b4e3ad8a5c699864197fd1ebdcf5
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-06-16 09:52:10 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-07-04 03:43:16 +0000

    killpg(): close a race with fork(), part 2

    When we are sending a terminating signal to the group, killpg() needs
    to guarantee that all group members will be terminated (it does not
    need to ensure that they have terminated by the time killpg()
    returns).  The pg_killsx change eliminates the largest window there,
    but still, if a multithreaded process is signalled, the following
    could happen:
    - thread 1 is selected for the signal delivery and gets descheduled
    - thread 2 waits for the pg_killsx lock, obtains it, and forks
    - thread 1 continues executing and terminates the process
    This scenario still allows the child to escape.

    To fix it, count the number of signals sent to the process with
    killpg(2) in the p_killpg_cnt variable, which is incremented in
    killpg() and decremented after the signal handler frame is created,
    or in exit1() after single-threading.  This way we avoid forking
    while a termination is due.

    Noted and reviewed by:	markj (previous version)
    Tested by:	pho
    Sponsored by:	The FreeBSD Foundation
    MFC after:	2 weeks
    Differential revision:	https://reviews.freebsd.org/D40493
---
 sys/kern/kern_exit.c   | 13 ++++++++++++-
 sys/kern/kern_fork.c   |  3 ++-
 sys/kern/kern_sig.c    | 37 ++++++++++++++++++++++++++++++++-----
 sys/kern/kern_thread.c |  6 +++---
 sys/sys/proc.h         |  2 ++
 sys/sys/signalvar.h    |  3 ++-
 6 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index a92d5cc0f642..a6f3ca2a2d66 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -220,13 +220,19 @@ proc_set_p2_wexit(struct proc *p)
 	p->p_flag2 |= P2_WEXIT;
 }
 
+void
+exit1(struct thread *td, int rval, int signo)
+{
+	exit2(td, rval, signo, false);
+}
+
 /*
  * Exit: deallocate address space and other resources, change proc state to
  * zombie, and unlink proc from allproc and parent's lists.  Save exit status
  * and rusage for wait().  Check for child processes and orphan them.
  */
 void
-exit1(struct thread *td, int rval, int signo)
+exit2(struct thread *td, int rval, int signo, bool dec_killpg_cnt)
 {
 	struct proc *p, *nq, *q, *t;
 	struct thread *tdt;
@@ -304,6 +310,11 @@ exit1(struct thread *td, int rval, int signo)
 	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
 	racct_sub(p, RACCT_NTHR, 1);
 
+	if (dec_killpg_cnt) {
+		MPASS(atomic_load_int(&p->p_killpg_cnt) > 0);
+		atomic_add_int(&p->p_killpg_cnt, -1);
+	}
+
 	/* Let event handler change exit status */
 	p->p_xexit = rval;
 	p->p_xsig = signo;
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 81bee99fa1ca..180c96ae33ef 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -957,7 +957,8 @@ fork1(struct thread *td, struct fork_req *fr)
 	if (sx_slock_sig(&pg->pg_killsx) != 0) {
 		error = ERESTART;
 		goto fail2;
-	} else if (__predict_false(p1->p_pgrp != pg || sig_intr() != 0)) {
+	} else if (__predict_false(p1->p_pgrp != pg || sig_intr() != 0 ||
+	    atomic_load_int(&p1->p_killpg_cnt) != 0)) {
 		/*
 		 * Either the process was moved to other process
 		 * group, or there is pending signal.  sx_slock_sig()
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 948ef97c0715..18756d53e98c 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -121,6 +121,7 @@ static int	filt_signal(struct knote *kn, long hint);
 static struct thread *sigtd(struct proc *p, int sig, bool fast_sigblock);
 static void	sigqueue_start(void);
 static void	sigfastblock_setpend(struct thread *td, bool resched);
+static void	sigexit1(struct thread *td, int sig, ksiginfo_t *ksi) __dead2;
 
 static uma_zone_t	ksiginfo_zone = NULL;
 struct filterops sig_filtops = {
@@ -371,6 +372,15 @@ sigqueue_start(void)
 	    TDP_OLDMASK, ast_sigsuspend);
 }
 
+static void
+sig_handle_killpg(struct proc *p, ksiginfo_t *ksi)
+{
+	if ((ksi->ksi_flags & KSI_KILLPG) != 0 && p != NULL) {
+		MPASS(atomic_load_int(&p->p_killpg_cnt) > 0);
+		atomic_add_int(&p->p_killpg_cnt, -1);
+	}
+}
+
 ksiginfo_t *
 ksiginfo_alloc(int mwait)
 {
@@ -470,6 +480,7 @@ sigqueue_take(ksiginfo_t *ksi)
 	p = sq->sq_proc;
 	TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 	ksi->ksi_sigq = NULL;
+	sig_handle_killpg(p, ksi);
 	if (!(ksi->ksi_flags & KSI_EXT) && p != NULL)
 		p->p_pendingcnt--;
 
@@ -567,6 +578,7 @@ sigqueue_flush(sigqueue_t *sq)
 	while ((ksi = TAILQ_FIRST(&sq->sq_list)) != NULL) {
 		TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 		ksi->ksi_sigq = NULL;
+		sig_handle_killpg(p, ksi);
 		if (ksiginfo_tryfree(ksi) && p != NULL)
 			p->p_pendingcnt--;
 	}
@@ -642,6 +654,7 @@ sigqueue_delete_set(sigqueue_t *sq, const sigset_t *set)
 		if (SIGISMEMBER(*set, ksi->ksi_signo)) {
 			TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 			ksi->ksi_sigq = NULL;
+			sig_handle_killpg(p, ksi);
 			if (ksiginfo_tryfree(ksi) && p != NULL)
 				p->p_pendingcnt--;
 		}
@@ -1458,7 +1471,7 @@ kern_sigtimedwait(struct thread *td, sigset_t waitset, ksiginfo_t *ksi,
 #endif
 		if (sig == SIGKILL) {
 			proc_td_siginfo_capture(td, &ksi->ksi_info);
-			sigexit(td, sig);
+			sigexit1(td, sig, ksi);
 		}
 	}
 	PROC_UNLOCK(p);
@@ -1936,8 +1949,10 @@ kern_kill(struct thread *td, pid_t pid, int signum)
 	case -1:		/* broadcast signal */
 		return (killpg1(td, signum, 0, 1, &ksi));
 	case 0:			/* signal own process group */
+		ksi.ksi_flags |= KSI_KILLPG;
 		return (killpg1(td, signum, 0, 0, &ksi));
 	default:		/* negative explicit process group */
+		ksi.ksi_flags |= KSI_KILLPG;
 		return (killpg1(td, signum, -pid, 0, &ksi));
 	}
 	/* NOTREACHED */
@@ -1988,6 +2003,7 @@ okillpg(struct thread *td, struct okillpg_args *uap)
 	ksi.ksi_code = SI_USER;
 	ksi.ksi_pid = td->td_proc->p_pid;
 	ksi.ksi_uid = td->td_ucred->cr_ruid;
+	ksi.ksi_flags |= KSI_KILLPG;
 	return (killpg1(td, uap->signum, uap->pgid, 0, &ksi));
 }
 #endif /* COMPAT_43 */
@@ -2375,6 +2391,10 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
 		ret = sigqueue_add(sigqueue, sig, ksi);
 		if (ret != 0)
 			return (ret);
+		if ((ksi->ksi_flags & KSI_KILLPG) != 0) {
+			sx_assert(&p->p_pgrp->pg_killsx, SX_XLOCKED);
+			atomic_add_int(&p->p_killpg_cnt, 1);
+		}
 		signotify(td);
 		/*
 		 * Defer further processing for signals which are held,
@@ -3425,7 +3445,7 @@ postsig(int sig)
 		 */
 		mtx_unlock(&ps->ps_mtx);
 		proc_td_siginfo_capture(td, &ksi.ksi_info);
-		sigexit(td, sig);
+		sigexit1(td, sig, &ksi);
 		/* NOTREACHED */
 	} else {
 		/*
@@ -3453,6 +3473,7 @@ postsig(int sig)
 		if (p->p_sig == sig) {
 			p->p_sig = 0;
 		}
+		sig_handle_killpg(p, &ksi);
 		(*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask);
 		postsig_done(sig, td, ps);
 	}
@@ -3610,8 +3631,8 @@ killproc(struct proc *p, const char *why)
  * If dumping core, save the signal number for the debugger.  Calls exit and
  * does not return.
  */
-void
-sigexit(struct thread *td, int sig)
+static void
+sigexit1(struct thread *td, int sig, ksiginfo_t *ksi)
 {
 	struct proc *p = td->td_proc;
 
@@ -3650,10 +3671,16 @@ sigexit(struct thread *td, int sig)
 		    sig & WCOREFLAG ? " (core dumped)" : "");
 	} else
 		PROC_UNLOCK(p);
-	exit1(td, 0, sig);
+	exit2(td, 0, sig, ksi != NULL && (ksi->ksi_flags & KSI_KILLPG) != 0);
 	/* NOTREACHED */
 }
 
+void
+sigexit(struct thread *td, int sig)
+{
+	sigexit1(td, sig, NULL);
+}
+
 /*
  * Send queued SIGCHLD to parent when child process's state
  * is changed.
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 67712450c128..fad1abd1be6c 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -99,7 +99,7 @@ _Static_assert(offsetof(struct proc, p_pid) == 0xc4,
     "struct proc KBI p_pid");
 _Static_assert(offsetof(struct proc, p_filemon) == 0x3c8,
     "struct proc KBI p_filemon");
-_Static_assert(offsetof(struct proc, p_comm) == 0x3e0,
+_Static_assert(offsetof(struct proc, p_comm) == 0x3e4,
     "struct proc KBI p_comm");
 _Static_assert(offsetof(struct proc, p_emuldata) == 0x4d0,
     "struct proc KBI p_emuldata");
@@ -119,9 +119,9 @@ _Static_assert(offsetof(struct proc, p_pid) == 0x78,
     "struct proc KBI p_pid");
 _Static_assert(offsetof(struct proc, p_filemon) == 0x270,
     "struct proc KBI p_filemon");
-_Static_assert(offsetof(struct proc, p_comm) == 0x284,
+_Static_assert(offsetof(struct proc, p_comm) == 0x288,
     "struct proc KBI p_comm");
-_Static_assert(offsetof(struct proc, p_emuldata) == 0x318,
+_Static_assert(offsetof(struct proc, p_emuldata) == 0x31c,
     "struct proc KBI p_emuldata");
 #endif
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index d79b7a440168..5c77c2d683c1 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -722,6 +722,7 @@ struct proc {
 	int		p_pendingexits; /* (c) Count of pending thread exits. */
 	struct filemon	*p_filemon;	/* (c) filemon-specific data. */
 	int		p_pdeathsig;	/* (c) Signal from parent on exit. */
+	int		p_killpg_cnt;
 
 /* End area that is zeroed on creation. */
 #define	p_endzero	p_magic
 
@@ -1236,6 +1237,7 @@ void	userret(struct thread *, struct trapframe *);
 
 void	cpu_exit(struct thread *);
 void	exit1(struct thread *, int, int) __dead2;
+void	exit2(struct thread *, int, int, bool) __dead2;
 void	cpu_copy_thread(struct thread *td, struct thread *td0);
 bool	cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map);
 int	cpu_fetch_syscall_args(struct thread *td);
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 59b26105effd..611eb11a8629 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -240,7 +240,8 @@ typedef struct ksiginfo {
 #define	KSI_SIGQ	0x08	/* Generated by sigqueue, might ret EAGAIN. */
 #define	KSI_HEAD	0x10	/* Insert into head, not tail. */
 #define	KSI_PTRACE	0x20	/* Generated by ptrace. */
-#define	KSI_COPYMASK	(KSI_TRAP | KSI_SIGQ | KSI_PTRACE)
+#define	KSI_KILLPG	0x40	/* killpg - update p_killpg_cnt */
+#define	KSI_COPYMASK	(KSI_TRAP | KSI_SIGQ | KSI_PTRACE | KSI_KILLPG)
 
 #define	KSI_ONQ(ksi)	((ksi)->ksi_sigq != NULL)
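
To make the window described in the commit message concrete, here is a
minimal userspace sketch of the victim side of the race.  It is
illustrative only and not part of the commit; the file name, structure,
and timing are invented, and actually hitting the window requires
unlucky scheduling:

/*
 * race.c -- illustrative only, not from the commit.  Thread 2 forks
 * in a loop while thread 1 may be selected to act on a group-wide
 * terminating signal.  Compile with: cc -pthread -o race race.c
 */
#include <sys/wait.h>

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void *
forker(void *arg)
{
	pid_t pid;

	(void)arg;
	for (;;) {
		pid = fork();		/* thread 2: fork under way... */
		if (pid == 0) {
			/*
			 * Child: if the parent's group was already
			 * "fully" signalled, we escaped the kill.
			 */
			sleep(1);
			printf("child %d escaped killpg()\n", (int)getpid());
			_exit(0);
		}
		if (pid > 0)
			waitpid(pid, NULL, 0);
	}
}

int
main(void)
{
	pthread_t td;

	pthread_create(&td, NULL, forker, NULL);
	/*
	 * Meanwhile thread 1 may be selected to act on a SIGTERM sent
	 * by killpg() from another process and terminate the whole
	 * process -- after thread 2's fork() has created a child.
	 */
	pause();
	return (0);
}

Running kill -TERM -<pgid> against this program from another shell
could, before this change, print the escape message whenever a fork()
in thread 2 raced with the delivery of the group signal to thread 1.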
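
The p_killpg_cnt protocol itself reduces to a small state machine.  The
sketch below uses C11 atomics in place of the kernel's
atomic_add_int()/atomic_load_int(); the struct and function names are
invented, and the locking the real code relies on (pg_killsx, the proc
lock) is only noted in comments:

/*
 * killpg_cnt.c -- standalone sketch of the counter protocol; not
 * kernel code.  Compile with: cc -o killpg_cnt killpg_cnt.c
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

struct proc_sketch {
	atomic_int p_killpg_cnt;	/* killpg() signals not yet acted on */
};

/*
 * tdsendsignal(): queueing a KSI_KILLPG signal bumps the counter.  In
 * the kernel this runs with pg_killsx held exclusively, which is what
 * the sx_assert() in the patch checks.
 */
static void
killpg_signal_queued(struct proc_sketch *p)
{
	atomic_fetch_add(&p->p_killpg_cnt, 1);
}

/*
 * postsig() once the handler frame is built, the sigqueue_*() dequeue
 * paths, or exit2(): the signal has been acted on or discarded.
 */
static void
killpg_signal_consumed(struct proc_sketch *p)
{
	int old;

	old = atomic_fetch_sub(&p->p_killpg_cnt, 1);
	assert(old > 0);	/* mirrors the MPASS() in the patch */
}

/*
 * fork1(): with pg_killsx held shared, refuse to create a child while
 * a killpg() signal is still due; the real code fails the syscall with
 * ERESTART so it is retried after the signal is handled.
 */
static bool
fork_allowed(struct proc_sketch *p)
{
	return (atomic_load(&p->p_killpg_cnt) == 0);
}

int
main(void)
{
	struct proc_sketch p = { 0 };

	killpg_signal_queued(&p);
	assert(!fork_allowed(&p));	/* fork blocked while kill is due */
	killpg_signal_consumed(&p);
	assert(fork_allowed(&p));
	return (0);
}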
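
For completeness, the sender side is plain killpg(2); a trivial example
(again, not from the commit):

/*
 * killer.c -- illustrative only.  Sends SIGTERM to an entire process
 * group; after this commit, no member of the group can fork() a child
 * that escapes the terminating signal.
 */
#include <sys/types.h>

#include <err.h>
#include <signal.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
	pid_t pgid;

	if (argc != 2)
		errx(1, "usage: killer <pgid>");
	pgid = (pid_t)strtol(argv[1], NULL, 10);
	if (killpg(pgid, SIGTERM) == -1)
		err(1, "killpg");
	return (0);
}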