git: 96064e0924d8 - releng/13.2 - amd64: Avoid copying td_frame from kernel procs

From: Ed Maste <emaste_at_FreeBSD.org>
Date: Tue, 07 Mar 2023 16:37:45 UTC
The branch releng/13.2 has been updated by emaste:

URL: https://cgit.FreeBSD.org/src/commit/?id=96064e0924d8e456f50252205bf3b221cb2e835c

commit 96064e0924d8e456f50252205bf3b221cb2e835c
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2021-09-25 14:15:31 +0000
Commit:     Ed Maste <emaste@FreeBSD.org>
CommitDate: 2023-03-07 16:36:02 +0000

    amd64: Avoid copying td_frame from kernel procs
    
    When creating a new thread, we unconditionally copy td_frame from the
    creating thread.  For threads which never return to user mode, this is
    unnecessary since td_frame just points to the base of the stack or a
    random interrupt frame.
    
    If KASAN is configured this copying may also trigger false positives
    since the td_frame region may contain poisoned stack regions.  It was
    not noticed before since thread0 used a dummy proc0_tf trapframe, and
    kernel procs are generally created by thread0.  Since commit
    df8dd6025af88a99d34f549fa9591a9b8f9b75b1, though, we call
    cpu_thread_alloc(&thread0) when initializing FPU state, which
    reinitializes thread0.td_frame.
    
    Work around the problem by not copying the frame unless the copying
    thread came from user mode.  While here, de-duplicate the copying and
    remove redundant re(initialization) of td_frame.
    
    Reported by:    syzbot+2ec89312bffbf38d9aec@syzkaller.appspotmail.com
    Reviewed by:    kib
    Approved by:    re (delphij)
    Fixes:          df8dd6025af8
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D32057
    
    (cherry picked from commit ca1e447b1048b26b855d7f7fbcdad78309e4d741)
    (cherry picked from commit cf25fa7f31e7bda9b8aa20757659524db9c4ef97)
---
 sys/amd64/amd64/vm_machdep.c | 48 +++++++++++++++++---------------------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index e308f4a44b3e..c9c498180c7e 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -193,6 +193,24 @@ copy_thread(struct thread *td1, struct thread *td2)
 	td2->td_md.md_spinlock_count = 1;
 	td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
 	pmap_thread_init_invl_gen(td2);
+
+	/*
+	 * Copy the trap frame for the return to user mode as if from a syscall.
+	 * This copies most of the user mode register values.  Some of these
+	 * registers are rewritten by cpu_set_upcall() and linux_set_upcall().
+	 */
+	if ((td1->td_proc->p_flag & P_KPROC) == 0) {
+		bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
+
+		/*
+		 * If the current thread has the trap bit set (i.e. a debugger
+		 * had single stepped the process to the system call), we need
+		 * to clear the trap flag from the new frame. Otherwise, the new
+		 * thread will receive a (likely unexpected) SIGTRAP when it
+		 * executes the first instruction after returning to userland.
+		 */
+		td2->td_frame->tf_rflags &= ~PSL_T;
+	}
 }
 
 /*
@@ -236,23 +254,9 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 	mdp2 = &p2->p_md;
 	bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
 
-	/*
-	 * Copy the trap frame for the return to user mode as if from a
-	 * syscall.  This copies most of the user mode register values.
-	 */
-	td2->td_frame = (struct trapframe *)td2->td_md.md_stack_base - 1;
-	bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
-
 	/* Set child return values. */
 	p2->p_sysent->sv_set_fork_retval(td2);
 
-	/*
-	 * If the parent process has the trap bit set (i.e. a debugger
-	 * had single stepped the process to the system call), we need
-	 * to clear the trap flag from the new frame.
-	 */
-	td2->td_frame->tf_rflags &= ~PSL_T;
-
 	/* As on i386, do not copy io permission bitmap. */
 	pcb2->pcb_tssp = NULL;
 
@@ -602,22 +606,6 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
 {
 	copy_thread(td0, td);
 
-	/*
-	 * Copy user general-purpose registers.
-	 *
-	 * Some of these registers are rewritten by cpu_set_upcall()
-	 * and linux_set_upcall().
-	 */
-	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
-
-	/* If the current thread has the trap bit set (i.e. a debugger had
-	 * single stepped the process to the system call), we need to clear
-	 * the trap flag from the new frame. Otherwise, the new thread will
-	 * receive a (likely unexpected) SIGTRAP when it executes the first
-	 * instruction after returning to userland.
-	 */
-	td->td_frame->tf_rflags &= ~PSL_T;
-
 	set_pcb_flags_raw(td->td_pcb, PCB_FULL_IRET);
 }