From nobody Sun Oct 10 09:24:09 2021 X-Original-To: dev-commits-src-all@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 84D5212DECD8; Sun, 10 Oct 2021 09:24:10 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4HRxMF60kyz3Mrc; Sun, 10 Oct 2021 09:24:09 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 2D7E9304D; Sun, 10 Oct 2021 09:24:09 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 19A9O9I8003308; Sun, 10 Oct 2021 09:24:09 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 19A9O9PX003307; Sun, 10 Oct 2021 09:24:09 GMT (envelope-from git) Date: Sun, 10 Oct 2021 09:24:09 GMT Message-Id: <202110100924.19A9O9PX003307@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org From: Konstantin Belousov Subject: git: 4d0d7ea9e752 - stable/13 - amd64: stop using top of the thread' kernel stack for FPU user save area List-Id: Commit messages for all branches of the src repository List-Archive: https://lists.freebsd.org/archives/dev-commits-src-all List-Help: List-Post: List-Subscribe: List-Unsubscribe: Sender: owner-dev-commits-src-all@freebsd.org X-BeenThere: dev-commits-src-all@freebsd.org MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: kib X-Git-Repository: src X-Git-Refname: refs/heads/stable/13 X-Git-Reftype: branch X-Git-Commit: 4d0d7ea9e752d7fce2a2f46e4c6c02951aaaf504 Auto-Submitted: auto-generated X-ThisMailContainsUnwantedMimeParts: N The branch stable/13 has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=4d0d7ea9e752d7fce2a2f46e4c6c02951aaaf504 commit 4d0d7ea9e752d7fce2a2f46e4c6c02951aaaf504 Author: Konstantin Belousov AuthorDate: 2021-09-13 21:05:47 +0000 Commit: Konstantin Belousov CommitDate: 2021-10-10 09:21:17 +0000 amd64: stop using top of the thread' kernel stack for FPU user save area MFC note: this commit changes layout of td_md for amd64, resulting in static checks for struct thread ABI in kern_thread.c to fail. Next two commits restore the layout, I decided to not overcomplicate the merge and not do the work that is going to be overwritten immediately. (cherry picked from commit df8dd6025af88a99d34f549fa9591a9b8f9b75b1) --- sys/amd64/amd64/exec_machdep.c | 4 ++-- sys/amd64/amd64/fpu.c | 2 ++ sys/amd64/amd64/machdep.c | 14 -------------- sys/amd64/amd64/vm_machdep.c | 22 +++++++++++++--------- sys/amd64/ia32/ia32_signal.c | 6 +++--- sys/amd64/include/proc.h | 2 ++ sys/kern/kern_thread.c | 2 +- 7 files changed, 23 insertions(+), 29 deletions(-) diff --git a/sys/amd64/amd64/exec_machdep.c b/sys/amd64/amd64/exec_machdep.c index 426bb08edd7c..e85ebe741353 100644 --- a/sys/amd64/amd64/exec_machdep.c +++ b/sys/amd64/amd64/exec_machdep.c @@ -135,7 +135,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); - xfpusave = __builtin_alloca(xfpusave_len); + xfpusave = (char *)td->td_md.md_fpu_scratch; } else { xfpusave_len = 0; xfpusave = NULL; @@ -674,7 +674,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp) if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); - xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); + xfpustate = (char *)td->td_md.md_fpu_scratch; ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index d7936b3b1922..24986958d4ca 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -448,6 +448,8 @@ fpuinitstate(void *arg __unused) xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); } + cpu_thread_alloc(&thread0); + saveintr = intr_disable(); stop_emulating(); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index db1b88faeabb..512fee0de7df 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1266,7 +1266,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; - struct xstate_hdr *xhdr; uint64_t cr3, rsp0; pml4_entry_t *pml4e; pdp_entry_t *pdpe; @@ -1572,19 +1571,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) msgbufinit(msgbufp, msgbufsize); fpuinit(); - /* - * Reinitialize thread0's stack base now that the xsave area size is - * known. Set up thread0's pcb save area after fpuinit calculated fpu - * save area size. Zero out the extended state header in fpu save area. - */ - set_top_of_stack_td(&thread0); - thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0); - bzero(thread0.td_pcb->pcb_save, cpu_max_ext_state_size); - if (use_xsave) { - xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + - 1); - xhdr->xstate_bv = xsave_mask; - } /* make an initial tss so cpu can get interrupt stack on syscall! */ rsp0 = thread0.td_md.md_stack_base; /* Ensure the stack is aligned to 16 bytes */ diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 7d65269410e0..5c3cd638c92d 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -90,19 +90,17 @@ void set_top_of_stack_td(struct thread *td) { td->td_md.md_stack_base = td->td_kstack + - td->td_kstack_pages * PAGE_SIZE - - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); + td->td_kstack_pages * PAGE_SIZE; } struct savefpu * get_pcb_user_save_td(struct thread *td) { - vm_offset_t p; - - p = td->td_md.md_stack_base; - KASSERT((p % XSAVE_AREA_ALIGN) == 0, - ("Unaligned pcb_user_save area ptr %#lx td %p", p, td)); - return ((struct savefpu *)p); + KASSERT(((vm_offset_t)td->td_md.md_usr_fpu_save % + XSAVE_AREA_ALIGN) == 0, + ("Unaligned pcb_user_save area ptr %p td %p", + td->td_md.md_usr_fpu_save, td)); + return (td->td_md.md_usr_fpu_save); } struct pcb * @@ -384,6 +382,8 @@ cpu_thread_alloc(struct thread *td) set_top_of_stack_td(td); td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1; + td->td_md.md_usr_fpu_save = fpu_save_area_alloc(); + td->td_md.md_fpu_scratch = fpu_save_area_alloc(); pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); @@ -395,8 +395,12 @@ cpu_thread_alloc(struct thread *td) void cpu_thread_free(struct thread *td) { - cpu_thread_clean(td); + + fpu_save_area_free(td->td_md.md_usr_fpu_save); + td->td_md.md_usr_fpu_save = NULL; + fpu_save_area_free(td->td_md.md_fpu_scratch); + td->td_md.md_fpu_scratch = NULL; } bool diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c index 49b5797d68fd..9b67c7001a87 100644 --- a/sys/amd64/ia32/ia32_signal.c +++ b/sys/amd64/ia32/ia32_signal.c @@ -210,7 +210,7 @@ ia32_set_mcontext(struct thread *td, struct ia32_mcontext *mcp) if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); - xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); + xfpustate = (char *)td->td_md.md_fpu_scratch; ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate, mcp->mc_xfpustate_len); if (ret != 0) @@ -579,7 +579,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); - xfpusave = __builtin_alloca(xfpusave_len); + xfpusave = (char *)td->td_md.md_fpu_scratch; } else { xfpusave_len = 0; xfpusave = NULL; @@ -882,7 +882,7 @@ freebsd32_sigreturn(td, uap) td->td_proc->p_pid, td->td_name, xfpustate_len); return (EINVAL); } - xfpustate = __builtin_alloca(xfpustate_len); + xfpustate = (char *)td->td_md.md_fpu_scratch; error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate), xfpustate, xfpustate_len); if (error != 0) { diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h index 59796e729ac4..12fb7bf9724c 100644 --- a/sys/amd64/include/proc.h +++ b/sys/amd64/include/proc.h @@ -75,6 +75,8 @@ struct mdthread { int md_efirt_dis_pf; /* (k) */ struct pcb md_pcb; vm_offset_t md_stack_base; + struct savefpu *md_usr_fpu_save; + struct savefpu *md_fpu_scratch; }; struct mdproc { diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 48aac8e057b8..a8282c94b8e1 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -90,7 +90,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x104, "struct thread KBI td_pflags"); _Static_assert(offsetof(struct thread, td_frame) == 0x4a0, "struct thread KBI td_frame"); -_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0, +_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0, "struct thread KBI td_emuldata"); _Static_assert(offsetof(struct proc, p_flag) == 0xb8, "struct proc KBI p_flag");