git: cbbac5609115 - main - linux(4): Preserve fpu xsave state across signal delivery on amd64

From: Dmitry Chagin <dchagin_at_FreeBSD.org>
Date: Thu, 08 Jun 2023 22:33:46 UTC
The branch main has been updated by dchagin:

URL: https://cgit.FreeBSD.org/src/commit/?id=cbbac560911521c0ded3e06e713107176855fae4

commit cbbac560911521c0ded3e06e713107176855fae4
Author:     Dmitry Chagin <dchagin@FreeBSD.org>
AuthorDate: 2023-06-08 22:33:26 +0000
Commit:     Dmitry Chagin <dchagin@FreeBSD.org>
CommitDate: 2023-06-08 22:33:26 +0000

    linux(4): Preserve fpu xsave state across signal delivery on amd64
    
    PR:                     270247
    Reviewed by:            kib
    Differential Revision:  https://reviews.freebsd.org/D40444
    MFC after:              2 weeks
---
 sys/amd64/linux/linux_sysvec.c     | 99 ++++++++++++++++++++++++++++++++++++--
 sys/x86/linux/linux_x86_sigframe.h | 20 +++++++-
 2 files changed, 114 insertions(+), 5 deletions(-)

diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index acc7593308af..ba4aff6f0c15 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -295,6 +295,54 @@ linux_fxrstor(struct thread *td, mcontext_t *mcp, struct l_sigcontext *sc)
 	return (set_fpcontext(td, mcp, NULL, 0));
 }
 
+/* Restore the FPU state saved in a Linux xsave signal frame; 0 or errno. */
+static int
+linux_xrstor(struct thread *td, mcontext_t *mcp, struct l_sigcontext *sc)
+{
+	struct savefpu *fp = (struct savefpu *)&mcp->mc_fpstate[0];
+	struct proc *p = td->td_proc;
+	char *xfpustate;
+	uint32_t magic2 = 0;	/* Logged below even when its copyin fails. */
+	int error;
+
+	mcp->mc_xfpustate_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+
+	/* Legacy region of an xsave area. */
+	error = copyin(PTRIN(sc->sc_fpstate), fp, sizeof(mcp->mc_fpstate));
+	if (error != 0)
+		return (error);
+	bzero(&fp->sv_pad[0], sizeof(fp->sv_pad));
+
+	/* Extended region of an xsave area; the buffer is freed at "out". */
+	sc->sc_fpstate += sizeof(mcp->mc_fpstate);
+	xfpustate = (char *)fpu_save_area_alloc();
+	error = copyin(PTRIN(sc->sc_fpstate), xfpustate, mcp->mc_xfpustate_len);
+	if (error != 0) {
+		uprintf("pid %d (%s): linux xrstor failed\n", p->p_pid,
+		    td->td_name);
+		goto out;
+	}
+
+	/* Linux specific end of xsave area marker. */
+	sc->sc_fpstate += mcp->mc_xfpustate_len;
+	error = copyin(PTRIN(sc->sc_fpstate), &magic2, LINUX_FP_XSTATE_MAGIC2_SIZE);
+	if (error != 0 || magic2 != LINUX_FP_XSTATE_MAGIC2) {
+		uprintf("pid %d (%s): sigreturn magic2 0x%x error %d\n",
+		    p->p_pid, td->td_name, magic2, error);
+		error = (error != 0) ? error : EINVAL;	/* Bad marker. */
+		goto out;
+	}
+
+	error = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
+	if (error != 0) {
+		uprintf("pid %d (%s): sigreturn set_fpcontext error %d\n",
+		    p->p_pid, td->td_name, error);
+	}
+out:
+	fpu_save_area_free((struct savefpu *)xfpustate);
+	return (error);
+}
+
 static int
 linux_copyin_fpstate(struct thread *td, struct l_ucontext *uc)
 {
@@ -304,7 +352,10 @@ linux_copyin_fpstate(struct thread *td, struct l_ucontext *uc)
 	mc.mc_ownedfp = _MC_FPOWNED_FPU;
 	mc.mc_fpformat = _MC_FPFMT_XMM;
 
-	return (linux_fxrstor(td, &mc, &uc->uc_mcontext));
+	if ((uc->uc_flags & LINUX_UC_FP_XSTATE) != 0)
+		return (linux_xrstor(td, &mc, &uc->uc_mcontext));
+	else
+		return (linux_fxrstor(td, &mc, &uc->uc_mcontext));
 }
 
 /*
@@ -411,20 +462,60 @@ linux_fxsave(mcontext_t *mcp, void *ufp)
 	return (copyout(fx, ufp, sizeof(*fx)));
 }
 
+/* Emit the Linux xsave frame at ufp: legacy area, extended area, MAGIC2. */
+static int
+linux_xsave(mcontext_t *mcp, char *xfpusave, char *ufp)
+{
+	struct l_fpstate *fpst = (struct l_fpstate *)&mcp->mc_fpstate[0];
+	struct l_fpx_sw_bytes *sw = &fpst->sw_reserved;
+	uint32_t marker = LINUX_FP_XSTATE_MAGIC2;
+	int error;
+
+	/* Legacy region: describe the frame in its software-reserved bytes. */
+	sw->magic1 = LINUX_FP_XSTATE_MAGIC1;
+	sw->xfeatures = xsave_mask;
+	sw->xstate_size = mcp->mc_xfpustate_len + sizeof(*fpst);
+	sw->extended_size = sw->xstate_size + LINUX_FP_XSTATE_MAGIC2_SIZE;
+
+	error = copyout(fpst, ufp, sizeof(*fpst));
+	if (error != 0)
+		return (error);
+
+	/* Extended region of an xsave area, right after the legacy one. */
+	ufp += sizeof(*fpst);
+	error = copyout(xfpusave, ufp, mcp->mc_xfpustate_len);
+	if (error != 0)
+		return (error);
+
+	/* Linux specific end of xsave area marker. */
+	ufp += mcp->mc_xfpustate_len;
+	return (copyout(&marker, ufp, LINUX_FP_XSTATE_MAGIC2_SIZE));
+}
+
 static int
 linux_copyout_fpstate(struct thread *td, struct l_ucontext *uc, char **sp)
 {
+	size_t xfpusave_len;
+	char *xfpusave;
 	mcontext_t mc;
 	char *ufp = *sp;
 
-	get_fpcontext(td, &mc, NULL, NULL);
+	get_fpcontext(td, &mc, &xfpusave, &xfpusave_len);
 	KASSERT(mc.mc_fpformat != _MC_FPFMT_NODEV, ("fpu not present"));
 
-	/* fxsave area */
+	/* Room for the legacy fxsave area (struct l_fpstate). */
 	ufp -= sizeof(struct l_fpstate);
+	if (xfpusave != NULL) {
+		/* Room for the extended xsave region and the MAGIC2 marker. */
+		ufp -= (xfpusave_len + LINUX_FP_XSTATE_MAGIC2_SIZE);
+		uc->uc_flags |= LINUX_UC_FP_XSTATE;
+	}
 	*sp = ufp = (char *)((unsigned long)ufp & ~0x3Ful);
 
-	return (linux_fxsave(&mc, ufp));
+	if (xfpusave != NULL)
+		return (linux_xsave(&mc, xfpusave, ufp));
+	else
+		return (linux_fxsave(&mc, ufp));
 }
 
 /*
diff --git a/sys/x86/linux/linux_x86_sigframe.h b/sys/x86/linux/linux_x86_sigframe.h
index e5687069651f..c748073a3457 100644
--- a/sys/x86/linux/linux_x86_sigframe.h
+++ b/sys/x86/linux/linux_x86_sigframe.h
@@ -35,6 +35,20 @@
 #ifndef _X86_LINUX_SIGFRAME_H_
 #define	_X86_LINUX_SIGFRAME_H_
 
+#define	LINUX_UC_FP_XSTATE		0x1	/* uc_flags: xsave frame */
+
+#define	LINUX_FP_XSTATE_MAGIC1		0x46505853U /* sw_reserved.magic1 */
+#define	LINUX_FP_XSTATE_MAGIC2		0x46505845U /* ends the xsave area */
+#define	LINUX_FP_XSTATE_MAGIC2_SIZE	sizeof(uint32_t)
+
+struct l_fpx_sw_bytes {
+	uint32_t	magic1;		/* LINUX_FP_XSTATE_MAGIC1 */
+	uint32_t	extended_size;	/* xstate_size + MAGIC2 marker size */
+	uint64_t	xfeatures;	/* linux_xsave stores xsave_mask */
+	uint32_t	xstate_size;	/* Legacy + extended region size */
+	uint32_t	padding[7];
+};
+
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 
 /* The Linux sigcontext, pretty much a standard 386 trapframe. */
@@ -140,7 +154,11 @@ struct l_fpstate {
 	u_int32_t mxcsr_mask;
 	u_int8_t st[8][16];
 	u_int8_t xmm[16][16];
-	u_int32_t reserved2[24];
+	u_int32_t reserved2[12];
+	union {
+		u_int32_t		reserved3[12];
+		struct l_fpx_sw_bytes	sw_reserved;
+	};
 } __aligned(16);
 
 struct l_sigcontext {