git: a6662c37b6ff - main - powerpc: Implement fpu_kern_enter/fpu_kern_leave

From: Justin Hibbits <jhibbits_at_FreeBSD.org>
Date: Sun, 17 Sep 2023 17:21:34 UTC
The branch main has been updated by jhibbits:

URL: https://cgit.FreeBSD.org/src/commit/?id=a6662c37b6ffee46e18be5f7570149edc64c1d0b

commit a6662c37b6ffee46e18be5f7570149edc64c1d0b
Author:     Shawn Anastasio <sanastasio@raptorengineering.com>
AuthorDate: 2023-09-17 14:40:48 +0000
Commit:     Justin Hibbits <jhibbits@FreeBSD.org>
CommitDate: 2023-09-17 17:21:04 +0000

    powerpc: Implement fpu_kern_enter/fpu_kern_leave
    
    Summary:
    Provide an implementation of fpu_kern_enter/fpu_kern_leave for PPC to
    enable FPU, VSX, and Altivec usage in-kernel. The functions currently
    only support FPU_KERN_NOCTX, but this is sufficient for ossl(4) and many
    other users of the API.
    
    This patchset has been tested on powerpc64le using a modified version of
    the in-tree tools/tools/crypto/cryptocheck.c tool to check for FPU/Vec
    register clobbering along with a follow-up patch to enable ossl(4) on
    powerpc64*.
    
    Reviewed by:    jhibbits
    MFC after:      2 weeks
    Differential Revision: https://reviews.freebsd.org/D41540
---
 share/man/man9/fpu_kern.9          |   5 +-
 sys/powerpc/include/altivec.h      |   2 +
 sys/powerpc/include/fpu.h          |  20 ++++++
 sys/powerpc/include/pcb.h          |   7 ++-
 sys/powerpc/powerpc/altivec.c      |  34 +++++++++-
 sys/powerpc/powerpc/exec_machdep.c |  21 +++----
 sys/powerpc/powerpc/fpu.c          | 125 +++++++++++++++++++++++++++++++++++++
 sys/sys/param.h                    |   2 +-
 8 files changed, 199 insertions(+), 17 deletions(-)

diff --git a/share/man/man9/fpu_kern.9 b/share/man/man9/fpu_kern.9
index c9dd58e96adf..92dc0eaa7e17 100644
--- a/share/man/man9/fpu_kern.9
+++ b/share/man/man9/fpu_kern.9
@@ -185,7 +185,8 @@ and false otherwise.
 .Sh NOTES
 The
 .Nm
-is currently implemented only for the i386, amd64, and arm64 architectures.
+is currently implemented only for the i386, amd64, arm64, and powerpc
+architectures.
 .Pp
 There is no way to handle floating point exceptions raised from
 kernel mode.
@@ -205,6 +206,8 @@ facitily and this manual page were written by
 .An Konstantin Belousov Aq Mt kib@FreeBSD.org .
 The arm64 support was added by
 .An Andrew Turner Aq Mt andrew@FreeBSD.org .
+The powerpc support was added by
+.An Shawn Anastasio Aq Mt sanastasio@raptorengineering.com .
 .Sh BUGS
 .Fn fpu_kern_leave
 should probably have type
diff --git a/sys/powerpc/include/altivec.h b/sys/powerpc/include/altivec.h
index 581a568b7034..e5151529f698 100644
--- a/sys/powerpc/include/altivec.h
+++ b/sys/powerpc/include/altivec.h
@@ -35,5 +35,7 @@
 void    enable_vec(struct thread *);
 void    save_vec(struct thread *);
 void    save_vec_nodrop(struct thread *);
+void    enable_vec_kern(void);
+void    disable_vec(struct thread *td);
 
 #endif	/* _MACHINE_ALTIVEC_H_ */
diff --git a/sys/powerpc/include/fpu.h b/sys/powerpc/include/fpu.h
index 30df3a470b09..aa5640ea31fb 100644
--- a/sys/powerpc/include/fpu.h
+++ b/sys/powerpc/include/fpu.h
@@ -76,6 +76,26 @@ void    save_fpu(struct thread *);
 void    save_fpu_nodrop(struct thread *);
 void    cleanup_fpscr(void);
 u_int   get_fpu_exception(struct thread *);
+void    enable_fpu_kern(void);
+void    disable_fpu(struct thread *td);
+
+/*
+ * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread().
+ */
+#define	FPU_KERN_NORMAL	0x0000
+#define	FPU_KERN_NOWAIT	0x0001
+#define	FPU_KERN_KTHR	0x0002
+#define	FPU_KERN_NOCTX	0x0004
+
+struct fpu_kern_ctx;
+
+struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags);
+void	fpu_kern_free_ctx(struct fpu_kern_ctx *ctx);
+void	fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx,
+	    u_int flags);
+int	fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx);
+int	fpu_kern_thread(u_int flags);
+int	is_fpu_kern_thread(u_int flags);
 
 #endif /* _KERNEL */
 
diff --git a/sys/powerpc/include/pcb.h b/sys/powerpc/include/pcb.h
index e5e6e3223406..050ada6b0f64 100644
--- a/sys/powerpc/include/pcb.h
+++ b/sys/powerpc/include/pcb.h
@@ -48,7 +48,7 @@ struct pcb {
 	register_t	pcb_toc;		/* toc pointer */
 	register_t	pcb_lr;			/* link register */
 	register_t	pcb_dscr;		/* dscr value */
-	register_t	pcb_fscr;		
+	register_t	pcb_fscr;
 	register_t	pcb_tar;
 	struct		pmap *pcb_pm;		/* pmap of our vmspace */
 	jmp_buf		*pcb_onfault;		/* For use during
@@ -56,11 +56,14 @@ struct pcb {
 	int		pcb_flags;
 #define	PCB_FPU		0x1	/* Process uses FPU */
 #define	PCB_FPREGS	0x2	/* Process had FPU registers initialized */
-#define	PCB_VEC		0x4	/* Process had Altivec initialized */
+#define	PCB_VEC		0x4	/* Process uses Altivec */
 #define	PCB_VSX		0x8	/* Process had VSX initialized */
 #define	PCB_CDSCR	0x10	/* Process had Custom DSCR initialized */
 #define	PCB_HTM		0x20	/* Process had HTM initialized */
 #define	PCB_CFSCR	0x40	/* Process had FSCR updated */
+#define	PCB_KERN_FPU    0x80	/* Kernel is using FPU/Vector unit */
+#define	PCB_KERN_FPU_NOSAVE 0x100 /* FPU/Vec state not saved for kernel use */
+#define	PCB_VECREGS     0x200	/* Process had Altivec registers initialized */
 	struct fpu {
 		union {
 #if _BYTE_ORDER == _BIG_ENDIAN
diff --git a/sys/powerpc/powerpc/altivec.c b/sys/powerpc/powerpc/altivec.c
index 16e4477703d2..5072cf3dc6f6 100644
--- a/sys/powerpc/powerpc/altivec.c
+++ b/sys/powerpc/powerpc/altivec.c
@@ -105,10 +105,11 @@ enable_vec(struct thread *td)
 	 * the thread, initialise the vector registers and VSCR to 0, and
 	 * set the flag to indicate that the vector unit is in use.
 	 */
+	pcb->pcb_flags |= PCB_VEC;
 	tf->srr1 |= PSL_VEC;
-	if (!(pcb->pcb_flags & PCB_VEC)) {
+	if (!(pcb->pcb_flags & PCB_VECREGS)) {
 		memset(&pcb->pcb_vec, 0, sizeof pcb->pcb_vec);
-		pcb->pcb_flags |= PCB_VEC;
+		pcb->pcb_flags |= PCB_VECREGS;
 	}
 
 	/*
@@ -170,3 +171,32 @@ save_vec_nodrop(struct thread *td)
 	if (td == PCPU_GET(vecthread))
 		save_vec_int(td);
 }
+
+void
+enable_vec_kern(void)
+{
+	mtmsr(mfmsr() | PSL_VEC);	/* set PSL_VEC in the running MSR */
+}
+
+void
+disable_vec(struct thread *td)
+{
+	register_t msr;
+	struct pcb *pcb;
+	struct trapframe *tf;
+
+	pcb = td->td_pcb;
+	tf = trapframe(td);
+
+	/* Clear PSL_VEC in the running MSR (no-op if already clear) */
+	msr = mfmsr() & ~PSL_VEC;
+	isync();
+	mtmsr(msr);
+
+	/*
+	 * Disable PSL_VEC in userspace (trapframe srr1, PCB_VEC).
+	 * It will be re-enabled when an Altivec instruction is executed.
+	 */
+	tf->srr1 &= ~PSL_VEC;
+	pcb->pcb_flags &= ~PCB_VEC;
+}
diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c
index b42978ff94a8..05d3a3cf79ba 100644
--- a/sys/powerpc/powerpc/exec_machdep.c
+++ b/sys/powerpc/powerpc/exec_machdep.c
@@ -441,12 +441,14 @@ grab_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 	 * Repeat for Altivec context
 	 */
 
-	if (pcb->pcb_flags & PCB_VEC) {
-		KASSERT(td == curthread,
-			("get_mcontext: fp save not curthread"));
-		critical_enter();
-		save_vec(td);
-		critical_exit();
+	if (pcb->pcb_flags & PCB_VECREGS) {
+		if (pcb->pcb_flags & PCB_VEC) {
+			KASSERT(td == curthread,
+				("get_mcontext: altivec save not curthread"));
+			critical_enter();
+			save_vec(td);
+			critical_exit();
+		}
 		mcp->mc_flags |= _MC_AV_VALID;
 		mcp->mc_vscr  = pcb->pcb_vec.vscr;
 		mcp->mc_vrsave =  pcb->pcb_vec.vrsave;
@@ -543,11 +545,8 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
 	}
 
 	if (mcp->mc_flags & _MC_AV_VALID) {
-		if ((pcb->pcb_flags & PCB_VEC) != PCB_VEC) {
-			critical_enter();
-			enable_vec(td);
-			critical_exit();
-		}
+		/* enable_vec() will happen lazily on a fault */
+		pcb->pcb_flags |= PCB_VECREGS;
 		pcb->pcb_vec.vscr = mcp->mc_vscr;
 		pcb->pcb_vec.vrsave = mcp->mc_vrsave;
 		memcpy(pcb->pcb_vec.vr, mcp->mc_avec, sizeof(mcp->mc_avec));
diff --git a/sys/powerpc/powerpc/fpu.c b/sys/powerpc/powerpc/fpu.c
index cc1381046b4b..8f5df2f7d576 100644
--- a/sys/powerpc/powerpc/fpu.c
+++ b/sys/powerpc/powerpc/fpu.c
@@ -42,6 +42,7 @@
 #include <machine/fpu.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
+#include <machine/altivec.h>
 
 static void
 save_fpu_int(struct thread *td)
@@ -259,3 +260,127 @@ get_fpu_exception(struct thread *td)
 	return ucode;
 }
 
+void
+enable_fpu_kern(void)
+{
+	register_t msr;
+
+	msr = mfmsr() | PSL_FP;		/* running MSR plus PSL_FP */
+
+	if (cpu_features & PPC_FEATURE_HAS_VSX)
+		msr |= PSL_VSX;	/* and PSL_VSX if advertised */
+
+	mtmsr(msr);			/* write the new MSR back */
+}
+
+void
+disable_fpu(struct thread *td)
+{
+	register_t msr;
+	struct pcb *pcb;
+	struct trapframe *tf;
+
+	pcb = td->td_pcb;
+	tf = trapframe(td);
+
+	/* Clear PSL_FP/PSL_VSX in the running MSR (no-op if already clear) */
+	msr = mfmsr() & ~(PSL_FP | PSL_VSX);
+	isync();
+	mtmsr(msr);
+
+	/*
+	 * Disable FPU in userspace (trapframe srr1, PCB_FPU/PCB_VSX).
+	 * It will be re-enabled when an FP or VSX instruction is executed.
+	 */
+	tf->srr1 &= ~(PSL_FP | PSL_VSX);
+	pcb->pcb_flags &= ~(PCB_FPU | PCB_VSX);
+}
+
+#ifndef __SPE__
+/*
+ * XXX: Implement fpu_kern_alloc_ctx/fpu_kern_free_ctx once fpu_kern_enter and
+ * fpu_kern_leave can handle !FPU_KERN_NOCTX.
+ */
+struct fpu_kern_ctx {
+#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
+#define	FPU_KERN_CTX_INUSE	0x02	/* must be clear on fpu_kern_enter() */
+	uint32_t	 flags;		/* FPU_KERN_CTX_* bits */
+};
+
+void
+fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
+{
+	struct pcb *pcb;
+
+	pcb = td->td_pcb;
+
+	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
+	    ("ctx is required when !FPU_KERN_NOCTX"));
+	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
+	    ("using inuse ctx"));
+	KASSERT((pcb->pcb_flags & PCB_KERN_FPU_NOSAVE) == 0,
+	    ("recursive fpu_kern_enter while in PCB_KERN_FPU_NOSAVE state"));
+
+	if ((flags & FPU_KERN_NOCTX) != 0) {	/* only implemented mode */
+		critical_enter();	/* exited in fpu_kern_leave() */
+
+		if (pcb->pcb_flags & PCB_FPU) {
+			save_fpu(td);	/* PCB_FPU set: save td's state */
+			pcb->pcb_flags |= PCB_FPREGS;
+		}
+		enable_fpu_kern();
+
+		if (pcb->pcb_flags & PCB_VEC) {
+			save_vec(td);	/* PCB_VEC set: save td's state */
+			pcb->pcb_flags |= PCB_VECREGS;
+		}
+		enable_vec_kern();
+
+		pcb->pcb_flags |= PCB_KERN_FPU | PCB_KERN_FPU_NOSAVE;
+		return;		/* still inside the critical section */
+	}
+
+	KASSERT(0, ("fpu_kern_enter with !FPU_KERN_NOCTX not implemented!"));
+}
+
+/* End a kernel FPU/vector section begun by fpu_kern_enter(); returns 0. */
+int
+fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
+{
+	struct pcb *pcb;
+
+	pcb = td->td_pcb;
+
+	if ((pcb->pcb_flags & PCB_KERN_FPU_NOSAVE) != 0) {
+		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
+		KASSERT(PCPU_GET(fpcurthread) == NULL,
+		    ("non-NULL fpcurthread for PCB_KERN_FPU_NOSAVE"));
+		CRITICAL_ASSERT(td);
+
+		/* Turn FPU, VSX and Altivec (VMX) off again for td */
+		disable_fpu(td);
+		disable_vec(td);
+
+		pcb->pcb_flags &= ~PCB_KERN_FPU_NOSAVE;
+		critical_exit();	/* entered in fpu_kern_enter() */
+	} else {
+		KASSERT(0, ("fpu_kern_leave with !FPU_KERN_NOCTX not implemented!"));
+	}
+
+	pcb->pcb_flags &= ~PCB_KERN_FPU;
+
+	return (0);
+}
+
+int
+is_fpu_kern_thread(u_int flags __unused)
+{
+	struct pcb *curpcb;
+
+	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
+		return (0);	/* curthread lacks TDP_KTHREAD */
+	curpcb = curthread->td_pcb;	/* result: is PCB_KERN_FPU set? */
+	return ((curpcb->pcb_flags & PCB_KERN_FPU) != 0);
+}
+
+#endif /* !__SPE__ */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index da88c3e28581..2e4310dac111 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -75,7 +75,7 @@
  * cannot include sys/param.h and should only be updated here.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1500000
+#define __FreeBSD_version 1500001
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,