amd64/169927: siginfo, si_code for fpe errors when error occurs using the SSE math processor

Konstantin Belousov kostikbel at gmail.com
Tue Jul 17 17:09:21 UTC 2012


On Wed, Jul 18, 2012 at 02:03:58AM +1000, Bruce Evans wrote:
> On Wed, 18 Jul 2012, Bruce Evans wrote:
> 
> >On Wed, 18 Jul 2012, Bruce Evans wrote:
> >>..
> >>So I still want a single kernel exception handle that merges the statuses.
> >
> >Merge the independent statuses modified by their independent controls:
> >
> > 	return (fpetable[(fpsw & ((~fpcw & 0x3f) | 0x40)) |
> >	    ((mxcsr & (mxcsr >> 16)) & 0x3f)]);
> >
> >Use the same trap handler that reads all these statuses and controls.
> 
> Changed my mind again.  Need sleep.  Merging the traps breaks the rule
> that i387 traps occur on the first non-control instruction after the
> one that causes the exception.  There may be mixed code like this:
> 
> 	fldz
> 	fld1
> 	fdiv	%st,%st(1)	# i387 exception now; i387 trap pending
> 	load	$0 into %xmm0
> 	load	$1 into %xmm1
> 	divsd	%xmm0,%xmm1	# SSE exception now; SSE trap now
> 
> Debuggers can see both exception states including the i387 trap pending,
> provided the i387 trap is not bogusly cleared, either by never clearing
> it in the kernel trap handler or by using a separate trap handler that
> doesn't clear it for T_XMMFLT.  They can even figure out that an SSE
> trap occurred, because the i387 trap is still pending.
> 
> 	...
> 	fnop			# i387 trap on first non-control FP instr...
> 
> Apart from doing the bogus fnclex for T_XMMFLT and the delayed effect of
> i387 status bits, merging or not merging the statuses makes little
> difference, since if a status bit is set and is not masked according
> to its control word, then it will generate a trap soon if it didn't
> genearate the current one.

The trap number is available for SA_SIGINFO type of handlers with
si_trapno member of siginfo_t. I think this is final argument to have
separate fputrap_{x87,sse} functions.

For amd64, SSE hardware is FPU, so I do not see much wrong with the name.

I changed fputrap_sse() according to your suggestion.

diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index a7812b7..356b3ac 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$");
 #define	fxrstor(addr)		__asm __volatile("fxrstor %0" : : "m" (*(addr)))
 #define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
 #define	ldmxcsr(csr)		__asm __volatile("ldmxcsr %0" : : "m" (csr))
+#define	stmxcsr(addr)		__asm __volatile("stmxcsr %0" : : "m" (*(addr)))
 
 static __inline void
 xrstor(char *addr, uint64_t mask)
@@ -105,6 +106,7 @@ void	fnstsw(caddr_t addr);
 void	fxsave(caddr_t addr);
 void	fxrstor(caddr_t addr);
 void	ldmxcsr(u_int csr);
+void	stmxcsr(u_int csr);
 void	xrstor(char *addr, uint64_t mask);
 void	xsave(char *addr, uint64_t mask);
 
@@ -113,9 +115,6 @@ void	xsave(char *addr, uint64_t mask);
 #define	start_emulating()	load_cr0(rcr0() | CR0_TS)
 #define	stop_emulating()	clts()
 
-#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw)
-#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw)
-
 CTASSERT(sizeof(struct savefpu) == 512);
 CTASSERT(sizeof(struct xstate_hdr) == 64);
 CTASSERT(sizeof(struct savefpu_ymm) == 832);
@@ -514,11 +513,15 @@ static char fpetable[128] = {
 };
 
 /*
- * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
+ * Preserve the FP status word, clear FP exceptions for x87, then
+ * generate a SIGFPE.
+ *
+ * Clearing exceptions was necessary mainly to avoid IRQ13 bugs and is
+ * engraved in our i386 ABI.  We now depend on longjmp() restoring a
+ * usable state.  Restoring the state or examining it might fail if we
+ * didn't clear exceptions.
  *
- * Clearing exceptions is necessary mainly to avoid IRQ13 bugs.  We now
- * depend on longjmp() restoring a usable state.  Restoring the state
- * or examining it might fail if we didn't clear exceptions.
+ * For SSE exceptions, the exceptions are not cleared.
  *
  * The error code chosen will be one of the FPE_... macros. It will be
  * sent as the second argument to old BSD-style signal handlers and as
@@ -531,8 +534,9 @@ static char fpetable[128] = {
  * solution for signals other than SIGFPE.
  */
 int
-fputrap()
+fputrap_x87(void)
 {
+	struct savefpu *pcb_save;
 	u_short control, status;
 
 	critical_enter();
@@ -543,19 +547,40 @@ fputrap()
 	 * wherever they are.
 	 */
 	if (PCPU_GET(fpcurthread) != curthread) {
-		control = GET_FPU_CW(curthread);
-		status = GET_FPU_SW(curthread);
+		pcb_save = curthread->td_pcb->pcb_save;
+		control = pcb_save->sv_env.en_cw;
+		status = pcb_save->sv_env.en_sw;
 	} else {
 		fnstcw(&control);
 		fnstsw(&status);
+		fnclex();
 	}
 
-	if (PCPU_GET(fpcurthread) == curthread)
-		fnclex();
 	critical_exit();
 	return (fpetable[status & ((~control & 0x3f) | 0x40)]);
 }
 
+int
+fputrap_sse(void)
+{
+	u_int mxcsr;
+
+	critical_enter();
+
+	/*
+	 * Coomparing with the x87 #MF handler, we do not clear
+	 * exceptions from the mxcsr.
+	 */
+	if (PCPU_GET(fpcurthread) != curthread)
+		mxcsr = curthread->td_pcb->pcb_save->sv_env.en_mxcsr;
+	else
+		stmxcsr(&mxcsr);
+
+	critical_exit();
+
+	return (fpetable[(mxcsr & (mxcsr >> 16)) & 0x3f]);
+}
+
 /*
  * Implement device not available (DNA) exception
  *
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 75e15e0..57d1cc2 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -328,7 +328,7 @@ trap(struct trapframe *frame)
 			break;
 
 		case T_ARITHTRAP:	/* arithmetic trap */
-			ucode = fputrap();
+			ucode = fputrap_x87();
 			if (ucode == -1)
 				goto userout;
 			i = SIGFPE;
@@ -442,7 +442,9 @@ trap(struct trapframe *frame)
 			break;
 
 		case T_XMMFLT:		/* SIMD floating-point exception */
-			ucode = 0; /* XXX */
+			ucode = fputrap_sse();
+			if (ucode == -1)
+				goto userout;
 			i = SIGFPE;
 			break;
 		}
diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h
index 98a016b..7d0f0ea 100644
--- a/sys/amd64/include/fpu.h
+++ b/sys/amd64/include/fpu.h
@@ -62,7 +62,8 @@ int	fpusetregs(struct thread *td, struct savefpu *addr,
 	    char *xfpustate, size_t xfpustate_size);
 int	fpusetxstate(struct thread *td, char *xfpustate,
 	    size_t xfpustate_size);
-int	fputrap(void);
+int	fputrap_sse(void);
+int	fputrap_x87(void);
 void	fpuuserinited(struct thread *td);
 struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags);
 void	fpu_kern_free_ctx(struct fpu_kern_ctx *ctx);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-amd64/attachments/20120717/f80669a3/attachment.pgp


More information about the freebsd-amd64 mailing list