cvs commit: src/sys/amd64/amd64 exception.S
Joseph Koshy
jkoshy at FreeBSD.ORG
Fri Aug 26 19:05:12 GMT 2005
> ups 2005-08-25 20:33:43 UTC
>
> FreeBSD src repository
>
> Modified files:
> sys/amd64/amd64 exception.S
> Log:
> NMI handler should not enable interrupts.
>
> Tested by: kris@
> MFC after: 3 weeks
Actually, NMI handling doesn't quite work on the AMD64, even with
this change.
The problem is that the kernel protects code where it uses SWAPGS
from reentrancy by blocking interrupts (using 'cli'). However,
NMIs are non-maskable, and if an NMI is taken when in such a critical
section we end up with the GS.base values getting out of sync and
a processor reset or a double fault on a subsequent use of the
out-of-sync %gs value.
The attached patch addresses this bug for NMIs. It is needed before
you can use hwpmc(4) to sample on an AMD64 box.
The general solution is more complicated than this patch. In theory
all unmaskable traps into the kernel appear to be susceptible to
having the processor's GS.base values getting out of sync. In
practice though, this appears to be a problem only for NMIs as most
of the other exceptions do not happen during the vulnerable critical
section. Debug exceptions taken from userland cannot trigger the
bug. A machine check exception would be vulnerable, but we don't
implement this today.
Regards,
Koshy
<jkoshy at freebsd.org>
Index: db_trace.c
===================================================================
RCS file: /cvs/FreeBSD/src/sys/amd64/amd64/db_trace.c,v
retrieving revision 1.68
diff -u -u -r1.68 db_trace.c
--- db_trace.c 3 Aug 2005 04:33:48 -0000 1.68
+++ db_trace.c 20 Aug 2005 15:17:12 -0000
@@ -317,7 +317,8 @@
db_symbol_values(sym, &name, NULL);
if (name != NULL) {
if (strcmp(name, "calltrap") == 0 ||
- strcmp(name, "fork_trampoline") == 0)
+ strcmp(name, "fork_trampoline") == 0 ||
+ strcmp(name, "nmi_calltrap") == 0)
frame_type = TRAP;
else if (strncmp(name, "Xatpic_intr", 11) == 0 ||
strncmp(name, "Xatpic_fastintr", 15) == 0 ||
Index: exception.S
===================================================================
RCS file: /cvs/FreeBSD/src/sys/amd64/amd64/exception.S,v
retrieving revision 1.126
diff -u -u -r1.126 exception.S
--- exception.S 25 Aug 2005 20:33:43 -0000 1.126
+++ exception.S 26 Aug 2005 18:31:39 -0000
@@ -93,8 +93,6 @@
jmp alltraps
IDTVEC(div)
TRAP(T_DIVIDE)
-IDTVEC(nmi)
- TRAP_NOEN(T_NMI)
IDTVEC(ofl)
TRAP(T_OFLOW)
IDTVEC(bnd)
@@ -313,6 +311,82 @@
IDTVEC(fast_syscall32)
sysret
+/*
+ * NMI handling is special.
+ *
+ * First, NMIs do not respect the state of the processor's RFLAGS.IF
+ * bit and the NMI handler may be invoked at any time, including when
+ * the processor is in a critical section. In particular, this means
+ * that the processor's GS.base values could be inconsistent on entry
+ * to the handler.
+ *
+ * Second, the processor treats NMIs specially, blocking further NMIs
+ * till an 'iretq' instruction is executed. We therefore need to
+ * execute the NMI handler with interrupts disabled to prevent a
+ * nested interrupt from executing an 'iretq' instruction and
+ * inadvertently taking the processor out of NMI mode.
+ *
+ * We use '%ebx', a C-preserved register, to remember whether to swap
+ * GS back on the exit path.
+ */
+
+IDTVEC(nmi)
+ subq $TF_RIP,%rsp
+ movq $(T_NMI),TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
+ movq %rdi,TF_RDI(%rsp)
+ movq %rsi,TF_RSI(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movq %r8,TF_R8(%rsp)
+ movq %r9,TF_R9(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rbx,TF_RBX(%rsp)
+ movq %rbp,TF_RBP(%rsp)
+ movq %r10,TF_R10(%rsp)
+ movq %r11,TF_R11(%rsp)
+ movq %r12,TF_R12(%rsp)
+ movq %r13,TF_R13(%rsp)
+ movq %r14,TF_R14(%rsp)
+ movq %r15,TF_R15(%rsp)
+ xorl %ebx,%ebx
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jnz nmi_needswapgs /* we came from userland */
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ cmpl $VM_MAXUSER_ADDRESS >> 32,%edx
+ jae nmi_calltrap /* GS.base holds a kernel VA */
+nmi_needswapgs:
+ incl %ebx
+ swapgs
+/* Note: this label is also used by ddb and gdb: */
+nmi_calltrap:
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ call trap
+ MEXITCOUNT
+ testl %ebx,%ebx
+ jz nmi_restoreregs
+ swapgs
+nmi_restoreregs:
+ movq TF_RDI(%rsp),%rdi
+ movq TF_RSI(%rsp),%rsi
+ movq TF_RDX(%rsp),%rdx
+ movq TF_RCX(%rsp),%rcx
+ movq TF_R8(%rsp),%r8
+ movq TF_R9(%rsp),%r9
+ movq TF_RAX(%rsp),%rax
+ movq TF_RBX(%rsp),%rbx
+ movq TF_RBP(%rsp),%rbp
+ movq TF_R10(%rsp),%r10
+ movq TF_R11(%rsp),%r11
+ movq TF_R12(%rsp),%r12
+ movq TF_R13(%rsp),%r13
+ movq TF_R14(%rsp),%r14
+ movq TF_R15(%rsp),%r15
+ addq $TF_RIP,%rsp
+ iretq
+
ENTRY(fork_trampoline)
movq %r12, %rdi /* function */
movq %rbx, %rsi /* arg1 */
Index: genassym.c
===================================================================
RCS file: /cvs/FreeBSD/src/sys/amd64/amd64/genassym.c,v
retrieving revision 1.155
diff -u -u -r1.155 genassym.c
--- genassym.c 20 Nov 2004 02:30:59 -0000 1.155
+++ genassym.c 20 Aug 2005 15:11:01 -0000
@@ -209,3 +209,5 @@
ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse));
+
+ASSYM(MSR_GSBASE, MSR_GSBASE);
Index: trap.c
===================================================================
RCS file: /cvs/FreeBSD/src/sys/amd64/amd64/trap.c,v
retrieving revision 1.289
diff -u -u -r1.289 trap.c
--- trap.c 29 Jun 2005 23:23:16 -0000 1.289
+++ trap.c 20 Aug 2005 15:18:25 -0000
@@ -219,9 +219,9 @@
type);
/*
* We shouldn't enable interrupts while in a critical
- * section.
+ * section, or if servicing an NMI.
*/
- if (td->td_critnest == 0)
+ if (type != T_NMI && td->td_critnest == 0)
enable_intr();
}
}
More information about the cvs-src
mailing list