svn commit: r335196 - in releng/11.2/sys: amd64/amd64 i386/i386 i386/isa
Konstantin Belousov
kib at FreeBSD.org
Fri Jun 15 13:21:39 UTC 2018
Author: kib
Date: Fri Jun 15 13:21:37 2018
New Revision: 335196
URL: https://svnweb.freebsd.org/changeset/base/335196
Log:
MFC rr335072, r335089:
Enable eager FPU context switch on i386 and amd64.
CVE: CVE-2018-3665
MFC r335131
Remove printf() in #NM handler.
MFC r335132:
Reorganize code flow in fpudna()/npxdna().
Approved by: re (gjb)
Modified:
releng/11.2/sys/amd64/amd64/cpu_switch.S
releng/11.2/sys/amd64/amd64/fpu.c
releng/11.2/sys/i386/i386/swtch.s
releng/11.2/sys/i386/isa/npx.c
Directory Properties:
releng/11.2/ (props changed)
Modified: releng/11.2/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- releng/11.2/sys/amd64/amd64/cpu_switch.S Fri Jun 15 13:14:45 2018 (r335195)
+++ releng/11.2/sys/amd64/amd64/cpu_switch.S Fri Jun 15 13:21:37 2018 (r335196)
@@ -128,10 +128,10 @@ done_store_dr:
/* have we used fp, and need a save? */
cmpq %rdi,PCPU(FPCURTHREAD)
- jne 3f
+ jne 2f
movq PCB_SAVEFPU(%r8),%r8
clts
- cmpl $0,use_xsave
+ cmpl $0,use_xsave(%rip)
jne 1f
fxsave (%r8)
jmp 2f
@@ -143,12 +143,7 @@ ctx_switch_xsave:
/* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
xsave (%r8)
movq %rcx,%rdx
-2: smsw %ax
- orb $CR0_TS,%al
- lmsw %ax
- xorl %eax,%eax
- movq %rax,PCPU(FPCURTHREAD)
-3:
+2:
/* Save is done. Now fire up new thread. Leave old vmspace. */
movq %rsi,%r12
movq %rdi,%r13
@@ -235,6 +230,8 @@ done_load_dr:
movq PCB_RBX(%r8),%rbx
movq PCB_RIP(%r8),%rax
movq %rax,(%rsp)
+ movq PCPU(CURTHREAD),%rdi
+ call fpu_activate_sw
ret
/*
Modified: releng/11.2/sys/amd64/amd64/fpu.c
==============================================================================
--- releng/11.2/sys/amd64/amd64/fpu.c Fri Jun 15 13:14:45 2018 (r335195)
+++ releng/11.2/sys/amd64/amd64/fpu.c Fri Jun 15 13:21:37 2018 (r335196)
@@ -139,6 +139,11 @@ static void fpu_clean_state(void);
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware");
+int lazy_fpu_switch = 0;
+SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+ &lazy_fpu_switch, 0,
+ "Lazily load FPU context after context switch");
+
int use_xsave; /* non-static for cpu_switch.S */
uint64_t xsave_mask; /* the same */
static uma_zone_t fpu_save_area_zone;
@@ -204,6 +209,7 @@ fpuinit_bsp1(void)
u_int cp[4];
uint64_t xsave_mask_user;
+ TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch);
if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
use_xsave = 1;
TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
@@ -612,6 +618,45 @@ fputrap_sse(void)
return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
}
+static void
+restore_fpu_curthread(struct thread *td)
+{
+ struct pcb *pcb;
+
+ /*
+ * Record new context early in case frstor causes a trap.
+ */
+ PCPU_SET(fpcurthread, td);
+
+ stop_emulating();
+ fpu_clean_state();
+ pcb = td->td_pcb;
+
+ if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
+ /*
+ * This is the first time this thread has used the FPU or
+ * the PCB doesn't contain a clean FPU state. Explicitly
+ * load an initial state.
+ *
+ * We prefer to restore the state from the actual save
+ * area in PCB instead of directly loading from
+ * fpu_initialstate, to ignite the XSAVEOPT
+ * tracking engine.
+ */
+ bcopy(fpu_initialstate, pcb->pcb_save,
+ cpu_max_ext_state_size);
+ fpurestore(pcb->pcb_save);
+ if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
+ fldcw(pcb->pcb_initial_fpucw);
+ if (PCB_USER_FPU(pcb))
+ set_pcb_flags(pcb, PCB_FPUINITDONE |
+ PCB_USERFPUINITDONE);
+ else
+ set_pcb_flags(pcb, PCB_FPUINITDONE);
+ } else
+ fpurestore(pcb->pcb_save);
+}
+
/*
* Device Not Available (DNA, #NM) exception handler.
*
@@ -622,7 +667,9 @@ fputrap_sse(void)
void
fpudna(void)
{
+ struct thread *td;
+ td = curthread;
/*
* This handler is entered with interrupts enabled, so context
* switches may occur before critical_enter() is executed. If
@@ -636,49 +683,38 @@ fpudna(void)
KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0,
("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)"));
- if (PCPU_GET(fpcurthread) == curthread) {
- printf("fpudna: fpcurthread == curthread\n");
+ if (__predict_false(PCPU_GET(fpcurthread) == td)) {
+ /*
+ * Some virtual machines seems to set %cr0.TS at
+ * arbitrary moments. Silently clear the TS bit
+ * regardless of the eager/lazy FPU context switch
+ * mode.
+ */
stop_emulating();
- critical_exit();
- return;
+ } else {
+ if (__predict_false(PCPU_GET(fpcurthread) != NULL)) {
+ panic(
+ "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
+ PCPU_GET(fpcurthread),
+ PCPU_GET(fpcurthread)->td_tid, td, td->td_tid);
+ }
+ restore_fpu_curthread(td);
}
- if (PCPU_GET(fpcurthread) != NULL) {
- panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
- PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid,
- curthread, curthread->td_tid);
- }
- stop_emulating();
- /*
- * Record new context early in case frstor causes a trap.
- */
- PCPU_SET(fpcurthread, curthread);
+ critical_exit();
+}
- fpu_clean_state();
+void fpu_activate_sw(struct thread *td); /* Called from the context switch */
+void
+fpu_activate_sw(struct thread *td)
+{
- if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) {
- /*
- * This is the first time this thread has used the FPU or
- * the PCB doesn't contain a clean FPU state. Explicitly
- * load an initial state.
- *
- * We prefer to restore the state from the actual save
- * area in PCB instead of directly loading from
- * fpu_initialstate, to ignite the XSAVEOPT
- * tracking engine.
- */
- bcopy(fpu_initialstate, curpcb->pcb_save,
- cpu_max_ext_state_size);
- fpurestore(curpcb->pcb_save);
- if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
- fldcw(curpcb->pcb_initial_fpucw);
- if (PCB_USER_FPU(curpcb))
- set_pcb_flags(curpcb,
- PCB_FPUINITDONE | PCB_USERFPUINITDONE);
- else
- set_pcb_flags(curpcb, PCB_FPUINITDONE);
- } else
- fpurestore(curpcb->pcb_save);
- critical_exit();
+ if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 ||
+ !PCB_USER_FPU(td->td_pcb)) {
+ PCPU_SET(fpcurthread, NULL);
+ start_emulating();
+ } else if (PCPU_GET(fpcurthread) != td) {
+ restore_fpu_curthread(td);
+ }
}
void
Modified: releng/11.2/sys/i386/i386/swtch.s
==============================================================================
--- releng/11.2/sys/i386/i386/swtch.s Fri Jun 15 13:14:45 2018 (r335195)
+++ releng/11.2/sys/i386/i386/swtch.s Fri Jun 15 13:21:37 2018 (r335196)
@@ -296,6 +296,12 @@ sw1:
cpu_switch_load_gs:
mov PCB_GS(%edx),%gs
+ pushl %edx
+ pushl PCPU(CURTHREAD)
+ call npxswitch
+ popl %edx
+ popl %edx
+
/* Test if debug registers should be restored. */
testl $PCB_DBREGS,PCB_FLAGS(%edx)
jz 1f
Modified: releng/11.2/sys/i386/isa/npx.c
==============================================================================
--- releng/11.2/sys/i386/isa/npx.c Fri Jun 15 13:14:45 2018 (r335195)
+++ releng/11.2/sys/i386/isa/npx.c Fri Jun 15 13:21:37 2018 (r335196)
@@ -191,6 +191,11 @@ int hw_float;
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
&hw_float, 0, "Floating point instructions executed in hardware");
+int lazy_fpu_switch = 0;
+SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+ &lazy_fpu_switch, 0,
+ "Lazily load FPU context after context switch");
+
int use_xsave;
uint64_t xsave_mask;
static uma_zone_t fpu_save_area_zone;
@@ -327,6 +332,7 @@ npxinit_bsp1(void)
u_int cp[4];
uint64_t xsave_mask_user;
+ TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch);
if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) {
use_xsave = 1;
TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
@@ -785,47 +791,20 @@ npxtrap_sse(void)
return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
}
-/*
- * Implement device not available (DNA) exception
- *
- * It would be better to switch FP context here (if curthread != fpcurthread)
- * and not necessarily for every context switch, but it is too hard to
- * access foreign pcb's.
- */
-
-static int err_count = 0;
-
-int
-npxdna(void)
+static void
+restore_npx_curthread(struct thread *td, struct pcb *pcb)
{
- if (!hw_float)
- return (0);
- critical_enter();
- if (PCPU_GET(fpcurthread) == curthread) {
- printf("npxdna: fpcurthread == curthread %d times\n",
- ++err_count);
- stop_emulating();
- critical_exit();
- return (1);
- }
- if (PCPU_GET(fpcurthread) != NULL) {
- printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
- PCPU_GET(fpcurthread),
- PCPU_GET(fpcurthread)->td_proc->p_pid,
- curthread, curthread->td_proc->p_pid);
- panic("npxdna");
- }
- stop_emulating();
/*
* Record new context early in case frstor causes a trap.
*/
- PCPU_SET(fpcurthread, curthread);
+ PCPU_SET(fpcurthread, td);
+ stop_emulating();
if (cpu_fxsr)
fpu_clean_state();
- if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
+ if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
/*
* This is the first time this thread has used the FPU or
* the PCB doesn't contain a clean FPU state. Explicitly
@@ -836,18 +815,54 @@ npxdna(void)
* npx_initialstate, to ignite the XSAVEOPT
* tracking engine.
*/
- bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size);
- fpurstor(curpcb->pcb_save);
- if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
- fldcw(curpcb->pcb_initial_npxcw);
- curpcb->pcb_flags |= PCB_NPXINITDONE;
- if (PCB_USER_FPU(curpcb))
- curpcb->pcb_flags |= PCB_NPXUSERINITDONE;
+ bcopy(npx_initialstate, pcb->pcb_save, cpu_max_ext_state_size);
+ fpurstor(pcb->pcb_save);
+ if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
+ fldcw(pcb->pcb_initial_npxcw);
+ pcb->pcb_flags |= PCB_NPXINITDONE;
+ if (PCB_USER_FPU(pcb))
+ pcb->pcb_flags |= PCB_NPXUSERINITDONE;
} else {
- fpurstor(curpcb->pcb_save);
+ fpurstor(pcb->pcb_save);
}
- critical_exit();
+}
+/*
+ * Implement device not available (DNA) exception
+ *
+ * It would be better to switch FP context here (if curthread != fpcurthread)
+ * and not necessarily for every context switch, but it is too hard to
+ * access foreign pcb's.
+ */
+int
+npxdna(void)
+{
+ struct thread *td;
+
+ if (!hw_float)
+ return (0);
+ td = curthread;
+ critical_enter();
+ if (__predict_false(PCPU_GET(fpcurthread) == td)) {
+ /*
+ * Some virtual machines seems to set %cr0.TS at
+ * arbitrary moments. Silently clear the TS bit
+ * regardless of the eager/lazy FPU context switch
+ * mode.
+ */
+ stop_emulating();
+ } else {
+ if (__predict_false(PCPU_GET(fpcurthread) != NULL)) {
+ printf(
+ "npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
+ PCPU_GET(fpcurthread),
+ PCPU_GET(fpcurthread)->td_proc->p_pid,
+ td, td->td_proc->p_pid);
+ panic("npxdna");
+ }
+ restore_npx_curthread(td, td->td_pcb);
+ }
+ critical_exit();
return (1);
}
@@ -869,8 +884,20 @@ npxsave(addr)
xsaveopt((char *)addr, xsave_mask);
else
fpusave(addr);
- start_emulating();
- PCPU_SET(fpcurthread, NULL);
+}
+
+void npxswitch(struct thread *td, struct pcb *pcb);
+void
+npxswitch(struct thread *td, struct pcb *pcb)
+{
+
+ if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 ||
+ !PCB_USER_FPU(pcb)) {
+ start_emulating();
+ PCPU_SET(fpcurthread, NULL);
+ } else if (PCPU_GET(fpcurthread) != td) {
+ restore_npx_curthread(td, pcb);
+ }
}
/*
More information about the svn-src-releng
mailing list