svn commit: r329462 - in stable/11/sys: amd64/amd64 amd64/ia32 amd64/include amd64/vmm amd64/vmm/intel dev/cpuctl dev/hyperv/vmbus dev/hyperv/vmbus/amd64 dev/hyperv/vmbus/i386 i386/i386 x86/include...
Konstantin Belousov
kib at FreeBSD.org
Sat Feb 17 18:00:03 UTC 2018
Author: kib
Date: Sat Feb 17 18:00:01 2018
New Revision: 329462
URL: https://svnweb.freebsd.org/changeset/base/329462
Log:
MFC r328083,328096,328116,328119,328120,328128,328135,328153,328157,
328166,328177,328199,328202,328205,328468,328470,328624,328625,328627,
328628,329214,329297,329365:
Meltdown mitigation by PTI, PCID optimization of PTI, and kernel use of IBRS
for some mitigations of Spectre.
Tested by: emaste, Arshan Khanifar <arshankhanifar at gmail.com>
Discussed with: jkim
Sponsored by: The FreeBSD Foundation
Modified:
stable/11/sys/amd64/amd64/apic_vector.S
stable/11/sys/amd64/amd64/atpic_vector.S
stable/11/sys/amd64/amd64/cpu_switch.S
stable/11/sys/amd64/amd64/db_trace.c
stable/11/sys/amd64/amd64/exception.S
stable/11/sys/amd64/amd64/genassym.c
stable/11/sys/amd64/amd64/initcpu.c
stable/11/sys/amd64/amd64/machdep.c
stable/11/sys/amd64/amd64/mp_machdep.c
stable/11/sys/amd64/amd64/pmap.c
stable/11/sys/amd64/amd64/support.S
stable/11/sys/amd64/amd64/sys_machdep.c
stable/11/sys/amd64/amd64/trap.c
stable/11/sys/amd64/amd64/vm_machdep.c
stable/11/sys/amd64/ia32/ia32_exception.S
stable/11/sys/amd64/ia32/ia32_syscall.c
stable/11/sys/amd64/include/asmacros.h
stable/11/sys/amd64/include/frame.h
stable/11/sys/amd64/include/intr_machdep.h
stable/11/sys/amd64/include/md_var.h
stable/11/sys/amd64/include/pcb.h
stable/11/sys/amd64/include/pcpu.h
stable/11/sys/amd64/include/pmap.h
stable/11/sys/amd64/include/smp.h
stable/11/sys/amd64/vmm/intel/vmx.c
stable/11/sys/amd64/vmm/vmm.c
stable/11/sys/dev/cpuctl/cpuctl.c
stable/11/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
stable/11/sys/dev/hyperv/vmbus/i386/vmbus_vector.S
stable/11/sys/dev/hyperv/vmbus/vmbus.c
stable/11/sys/i386/i386/apic_vector.s
stable/11/sys/i386/i386/atpic_vector.s
stable/11/sys/i386/i386/exception.s
stable/11/sys/i386/i386/machdep.c
stable/11/sys/i386/i386/pmap.c
stable/11/sys/i386/i386/support.s
stable/11/sys/i386/i386/vm_machdep.c
stable/11/sys/x86/include/apicvar.h
stable/11/sys/x86/include/specialreg.h
stable/11/sys/x86/include/x86_smp.h
stable/11/sys/x86/include/x86_var.h
stable/11/sys/x86/isa/atpic.c
stable/11/sys/x86/x86/cpu_machdep.c
stable/11/sys/x86/x86/identcpu.c
stable/11/sys/x86/x86/local_apic.c
stable/11/sys/x86/x86/mp_x86.c
stable/11/sys/x86/xen/pv.c
Directory Properties:
stable/11/ (props changed)
Modified: stable/11/sys/amd64/amd64/apic_vector.S
==============================================================================
--- stable/11/sys/amd64/amd64/apic_vector.S Sat Feb 17 17:23:43 2018 (r329461)
+++ stable/11/sys/amd64/amd64/apic_vector.S Sat Feb 17 18:00:01 2018 (r329462)
@@ -2,7 +2,13 @@
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
+ * Copyright (c) 2014-2018 The FreeBSD Foundation
+ * All rights reserved.
*
+ * Portions of this software were developed by
+ * Konstantin Belousov <kib at FreeBSD.org> under sponsorship from
+ * the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -38,12 +44,12 @@
#include "opt_smp.h"
+#include "assym.s"
+
#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <x86/apicreg.h>
-#include "assym.s"
-
#ifdef SMP
#define LK lock ;
#else
@@ -73,30 +79,28 @@ as_lapic_eoi:
* translates that into a vector, and passes the vector to the
* lapic_handle_intr() function.
*/
-#define ISR_VEC(index, vec_name) \
- .text ; \
- SUPERALIGN_TEXT ; \
-IDTVEC(vec_name) ; \
- PUSH_FRAME ; \
- FAKE_MCOUNT(TF_RIP(%rsp)) ; \
- cmpl $0,x2apic_mode ; \
- je 1f ; \
- movl $(MSR_APIC_ISR0 + index),%ecx ; \
- rdmsr ; \
- jmp 2f ; \
-1: ; \
- movq lapic_map, %rdx ; /* pointer to local APIC */ \
- movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \
-2: ; \
- bsrl %eax, %eax ; /* index of highest set bit in ISR */ \
- jz 3f ; \
- addl $(32 * index),%eax ; \
- movq %rsp, %rsi ; \
- movl %eax, %edi ; /* pass the IRQ */ \
- call lapic_handle_intr ; \
-3: ; \
- MEXITCOUNT ; \
+ .macro ISR_VEC index, vec_name
+ INTR_HANDLER \vec_name
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ cmpl $0,x2apic_mode
+ je 1f
+ movl $(MSR_APIC_ISR0 + \index),%ecx
+ rdmsr
+ jmp 2f
+1:
+ movq lapic_map, %rdx /* pointer to local APIC */
+ movl LA_ISR + 16 * (\index)(%rdx), %eax /* load ISR */
+2:
+ bsrl %eax, %eax /* index of highest set bit in ISR */
+ jz 3f
+ addl $(32 * \index),%eax
+ movq %rsp, %rsi
+ movl %eax, %edi /* pass the IRQ */
+ call lapic_handle_intr
+3:
+ MEXITCOUNT
jmp doreti
+ .endm
/*
* Handle "spurious INTerrupts".
@@ -108,26 +112,21 @@ IDTVEC(vec_name) ; \
.text
SUPERALIGN_TEXT
IDTVEC(spuriousint)
-
/* No EOI cycle used here */
-
jmp doreti_iret
- ISR_VEC(1, apic_isr1)
- ISR_VEC(2, apic_isr2)
- ISR_VEC(3, apic_isr3)
- ISR_VEC(4, apic_isr4)
- ISR_VEC(5, apic_isr5)
- ISR_VEC(6, apic_isr6)
- ISR_VEC(7, apic_isr7)
+ ISR_VEC 1, apic_isr1
+ ISR_VEC 2, apic_isr2
+ ISR_VEC 3, apic_isr3
+ ISR_VEC 4, apic_isr4
+ ISR_VEC 5, apic_isr5
+ ISR_VEC 6, apic_isr6
+ ISR_VEC 7, apic_isr7
/*
* Local APIC periodic timer handler.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(timerint)
- PUSH_FRAME
+ INTR_HANDLER timerint
FAKE_MCOUNT(TF_RIP(%rsp))
movq %rsp, %rdi
call lapic_handle_timer
@@ -137,10 +136,7 @@ IDTVEC(timerint)
/*
* Local APIC CMCI handler.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(cmcint)
- PUSH_FRAME
+ INTR_HANDLER cmcint
FAKE_MCOUNT(TF_RIP(%rsp))
call lapic_handle_cmc
MEXITCOUNT
@@ -149,10 +145,7 @@ IDTVEC(cmcint)
/*
* Local APIC error interrupt handler.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(errorint)
- PUSH_FRAME
+ INTR_HANDLER errorint
FAKE_MCOUNT(TF_RIP(%rsp))
call lapic_handle_error
MEXITCOUNT
@@ -163,10 +156,7 @@ IDTVEC(errorint)
* Xen event channel upcall interrupt handler.
* Only used when the hypervisor supports direct vector callbacks.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(xen_intr_upcall)
- PUSH_FRAME
+ INTR_HANDLER xen_intr_upcall
FAKE_MCOUNT(TF_RIP(%rsp))
movq %rsp, %rdi
call xen_intr_handle_upcall
@@ -183,74 +173,68 @@ IDTVEC(xen_intr_upcall)
SUPERALIGN_TEXT
invltlb_ret:
call as_lapic_eoi
- POP_FRAME
- jmp doreti_iret
+ jmp ld_regs
SUPERALIGN_TEXT
-IDTVEC(invltlb)
- PUSH_FRAME
-
+ INTR_HANDLER invltlb
call invltlb_handler
jmp invltlb_ret
-IDTVEC(invltlb_pcid)
- PUSH_FRAME
-
+ INTR_HANDLER invltlb_pcid
call invltlb_pcid_handler
jmp invltlb_ret
-IDTVEC(invltlb_invpcid)
- PUSH_FRAME
-
+ INTR_HANDLER invltlb_invpcid_nopti
call invltlb_invpcid_handler
jmp invltlb_ret
+ INTR_HANDLER invltlb_invpcid_pti
+ call invltlb_invpcid_pti_handler
+ jmp invltlb_ret
+
/*
* Single page TLB shootdown
*/
- .text
+ INTR_HANDLER invlpg
+ call invlpg_handler
+ jmp invltlb_ret
- SUPERALIGN_TEXT
-IDTVEC(invlpg)
- PUSH_FRAME
+ INTR_HANDLER invlpg_invpcid
+ call invlpg_invpcid_handler
+ jmp invltlb_ret
- call invlpg_handler
+ INTR_HANDLER invlpg_pcid
+ call invlpg_pcid_handler
jmp invltlb_ret
/*
* Page range TLB shootdown.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(invlrng)
- PUSH_FRAME
-
+ INTR_HANDLER invlrng
call invlrng_handler
jmp invltlb_ret
+ INTR_HANDLER invlrng_invpcid
+ call invlrng_invpcid_handler
+ jmp invltlb_ret
+
+ INTR_HANDLER invlrng_pcid
+ call invlrng_pcid_handler
+ jmp invltlb_ret
+
/*
* Invalidate cache.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(invlcache)
- PUSH_FRAME
-
+ INTR_HANDLER invlcache
call invlcache_handler
jmp invltlb_ret
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(ipi_intr_bitmap_handler)
- PUSH_FRAME
-
+ INTR_HANDLER ipi_intr_bitmap_handler
call as_lapic_eoi
-
FAKE_MCOUNT(TF_RIP(%rsp))
-
call ipi_bitmap_handler
MEXITCOUNT
jmp doreti
@@ -258,24 +242,15 @@ IDTVEC(ipi_intr_bitmap_handler)
/*
* Executed by a CPU when it receives an IPI_STOP from another CPU.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(cpustop)
- PUSH_FRAME
-
+ INTR_HANDLER cpustop
call as_lapic_eoi
-
call cpustop_handler
jmp doreti
/*
* Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(cpususpend)
- PUSH_FRAME
-
+ INTR_HANDLER cpususpend
call cpususpend_handler
call as_lapic_eoi
jmp doreti
@@ -285,10 +260,7 @@ IDTVEC(cpususpend)
*
* - Calls the generic rendezvous action function.
*/
- .text
- SUPERALIGN_TEXT
-IDTVEC(rendezvous)
- PUSH_FRAME
+ INTR_HANDLER rendezvous
#ifdef COUNT_IPIS
movl PCPU(CPUID), %eax
movq ipi_rendezvous_counts(,%rax,8), %rax
@@ -327,5 +299,9 @@ IDTVEC(justreturn)
popq %rcx
popq %rax
jmp doreti_iret
+
+ INTR_HANDLER justreturn1
+ call as_lapic_eoi
+ jmp doreti
#endif /* SMP */
Modified: stable/11/sys/amd64/amd64/atpic_vector.S
==============================================================================
--- stable/11/sys/amd64/amd64/atpic_vector.S Sat Feb 17 17:23:43 2018 (r329461)
+++ stable/11/sys/amd64/amd64/atpic_vector.S Sat Feb 17 18:00:01 2018 (r329462)
@@ -36,38 +36,35 @@
* master and slave interrupt controllers.
*/
+#include "assym.s"
#include <machine/asmacros.h>
-#include "assym.s"
-
/*
* Macros for interrupt entry, call to handler, and exit.
*/
-#define INTR(irq_num, vec_name) \
- .text ; \
- SUPERALIGN_TEXT ; \
-IDTVEC(vec_name) ; \
- PUSH_FRAME ; \
- FAKE_MCOUNT(TF_RIP(%rsp)) ; \
- movq %rsp, %rsi ; \
- movl $irq_num, %edi; /* pass the IRQ */ \
- call atpic_handle_intr ; \
- MEXITCOUNT ; \
+ .macro INTR irq_num, vec_name
+ INTR_HANDLER \vec_name
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp, %rsi
+ movl $\irq_num, %edi /* pass the IRQ */
+ call atpic_handle_intr
+ MEXITCOUNT
jmp doreti
+ .endm
- INTR(0, atpic_intr0)
- INTR(1, atpic_intr1)
- INTR(2, atpic_intr2)
- INTR(3, atpic_intr3)
- INTR(4, atpic_intr4)
- INTR(5, atpic_intr5)
- INTR(6, atpic_intr6)
- INTR(7, atpic_intr7)
- INTR(8, atpic_intr8)
- INTR(9, atpic_intr9)
- INTR(10, atpic_intr10)
- INTR(11, atpic_intr11)
- INTR(12, atpic_intr12)
- INTR(13, atpic_intr13)
- INTR(14, atpic_intr14)
- INTR(15, atpic_intr15)
+ INTR 0, atpic_intr0
+ INTR 1, atpic_intr1
+ INTR 2, atpic_intr2
+ INTR 3, atpic_intr3
+ INTR 4, atpic_intr4
+ INTR 5, atpic_intr5
+ INTR 6, atpic_intr6
+ INTR 7, atpic_intr7
+ INTR 8, atpic_intr8
+ INTR 9, atpic_intr9
+ INTR 10, atpic_intr10
+ INTR 11, atpic_intr11
+ INTR 12, atpic_intr12
+ INTR 13, atpic_intr13
+ INTR 14, atpic_intr14
+ INTR 15, atpic_intr15
Modified: stable/11/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- stable/11/sys/amd64/amd64/cpu_switch.S Sat Feb 17 17:23:43 2018 (r329461)
+++ stable/11/sys/amd64/amd64/cpu_switch.S Sat Feb 17 18:00:01 2018 (r329462)
@@ -215,8 +215,10 @@ done_tss:
movq %r8,PCPU(RSP0)
movq %r8,PCPU(CURPCB)
/* Update the TSS_RSP0 pointer for the next interrupt */
+ cmpb $0,pti(%rip)
+ jne 1f
movq %r8,TSS_RSP0(%rdx)
- movq %r12,PCPU(CURTHREAD) /* into next thread */
+1: movq %r12,PCPU(CURTHREAD) /* into next thread */
/* Test if debug registers should be restored. */
testl $PCB_DBREGS,PCB_FLAGS(%r8)
@@ -293,7 +295,12 @@ do_tss: movq %rdx,PCPU(TSSP)
shrq $8,%rcx
movl %ecx,8(%rax)
movb $0x89,5(%rax) /* unset busy */
- movl $TSSSEL,%eax
+ cmpb $0,pti(%rip)
+ je 1f
+ movq PCPU(PRVSPACE),%rax
+ addq $PC_PTI_STACK+PC_PTI_STACK_SZ*8,%rax
+ movq %rax,TSS_RSP0(%rdx)
+1: movl $TSSSEL,%eax
ltr %ax
jmp done_tss
Modified: stable/11/sys/amd64/amd64/db_trace.c
==============================================================================
--- stable/11/sys/amd64/amd64/db_trace.c Sat Feb 17 17:23:43 2018 (r329461)
+++ stable/11/sys/amd64/amd64/db_trace.c Sat Feb 17 18:00:01 2018 (r329462)
@@ -200,6 +200,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, s
if (name != NULL) {
if (strcmp(name, "calltrap") == 0 ||
strcmp(name, "fork_trampoline") == 0 ||
+ strcmp(name, "mchk_calltrap") == 0 ||
strcmp(name, "nmi_calltrap") == 0 ||
strcmp(name, "Xdblfault") == 0)
frame_type = TRAP;
Modified: stable/11/sys/amd64/amd64/exception.S
==============================================================================
--- stable/11/sys/amd64/amd64/exception.S Sat Feb 17 17:23:43 2018 (r329461)
+++ stable/11/sys/amd64/amd64/exception.S Sat Feb 17 18:00:01 2018 (r329462)
@@ -1,12 +1,16 @@
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
- * Copyright (c) 2007 The FreeBSD Foundation
+ * Copyright (c) 2007-2018 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
+ * Portions of this software were developed by
+ * Konstantin Belousov <kib at FreeBSD.org> under sponsorship from
+ * the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -38,13 +42,13 @@
#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
+#include "assym.s"
+
#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
-#include "assym.s"
-
#ifdef KDTRACE_HOOKS
.bss
.globl dtrace_invop_jump_addr
@@ -100,69 +104,62 @@ dtrace_invop_calltrap_addr:
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)
-/* Traps that we leave interrupts disabled for.. */
-#define TRAP_NOEN(a) \
- subq $TF_RIP,%rsp; \
- movl $(a),TF_TRAPNO(%rsp) ; \
- movq $0,TF_ADDR(%rsp) ; \
- movq $0,TF_ERR(%rsp) ; \
+/* Traps that we leave interrupts disabled for. */
+ .macro TRAP_NOEN l, trapno
+ PTI_ENTRY \l,X\l
+ .globl X\l
+ .type X\l,@function
+X\l: subq $TF_RIP,%rsp
+ movl $\trapno,TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
jmp alltraps_noen
-IDTVEC(dbg)
- TRAP_NOEN(T_TRCTRAP)
-IDTVEC(bpt)
- TRAP_NOEN(T_BPTFLT)
+ .endm
+
+ TRAP_NOEN dbg, T_TRCTRAP
+ TRAP_NOEN bpt, T_BPTFLT
#ifdef KDTRACE_HOOKS
-IDTVEC(dtrace_ret)
- TRAP_NOEN(T_DTRACE_RET)
+ TRAP_NOEN dtrace_ret, T_DTRACE_RET
#endif
/* Regular traps; The cpu does not supply tf_err for these. */
-#define TRAP(a) \
- subq $TF_RIP,%rsp; \
- movl $(a),TF_TRAPNO(%rsp) ; \
- movq $0,TF_ADDR(%rsp) ; \
- movq $0,TF_ERR(%rsp) ; \
+ .macro TRAP l, trapno
+ PTI_ENTRY \l,X\l
+ .globl X\l
+ .type X\l,@function
+X\l:
+ subq $TF_RIP,%rsp
+ movl $\trapno,TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
jmp alltraps
-IDTVEC(div)
- TRAP(T_DIVIDE)
-IDTVEC(ofl)
- TRAP(T_OFLOW)
-IDTVEC(bnd)
- TRAP(T_BOUND)
-IDTVEC(ill)
- TRAP(T_PRIVINFLT)
-IDTVEC(dna)
- TRAP(T_DNA)
-IDTVEC(fpusegm)
- TRAP(T_FPOPFLT)
-IDTVEC(mchk)
- TRAP(T_MCHK)
-IDTVEC(rsvd)
- TRAP(T_RESERVED)
-IDTVEC(fpu)
- TRAP(T_ARITHTRAP)
-IDTVEC(xmm)
- TRAP(T_XMMFLT)
+ .endm
-/* This group of traps have tf_err already pushed by the cpu */
-#define TRAP_ERR(a) \
- subq $TF_ERR,%rsp; \
- movl $(a),TF_TRAPNO(%rsp) ; \
- movq $0,TF_ADDR(%rsp) ; \
+ TRAP div, T_DIVIDE
+ TRAP ofl, T_OFLOW
+ TRAP bnd, T_BOUND
+ TRAP ill, T_PRIVINFLT
+ TRAP dna, T_DNA
+ TRAP fpusegm, T_FPOPFLT
+ TRAP rsvd, T_RESERVED
+ TRAP fpu, T_ARITHTRAP
+ TRAP xmm, T_XMMFLT
+
+/* This group of traps have tf_err already pushed by the cpu. */
+ .macro TRAP_ERR l, trapno
+ PTI_ENTRY \l,X\l,has_err=1
+ .globl X\l
+ .type X\l,@function
+X\l:
+ subq $TF_ERR,%rsp
+ movl $\trapno,TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
jmp alltraps
-IDTVEC(tss)
- TRAP_ERR(T_TSSFLT)
-IDTVEC(missing)
- subq $TF_ERR,%rsp
- movl $T_SEGNPFLT,TF_TRAPNO(%rsp)
- jmp prot_addrf
-IDTVEC(stk)
- subq $TF_ERR,%rsp
- movl $T_STKFLT,TF_TRAPNO(%rsp)
- jmp prot_addrf
-IDTVEC(align)
- TRAP_ERR(T_ALIGNFLT)
+ .endm
+ TRAP_ERR tss, T_TSSFLT
+ TRAP_ERR align, T_ALIGNFLT
+
/*
* alltraps entry point. Use swapgs if this is the first time in the
* kernel from userland. Reenable interrupts if they were enabled
@@ -174,24 +171,22 @@ IDTVEC(align)
alltraps:
movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz alltraps_testi /* already running with kernel GS.base */
+ jz 1f /* already running with kernel GS.base */
swapgs
movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
-alltraps_testi:
- testl $PSL_I,TF_RFLAGS(%rsp)
- jz alltraps_pushregs_no_rdi
- sti
-alltraps_pushregs_no_rdi:
+1: SAVE_SEGS
movq %rdx,TF_RDX(%rsp)
movq %rax,TF_RAX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jz 2f
+ call handle_ibrs_entry
+2: testl $PSL_I,TF_RFLAGS(%rsp)
+ jz alltraps_pushregs_no_rax
+ sti
alltraps_pushregs_no_rax:
movq %rsi,TF_RSI(%rsp)
- movq %rcx,TF_RCX(%rsp)
movq %r8,TF_R8(%rsp)
movq %r9,TF_R9(%rsp)
movq %rbx,TF_RBX(%rsp)
@@ -249,15 +244,18 @@ calltrap:
alltraps_noen:
movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz 1f /* already running with kernel GS.base */
+ jz 1f /* already running with kernel GS.base */
swapgs
movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
-1: movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
- jmp alltraps_pushregs_no_rdi
+1: SAVE_SEGS
+ movq %rdx,TF_RDX(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jz alltraps_pushregs_no_rax
+ call handle_ibrs_entry
+ jmp alltraps_pushregs_no_rax
IDTVEC(dblfault)
subq $TF_ERR,%rsp
@@ -279,56 +277,110 @@ IDTVEC(dblfault)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
+ SAVE_SEGS
movl $TF_HASSEGS,TF_FLAGS(%rsp)
cld
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
1:
- movq %rsp,%rdi
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 2f
+ movq %rax,%cr3
+2: movq %rsp,%rdi
call dblfault_handler
-2:
- hlt
- jmp 2b
+3: hlt
+ jmp 3b
+ ALIGN_TEXT
+IDTVEC(page_pti)
+ testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp)
+ jz Xpage
+ swapgs
+ pushq %rax
+ pushq %rdx
+ movq %cr3,%rax
+ movq %rax,PCPU(SAVED_UCR3)
+ PTI_UUENTRY has_err=1
+ subq $TF_ERR,%rsp
+ movq %rdi,TF_RDI(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ jmp page_u
IDTVEC(page)
subq $TF_ERR,%rsp
- movl $T_PAGEFLT,TF_TRAPNO(%rsp)
- movq %rdi,TF_RDI(%rsp) /* free up a GP register */
+ movq %rdi,TF_RDI(%rsp) /* free up GP registers */
+ movq %rax,TF_RAX(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz 1f /* already running with kernel GS.base */
+ jz page_cr2 /* already running with kernel GS.base */
swapgs
- movq PCPU(CURPCB),%rdi
+page_u: movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
-1: movq %cr2,%rdi /* preserve %cr2 before .. */
+ movq PCPU(SAVED_UCR3),%rax
+ movq %rax,PCB_SAVED_UCR3(%rdi)
+ call handle_ibrs_entry
+page_cr2:
+ movq %cr2,%rdi /* preserve %cr2 before .. */
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
+ SAVE_SEGS
+ movl $T_PAGEFLT,TF_TRAPNO(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
- jz alltraps_pushregs_no_rdi
+ jz alltraps_pushregs_no_rax
sti
- jmp alltraps_pushregs_no_rdi
+ jmp alltraps_pushregs_no_rax
/*
* We have to special-case this one. If we get a trap in doreti() at
* the iretq stage, we'll reenter with the wrong gs state. We'll have
* to do a special swapgs in this case even coming from the kernel.
* XXX linux has a trap handler for their equivalent of load_gs().
+ *
+ * On the stack, we have the hardware interrupt frame to return
+ * to usermode (faulted) and another frame with error code, for
+ * fault. For PTI, copy both frames to the main thread stack.
*/
-IDTVEC(prot)
+ .macro PROTF_ENTRY name,trapno
+\name\()_pti_doreti:
+ pushq %rax
+ pushq %rdx
+ swapgs
+ movq PCPU(KCR3),%rax
+ movq %rax,%cr3
+ movq PCPU(RSP0),%rax
+ subq $2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */
+ MOVE_STACKS (PTI_SIZE / 4 - 3)
+ movq %rax,%rsp
+ popq %rdx
+ popq %rax
+ swapgs
+ jmp X\name
+IDTVEC(\name\()_pti)
+ cmpq $doreti_iret,PTI_RIP-2*8(%rsp)
+ je \name\()_pti_doreti
+ testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
+ jz X\name
+ PTI_UENTRY has_err=1
+ swapgs
+IDTVEC(\name)
subq $TF_ERR,%rsp
- movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movl $\trapno,TF_TRAPNO(%rsp)
+ jmp prot_addrf
+ .endm
+
+ PROTF_ENTRY missing, T_SEGNPFLT
+ PROTF_ENTRY stk, T_STKFLT
+ PROTF_ENTRY prot, T_PROTFLT
+
prot_addrf:
movq $0,TF_ADDR(%rsp)
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
movq %rax,TF_RAX(%rsp)
movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
leaq doreti_iret(%rip),%rdi
@@ -354,7 +406,8 @@ prot_addrf:
3: cmpw $KUG32SEL,TF_GS(%rsp)
jne 4f
movq %rdx,PCB_GSBASE(%rdi)
-4: orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */
+4: call handle_ibrs_entry
+ orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
@@ -375,8 +428,18 @@ prot_addrf:
* We do not support invoking this from a custom segment registers,
* esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
*/
+ SUPERALIGN_TEXT
+IDTVEC(fast_syscall_pti)
+ swapgs
+ movq %rax,PCPU(SCRATCH_RAX)
+ movq PCPU(KCR3),%rax
+ movq %rax,%cr3
+ jmp fast_syscall_common
+ SUPERALIGN_TEXT
IDTVEC(fast_syscall)
swapgs
+ movq %rax,PCPU(SCRATCH_RAX)
+fast_syscall_common:
movq %rsp,PCPU(SCRATCH_RSP)
movq PCPU(RSP0),%rsp
/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
@@ -386,10 +449,11 @@ IDTVEC(fast_syscall)
movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */
movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */
movq %r11,TF_RSP(%rsp) /* user stack pointer */
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
+ movq PCPU(SCRATCH_RAX),%rax
+ movq %rax,TF_RAX(%rsp) /* syscall number */
+ movq %rdx,TF_RDX(%rsp) /* arg 3 */
+ SAVE_SEGS
+ call handle_ibrs_entry
movq PCPU(CURPCB),%r11
andl $~PCB_FULL_IRET,PCB_FLAGS(%r11)
sti
@@ -398,11 +462,9 @@ IDTVEC(fast_syscall)
movq $2,TF_ERR(%rsp)
movq %rdi,TF_RDI(%rsp) /* arg 1 */
movq %rsi,TF_RSI(%rsp) /* arg 2 */
- movq %rdx,TF_RDX(%rsp) /* arg 3 */
movq %r10,TF_RCX(%rsp) /* arg 4 */
movq %r8,TF_R8(%rsp) /* arg 5 */
movq %r9,TF_R9(%rsp) /* arg 6 */
- movq %rax,TF_RAX(%rsp) /* syscall number */
movq %rbx,TF_RBX(%rsp) /* C preserved */
movq %rbp,TF_RBP(%rsp) /* C preserved */
movq %r12,TF_R12(%rsp) /* C preserved */
@@ -420,11 +482,12 @@ IDTVEC(fast_syscall)
/* Disable interrupts before testing PCB_FULL_IRET. */
cli
testl $PCB_FULL_IRET,PCB_FLAGS(%rax)
- jnz 3f
+ jnz 4f
/* Check for and handle AST's on return to userland. */
movq PCPU(CURTHREAD),%rax
testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
- jne 2f
+ jne 3f
+ call handle_ibrs_exit
/* Restore preserved registers. */
MEXITCOUNT
movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */
@@ -434,16 +497,21 @@ IDTVEC(fast_syscall)
movq TF_RFLAGS(%rsp),%r11 /* original %rflags */
movq TF_RIP(%rsp),%rcx /* original %rip */
movq TF_RSP(%rsp),%rsp /* user stack pointer */
- swapgs
+ cmpb $0,pti
+ je 2f
+ movq PCPU(UCR3),%r9
+ movq %r9,%cr3
+ xorl %r9d,%r9d
+2: swapgs
sysretq
-2: /* AST scheduled. */
+3: /* AST scheduled. */
sti
movq %rsp,%rdi
call ast
jmp 1b
-3: /* Requested full context restore, use doreti for that. */
+4: /* Requested full context restore, use doreti for that. */
MEXITCOUNT
jmp doreti
@@ -499,17 +567,15 @@ IDTVEC(nmi)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
+ SAVE_SEGS
movl $TF_HASSEGS,TF_FLAGS(%rsp)
cld
xorl %ebx,%ebx
testb $SEL_RPL_MASK,TF_CS(%rsp)
jnz nmi_fromuserspace
/*
- * We've interrupted the kernel. Preserve GS.base in %r12.
+ * We've interrupted the kernel. Preserve GS.base in %r12,
+ * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTRL in %r14d.
*/
movl $MSR_GSBASE,%ecx
rdmsr
@@ -521,27 +587,45 @@ IDTVEC(nmi)
movl %edx,%eax
shrq $32,%rdx
wrmsr
+ movq %cr3,%r13
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 1f
+ movq %rax,%cr3
+1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+ je nmi_calltrap
+ movl $MSR_IA32_SPEC_CTRL,%ecx
+ rdmsr
+ movl %eax,%r14d
+ call handle_ibrs_entry
jmp nmi_calltrap
nmi_fromuserspace:
incl %ebx
swapgs
- testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
- jz 2f
+ movq %cr3,%r13
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 1f
+ movq %rax,%cr3
+1: call handle_ibrs_entry
movq PCPU(CURPCB),%rdi
testq %rdi,%rdi
- jz 2f
+ jz 3f
+ orl $PCB_FULL_IRET,PCB_FLAGS(%rdi)
+ testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+ jz 3f
cmpw $KUF32SEL,TF_FS(%rsp)
- jne 1f
+ jne 2f
rdfsbase %rax
movq %rax,PCB_FSBASE(%rdi)
-1: cmpw $KUG32SEL,TF_GS(%rsp)
- jne 2f
+2: cmpw $KUG32SEL,TF_GS(%rsp)
+ jne 3f
movl $MSR_KGSBASE,%ecx
rdmsr
shlq $32,%rdx
orq %rdx,%rax
movq %rax,PCB_GSBASE(%rdi)
-2:
+3:
/* Note: this label is also used by ddb and gdb: */
nmi_calltrap:
FAKE_MCOUNT(TF_RIP(%rsp))
@@ -564,26 +648,29 @@ nmi_calltrap:
movq PCPU(CURTHREAD),%rax
orq %rax,%rax /* curthread present? */
jz nocallchain
- testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
- jz nocallchain
/*
- * A user callchain is to be captured, so:
- * - Move execution to the regular kernel stack, to allow for
- * nested NMI interrupts.
- * - Take the processor out of "NMI" mode by faking an "iret".
- * - Enable interrupts, so that copyin() can work.
+ * Move execution to the regular kernel stack, because we
+ * committed to return through doreti.
*/
movq %rsp,%rsi /* source stack pointer */
movq $TF_SIZE,%rcx
movq PCPU(RSP0),%rdx
subq %rcx,%rdx
movq %rdx,%rdi /* destination stack pointer */
-
shrq $3,%rcx /* trap frame size in long words */
cld
rep
movsq /* copy trapframe */
+ movq %rdx,%rsp /* we are on the regular kstack */
+ testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
+ jz nocallchain
+ /*
+ * A user callchain is to be captured, so:
+ * - Take the processor out of "NMI" mode by faking an "iret",
+ * to allow for nested NMI interrupts.
+ * - Enable interrupts, so that copyin() can work.
+ */
movl %ss,%eax
pushq %rax /* tf_ss */
pushq %rdx /* tf_rsp (on kernel stack) */
@@ -613,33 +700,139 @@ outofnmi:
cli
nocallchain:
#endif
- testl %ebx,%ebx
+ testl %ebx,%ebx /* %ebx != 0 => return to userland */
jnz doreti_exit
-nmi_kernelexit:
/*
+ * Restore speculation control MSR, if preserved.
+ */
+ testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+ je 1f
+ movl %r14d,%eax
+ xorl %edx,%edx
+ movl $MSR_IA32_SPEC_CTRL,%ecx
+ wrmsr
+ /*
* Put back the preserved MSR_GSBASE value.
*/
+1: movl $MSR_GSBASE,%ecx
+ movq %r12,%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+ movq %r13,%cr3
+ RESTORE_REGS
+ addq $TF_RIP,%rsp
+ jmp doreti_iret
+
+/*
+ * MC# handling is similar to NMI.
+ *
+ * As with NMIs, machine check exceptions do not respect RFLAGS.IF and
+ * can occur at any time with a GS.base value that does not correspond
+ * to the privilege level in CS.
+ *
+ * Machine checks are not unblocked by iretq, but it is best to run
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable
mailing list