svn commit: r364340 - in head/sys/amd64: include vmm vmm/intel
Peter Grehan
grehan at FreeBSD.org
Tue Aug 18 07:23:48 UTC 2020
Author: grehan
Date: Tue Aug 18 07:23:47 2020
New Revision: 364340
URL: https://svnweb.freebsd.org/changeset/base/364340
Log:
Support guest rdtscp and rdpid instructions on Intel VT-x
Enable whichever of rdtscp and rdpid the host supports for bhyve guests on
Intel-based hosts that support the "enable RDTSCP" VM-execution control.
Submitted by: adam_fenn.io
Reported by: chuck
Reviewed by: chuck, grehan, jhb
Approved by: jhb (bhyve), grehan
MFC after: 3 weeks
Relnotes: Yes
Differential Revision: https://reviews.freebsd.org/D26003
Modified:
head/sys/amd64/include/vmm.h
head/sys/amd64/vmm/intel/vmx.c
head/sys/amd64/vmm/intel/vmx.h
head/sys/amd64/vmm/intel/vmx_msr.c
head/sys/amd64/vmm/intel/vmx_msr.h
head/sys/amd64/vmm/x86.c
Modified: head/sys/amd64/include/vmm.h
==============================================================================
--- head/sys/amd64/include/vmm.h Tue Aug 18 07:08:17 2020 (r364339)
+++ head/sys/amd64/include/vmm.h Tue Aug 18 07:23:47 2020 (r364340)
@@ -481,6 +481,8 @@ enum vm_cap_type {
VM_CAP_UNRESTRICTED_GUEST,
VM_CAP_ENABLE_INVPCID,
VM_CAP_BPT_EXIT,
+ VM_CAP_RDPID,
+ VM_CAP_RDTSCP,
VM_CAP_MAX
};
Modified: head/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- head/sys/amd64/vmm/intel/vmx.c Tue Aug 18 07:08:17 2020 (r364339)
+++ head/sys/amd64/vmm/intel/vmx.c Tue Aug 18 07:23:47 2020 (r364340)
@@ -167,6 +167,14 @@ static int cap_pause_exit;
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, pause_exit, CTLFLAG_RD, &cap_pause_exit,
0, "PAUSE triggers a VM-exit");
+static int cap_rdpid;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdpid, CTLFLAG_RD, &cap_rdpid, 0,
+ "Guests are allowed to use RDPID");
+
+static int cap_rdtscp;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdtscp, CTLFLAG_RD, &cap_rdtscp, 0,
+ "Guests are allowed to use RDTSCP");
+
static int cap_unrestricted_guest;
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, unrestricted_guest, CTLFLAG_RD,
&cap_unrestricted_guest, 0, "Unrestricted guests");
@@ -303,6 +311,18 @@ static void vmx_inject_pir(struct vlapic *vlapic);
static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now);
#endif
+/*
+ * Report whether the host advertises RDPID, i.e. whether the
+ * CPUID_STDEXT2_RDPID bit is set in cpu_stdext_feature2.
+ */
+static inline bool
+host_has_rdpid(void)
+{
+	if ((cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) == 0)
+		return (false);
+	return (true);
+}
+
+/*
+ * Report whether the host advertises RDTSCP, i.e. whether the
+ * AMDID_RDTSCP bit is set in amd_feature.
+ */
+static inline bool
+host_has_rdtscp(void)
+{
+	if ((amd_feature & AMDID_RDTSCP) == 0)
+		return (false);
+	return (true);
+}
+
#ifdef KTR
static const char *
exit_reason_to_str(int reason)
@@ -755,6 +775,43 @@ vmx_init(int ipinum)
PROCBASED_PAUSE_EXITING, 0,
&tmp) == 0);
+ /*
+ * Check support for RDPID and/or RDTSCP.
+ *
+ * Support a pass-through-based implementation of these via the
+ * "enable RDTSCP" VM-execution control and the "RDTSC exiting"
+ * VM-execution control.
+ *
+ * The "enable RDTSCP" VM-execution control applies to both RDPID
+ * and RDTSCP (see SDM volume 3, section 25.3, "Changes to
+ * Instruction Behavior in VMX Non-root operation"); this is why
+ * only this VM-execution control needs to be enabled in order to
+ * enable passing through whichever of RDPID and/or RDTSCP are
+ * supported by the host.
+ *
+ * The "RDTSC exiting" VM-execution control applies to both RDTSC
+ * and RDTSCP (again, per SDM volume 3, section 25.3), and is
+ * already set up for RDTSC and RDTSCP pass-through by the current
+ * implementation of RDTSC.
+ *
+ * Although RDPID and RDTSCP are optional capabilities, since there
+ * does not currently seem to be a use case for enabling/disabling
+ * these via libvmmapi, choose not to support this and, instead,
+ * just statically always enable or always disable this support
+ * across all vCPUs on all VMs. (Note that there may be some
+ * complications to providing this functionality, e.g., the MSR
+ * bitmap is currently per-VM rather than per-vCPU while the
+ * capability API wants to be able to control capabilities on a
+ * per-vCPU basis).
+ */
+ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+ MSR_VMX_PROCBASED_CTLS2,
+ PROCBASED2_ENABLE_RDTSCP, 0, &tmp);
+ cap_rdpid = error == 0 && host_has_rdpid();
+ cap_rdtscp = error == 0 && host_has_rdtscp();
+ if (cap_rdpid || cap_rdtscp)
+ procbased_ctls2 |= PROCBASED2_ENABLE_RDTSCP;
+
cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
MSR_VMX_PROCBASED_CTLS2,
PROCBASED2_UNRESTRICTED_GUEST, 0,
@@ -1007,6 +1064,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* the "use TSC offsetting" execution control is enabled and the
* difference between the host TSC and the guest TSC is written
* into the TSC offset in the VMCS.
+ *
+ * Guest TSC_AUX support is enabled if any of guest RDPID and/or
+ * guest RDTSCP support are enabled (since, as per Table 2-2 in SDM
+ * volume 4, TSC_AUX is supported if any of RDPID and/or RDTSCP are
+ * supported). If guest TSC_AUX support is enabled, TSC_AUX is
+ * exposed read-only so that the VMM can do one fewer MSR read per
+ * exit than if this register were exposed read-write; the guest
+ * restore value can be updated during guest writes (expected to be
+ * rare) instead of during all exits (common).
*/
if (guest_msr_rw(vmx, MSR_GSBASE) ||
guest_msr_rw(vmx, MSR_FSBASE) ||
@@ -1014,7 +1080,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
guest_msr_rw(vmx, MSR_EFER) ||
- guest_msr_ro(vmx, MSR_TSC))
+ guest_msr_ro(vmx, MSR_TSC) ||
+ ((cap_rdpid || cap_rdtscp) && guest_msr_ro(vmx, MSR_TSC_AUX)))
panic("vmx_vminit: error setting guest msr access");
vpid_alloc(vpid, VM_MAXCPU);
@@ -1093,6 +1160,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
vmx->cap[i].set = 0;
+ vmx->cap[i].set |= cap_rdpid != 0 ? 1 << VM_CAP_RDPID : 0;
+ vmx->cap[i].set |= cap_rdtscp != 0 ? 1 << VM_CAP_RDTSCP : 0;
vmx->cap[i].proc_ctls = procbased_ctls;
vmx->cap[i].proc_ctls2 = procbased_ctls2;
vmx->cap[i].exc_bitmap = exc_bitmap;
@@ -3002,11 +3071,30 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pm
sidt(&idtr);
ldt_sel = sldt();
+ /*
+ * The TSC_AUX MSR must be saved/restored while interrupts
+ * are disabled so that it is not possible for the guest
+ * TSC_AUX MSR value to be overwritten by the resume
+ * portion of the IPI_SUSPEND codepath. This is why the
+ * transition of this MSR is handled separately from those
+ * handled by vmx_msr_guest_{enter,exit}(), which are ok to
+ * be transitioned with preemption disabled but interrupts
+ * enabled.
+ *
+ * These vmx_msr_guest_{enter,exit}_tsc_aux() calls can be
+ * anywhere in this loop so long as they happen with
+ * interrupts disabled. This location is chosen for
+ * simplicity.
+ */
+ vmx_msr_guest_enter_tsc_aux(vmx, vcpu);
+
vmx_run_trace(vmx, vcpu);
vmx_dr_enter_guest(vmxctx);
rc = vmx_enter_guest(vmxctx, vmx, launched);
vmx_dr_leave_guest(vmxctx);
+ vmx_msr_guest_exit_tsc_aux(vmx, vcpu);
+
bare_lgdt(&gdtr);
lidt(&idtr);
lldt(ldt_sel);
@@ -3344,6 +3432,14 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval)
if (cap_monitor_trap)
ret = 0;
break;
+ case VM_CAP_RDPID:
+ if (cap_rdpid)
+ ret = 0;
+ break;
+ case VM_CAP_RDTSCP:
+ if (cap_rdtscp)
+ ret = 0;
+ break;
case VM_CAP_UNRESTRICTED_GUEST:
if (cap_unrestricted_guest)
ret = 0;
@@ -3407,6 +3503,17 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
flag = PROCBASED_PAUSE_EXITING;
reg = VMCS_PRI_PROC_BASED_CTLS;
}
+ break;
+ case VM_CAP_RDPID:
+ case VM_CAP_RDTSCP:
+ if (cap_rdpid || cap_rdtscp)
+ /*
+ * Choose not to support enabling/disabling
+ * RDPID/RDTSCP via libvmmapi since, as per the
+ * discussion in vmx_init(), RDPID/RDTSCP are
+ * either always enabled or always disabled.
+ */
+ error = EOPNOTSUPP;
break;
case VM_CAP_UNRESTRICTED_GUEST:
if (cap_unrestricted_guest) {
Modified: head/sys/amd64/vmm/intel/vmx.h
==============================================================================
--- head/sys/amd64/vmm/intel/vmx.h Tue Aug 18 07:08:17 2020 (r364339)
+++ head/sys/amd64/vmm/intel/vmx.h Tue Aug 18 07:23:47 2020 (r364340)
@@ -117,6 +117,7 @@ enum {
IDX_MSR_SF_MASK,
IDX_MSR_KGSBASE,
IDX_MSR_PAT,
+ IDX_MSR_TSC_AUX,
GUEST_MSR_NUM /* must be the last enumeration */
};
@@ -152,5 +153,19 @@ int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint
extern char vmx_exit_guest[];
extern char vmx_exit_guest_flush_rsb[];
+
+/*
+ * Return true if the guest is given a TSC_AUX MSR, which is the case
+ * when either the VM_CAP_RDPID or the VM_CAP_RDTSCP bit is set in the
+ * capability set.
+ */
+static inline bool
+vmx_have_msr_tsc_aux(struct vmx *vmx)
+{
+	/*
+	 * Only vCPU 0's capability set is consulted: these two bits are
+	 * the same for every vCPU (see the discussion in vmx_init() and
+	 * their initialization in vmx_vminit()), so callers do not need
+	 * to supply a vcpuid.
+	 */
+	if ((vmx->cap[0].set & (1 << VM_CAP_RDPID)) != 0)
+		return (true);
+	return ((vmx->cap[0].set & (1 << VM_CAP_RDTSCP)) != 0);
+}
#endif
Modified: head/sys/amd64/vmm/intel/vmx_msr.c
==============================================================================
--- head/sys/amd64/vmm/intel/vmx_msr.c Tue Aug 18 07:08:17 2020 (r364339)
+++ head/sys/amd64/vmm/intel/vmx_msr.c Tue Aug 18 07:23:47 2020 (r364340)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include "vmx.h"
#include "vmx_msr.h"
+#include "x86.h"
static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
@@ -361,6 +362,16 @@ vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
}
void
+vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid)
+{
+	uint64_t guest_val;
+	uint32_t host_val;
+
+	/*
+	 * Load the guest's saved TSC_AUX value into the MSR before guest
+	 * entry.  Nothing is written when the guest has no TSC_AUX
+	 * support, or when the saved guest value already equals this
+	 * CPU's cpuid (presumably the value the host keeps in TSC_AUX,
+	 * which makes the write redundant).
+	 */
+	guest_val = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
+	host_val = PCPU_GET(cpuid);
+
+	if (!vmx_have_msr_tsc_aux(vmx))
+		return;
+	if (guest_val == host_val)
+		return;
+
+	wrmsr(MSR_TSC_AUX, guest_val);
+}
+
+void
vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
{
uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
@@ -381,6 +392,23 @@ vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
/* MSR_KGSBASE will be restored on the way back to userspace */
}
+void
+vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid)
+{
+	uint64_t guest_val;
+	uint32_t host_val;
+
+	guest_val = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
+	host_val = PCPU_GET(cpuid);
+
+	/*
+	 * Nothing to undo when the guest has no TSC_AUX support or when
+	 * the guest value equals this CPU's cpuid, the value written
+	 * back below.
+	 */
+	if (!vmx_have_msr_tsc_aux(vmx) || guest_val == host_val)
+		return;
+
+	/*
+	 * The guest value does not have to be saved here:
+	 * vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX] always holds the
+	 * current value because it is updated whenever the guest writes
+	 * the MSR (which is expected to be very rare).
+	 */
+	wrmsr(MSR_TSC_AUX, host_val);
+}
+
int
vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
{
@@ -472,6 +500,17 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint
break;
case MSR_TSC:
error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
+ break;
+ case MSR_TSC_AUX:
+ if (vmx_have_msr_tsc_aux(vmx))
+ /*
+ * vmx_msr_guest_enter_tsc_aux() will apply this
+ * value when it is called immediately before guest
+ * entry.
+ */
+ guest_msrs[IDX_MSR_TSC_AUX] = val;
+ else
+ vm_inject_gp(vmx->vm, vcpuid);
break;
default:
error = EINVAL;
Modified: head/sys/amd64/vmm/intel/vmx_msr.h
==============================================================================
--- head/sys/amd64/vmm/intel/vmx_msr.h Tue Aug 18 07:08:17 2020 (r364339)
+++ head/sys/amd64/vmm/intel/vmx_msr.h Tue Aug 18 07:23:47 2020 (r364340)
@@ -35,8 +35,10 @@ struct vmx;
void vmx_msr_init(void);
void vmx_msr_guest_init(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid);
void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid);
void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid);
int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu);
int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu);
Modified: head/sys/amd64/vmm/x86.c
==============================================================================
--- head/sys/amd64/vmm/x86.c Tue Aug 18 07:08:17 2020 (r364339)
+++ head/sys/amd64/vmm/x86.c Tue Aug 18 07:23:47 2020 (r364340)
@@ -92,7 +92,8 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
{
const struct xsave_limits *limits;
uint64_t cr4;
- int error, enable_invpcid, level, width, x2apic_id;
+ int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
+ width, x2apic_id;
unsigned int func, regs[4], logical_cpus;
enum x2apic_state x2apic_state;
uint16_t cores, maxcpus, sockets, threads;
@@ -195,11 +196,13 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/* Hide mwaitx/monitorx capability from the guest */
regs[2] &= ~AMDID2_MWAITX;
- /*
- * Hide rdtscp/ia32_tsc_aux until we know how
- * to deal with them.
- */
- regs[3] &= ~AMDID_RDTSCP;
+ /* Advertise RDTSCP if it is enabled. */
+ error = vm_get_capability(vm, vcpu_id,
+ VM_CAP_RDTSCP, &enable_rdtscp);
+ if (error == 0 && enable_rdtscp)
+ regs[3] |= AMDID_RDTSCP;
+ else
+ regs[3] &= ~AMDID_RDTSCP;
break;
case CPUID_8000_0007:
@@ -442,6 +445,12 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
regs[2] = 0;
regs[3] &= CPUID_STDEXT3_MD_CLEAR;
+
+ /* Advertise RDPID if it is enabled. */
+ error = vm_get_capability(vm, vcpu_id,
+ VM_CAP_RDPID, &enable_rdpid);
+ if (error == 0 && enable_rdpid)
+ regs[2] |= CPUID_STDEXT2_RDPID;
/* Advertise INVPCID if it is enabled. */
error = vm_get_capability(vm, vcpu_id,
More information about the svn-src-head
mailing list