svn commit: r266339 - in stable/10: sys/amd64/amd64 sys/amd64/include sys/amd64/vmm sys/amd64/vmm/amd sys/amd64/vmm/intel sys/amd64/vmm/io usr.sbin/bhyvectl
Author: jhb
Date: Sat May 17 19:11:08 2014
New Revision: 266339
URL: http://svnweb.freebsd.org/changeset/base/266339
Log:
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
features of Intel VT-x if supported by hardware.
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level triggered interrupts with VT-x 'Virtual Interrupt
Delivery'.
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
Added:
stable/10/sys/amd64/vmm/io/vlapic_priv.h
- copied, changed from r259863, head/sys/amd64/vmm/io/vlapic_priv.h
Modified:
stable/10/sys/amd64/amd64/pmap.c
stable/10/sys/amd64/include/pmap.h
stable/10/sys/amd64/include/vmm.h
stable/10/sys/amd64/vmm/amd/amdv.c
stable/10/sys/amd64/vmm/intel/ept.c
stable/10/sys/amd64/vmm/intel/ept.h
stable/10/sys/amd64/vmm/intel/vmcs.c
stable/10/sys/amd64/vmm/intel/vmcs.h
stable/10/sys/amd64/vmm/intel/vmx.c
stable/10/sys/amd64/vmm/intel/vmx.h
stable/10/sys/amd64/vmm/intel/vmx_controls.h
stable/10/sys/amd64/vmm/intel/vmx_genassym.c
stable/10/sys/amd64/vmm/intel/vmx_support.S
stable/10/sys/amd64/vmm/io/vioapic.c
stable/10/sys/amd64/vmm/io/vlapic.c
stable/10/sys/amd64/vmm/io/vlapic.h
stable/10/sys/amd64/vmm/vmm.c
stable/10/sys/amd64/vmm/vmm_ipi.c
stable/10/sys/amd64/vmm/vmm_ipi.h
stable/10/sys/amd64/vmm/vmm_lapic.c
stable/10/sys/amd64/vmm/vmm_lapic.h
stable/10/sys/amd64/vmm/vmm_stat.c
stable/10/sys/amd64/vmm/vmm_stat.h
stable/10/usr.sbin/bhyvectl/bhyvectl.c
Directory Properties:
stable/10/ (props changed)
Modified: stable/10/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/10/sys/amd64/amd64/pmap.c Sat May 17 19:06:46 2014 (r266338)
+++ stable/10/sys/amd64/amd64/pmap.c Sat May 17 19:11:08 2014 (r266339)
@@ -1304,6 +1304,7 @@ pmap_invalidate_page_pcid(pmap_t pmap, v
static __inline void
pmap_invalidate_ept(pmap_t pmap)
{
+ int ipinum;
sched_pin();
KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
@@ -1328,11 +1329,9 @@ pmap_invalidate_ept(pmap_t pmap)
/*
* Force the vcpu to exit and trap back into the hypervisor.
- *
- * XXX this is not optimal because IPI_AST builds a trapframe
- * whereas all we need is an 'eoi' followed by 'iret'.
*/
- ipi_selected(pmap->pm_active, IPI_AST);
+ ipinum = pmap->pm_flags & PMAP_NESTED_IPIMASK;
+ ipi_selected(pmap->pm_active, ipinum);
sched_unpin();
}
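
The vector delivered here is reserved at module load through vmm_ipi_alloc(),
whose handler does little more than an EOI and a return, instead of the
trapframe-building IPI_AST path that the deleted XXX comment complained about.
A minimal sketch of the allocation side; the setup function and error value
are illustrative, but vmm_ipi_alloc() returning 0 on failure matches the
pirvec check in vmx_init() further down:

	#include <sys/param.h>
	#include <sys/errno.h>

	#include "vmm_ipi.h"

	/*
	 * Sketch: reserve a dedicated IDT vector for nested page table
	 * shootdowns.  The vector is later stashed in the low byte of
	 * pm_flags so that pmap_invalidate_ept() can deliver it instead
	 * of IPI_AST.
	 */
	static int nested_ipinum;

	static int
	shootdown_ipi_setup(void)
	{

		nested_ipinum = vmm_ipi_alloc();
		if (nested_ipinum == 0)
			return (ENOSPC);	/* hypothetical error choice */
		return (0);
	}
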
Modified: stable/10/sys/amd64/include/pmap.h
==============================================================================
--- stable/10/sys/amd64/include/pmap.h Sat May 17 19:06:46 2014 (r266338)
+++ stable/10/sys/amd64/include/pmap.h Sat May 17 19:11:08 2014 (r266339)
@@ -312,9 +312,10 @@ struct pmap {
};
/* flags */
-#define PMAP_PDE_SUPERPAGE (1 << 0) /* supports 2MB superpages */
-#define PMAP_EMULATE_AD_BITS (1 << 1) /* needs A/D bits emulation */
-#define PMAP_SUPPORTS_EXEC_ONLY (1 << 2) /* execute only mappings ok */
+#define PMAP_NESTED_IPIMASK 0xff
+#define PMAP_PDE_SUPERPAGE (1 << 8) /* supports 2MB superpages */
+#define PMAP_EMULATE_AD_BITS (1 << 9) /* needs A/D bits emulation */
+#define PMAP_SUPPORTS_EXEC_ONLY (1 << 10) /* execute only mappings ok */
typedef struct pmap *pmap_t;
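
The IPI vector occupies bits 0-7 of pm_flags, which is why the three feature
flags moved up to bits 8-10. A tiny standalone illustration of the packing;
the vector value is made up:

	#include <assert.h>

	#define	PMAP_NESTED_IPIMASK	0xff
	#define	PMAP_PDE_SUPERPAGE	(1 << 8)	/* as redefined above */

	int
	main(void)
	{
		int pm_flags, ipinum = 0xf2;	/* hypothetical vector */

		pm_flags = PMAP_PDE_SUPERPAGE | (ipinum & PMAP_NESTED_IPIMASK);
		assert((pm_flags & PMAP_NESTED_IPIMASK) == ipinum);
		assert((pm_flags & PMAP_PDE_SUPERPAGE) != 0);
		return (0);
	}
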
Modified: stable/10/sys/amd64/include/vmm.h
==============================================================================
--- stable/10/sys/amd64/include/vmm.h Sat May 17 19:06:46 2014 (r266338)
+++ stable/10/sys/amd64/include/vmm.h Sat May 17 19:11:08 2014 (r266339)
@@ -47,12 +47,12 @@ struct pmap;
enum x2apic_state;
-typedef int (*vmm_init_func_t)(void);
+typedef int (*vmm_init_func_t)(int ipinum);
typedef int (*vmm_cleanup_func_t)(void);
typedef void (*vmm_resume_func_t)(void);
typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
- struct pmap *pmap);
+ struct pmap *pmap, void *rendezvous_cookie);
typedef void (*vmi_cleanup_func_t)(void *vmi);
typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
uint64_t *retval);
@@ -69,6 +69,8 @@ typedef int (*vmi_get_cap_t)(void *vmi,
typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);
+typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
+typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
struct vmm_ops {
vmm_init_func_t init; /* module wide initialization */
@@ -87,6 +89,8 @@ struct vmm_ops {
vmi_set_cap_t vmsetcap;
vmi_vmspace_alloc vmspace_alloc;
vmi_vmspace_free vmspace_free;
+ vmi_vlapic_init vlapic_init;
+ vmi_vlapic_cleanup vlapic_cleanup;
};
extern struct vmm_ops vmm_ops_intel;
@@ -132,6 +136,31 @@ cpuset_t vm_active_cpus(struct vm *vm);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
/*
+ * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
+ * The rendezvous 'func(arg)' is not allowed to do anything that will
+ * cause the thread to be put to sleep.
+ *
+ * If the rendezvous is being initiated from a vcpu context then the
+ * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
+ *
+ * The caller cannot hold any locks when initiating the rendezvous.
+ *
+ * The implementation of this API may cause vcpus other than those specified
+ * by 'dest' to be stalled. The caller should not rely on any vcpus making
+ * forward progress when the rendezvous is in progress.
+ */
+typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
+void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
+ vm_rendezvous_func_t func, void *arg);
+
+static __inline int
+vcpu_rendezvous_pending(void *rendezvous_cookie)
+{
+
+ return (*(uintptr_t *)rendezvous_cookie != 0);
+}
+
+/*
* Return 1 if device indicated by bus/slot/func is supposed to be a
* pci passthrough device.
*
@@ -158,7 +187,7 @@ vcpu_is_running(struct vm *vm, int vcpu,
}
void *vcpu_stats(struct vm *vm, int vcpu);
-void vcpu_notify_event(struct vm *vm, int vcpuid);
+void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
@@ -267,6 +296,8 @@ enum vm_exitcode {
VM_EXITCODE_INST_EMUL,
VM_EXITCODE_SPINUP_AP,
VM_EXITCODE_SPINDOWN_CPU,
+ VM_EXITCODE_RENDEZVOUS,
+ VM_EXITCODE_IOAPIC_EOI,
VM_EXITCODE_MAX
};
@@ -323,6 +354,9 @@ struct vm_exit {
struct {
uint64_t rflags;
} hlt;
+ struct {
+ int vector;
+ } ioapic_eoi;
} u;
};
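
The rendezvous API declared above is what lets the vioapic update
level-trigger state consistently across all active vcpus. A hedged sketch of
a caller; everything except vm_smp_rendezvous() and vm_active_cpus() is
illustrative:

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <machine/vmm.h>

	/*
	 * Sketch: run a callback on every active vcpu, e.g. to refresh
	 * per-vcpu level-trigger state.  The callback must not sleep.
	 */
	static void
	tmr_update_cb(struct vm *vm, int vcpuid, void *arg)
	{

		printf("vcpu %d: vector %d is level triggered\n", vcpuid,
		    *(int *)arg);
	}

	static void
	tmr_update_all(struct vm *vm, int vector)
	{

		/* vcpuid -1: not initiated from a vcpu; no locks held */
		vm_smp_rendezvous(vm, -1, vm_active_cpus(vm), tmr_update_cb,
		    &vector);
	}

On the other side, the vcpu run loop polls vcpu_rendezvous_pending() on the
cookie passed through vmi_run_func_t and bails out to vm_run() so the
rendezvous can be serviced.
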
Modified: stable/10/sys/amd64/vmm/amd/amdv.c
==============================================================================
--- stable/10/sys/amd64/vmm/amd/amdv.c Sat May 17 19:06:46 2014 (r266338)
+++ stable/10/sys/amd64/vmm/amd/amdv.c Sat May 17 19:11:08 2014 (r266339)
@@ -38,7 +38,7 @@ __FBSDID("$FreeBSD$");
#include "io/iommu.h"
static int
-amdv_init(void)
+amdv_init(int ipinum)
{
printf("amdv_init: not implemented\n");
@@ -67,7 +67,7 @@ amdv_vminit(struct vm *vm, struct pmap *
}
static int
-amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap)
+amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap, void *cookie)
{
printf("amdv_vmrun: not implemented\n");
@@ -155,6 +155,20 @@ amdv_vmspace_free(struct vmspace *vmspac
return;
}
+static struct vlapic *
+amdv_vlapic_init(void *arg, int vcpuid)
+{
+
+ panic("amdv_vlapic_init: not implmented");
+}
+
+static void
+amdv_vlapic_cleanup(void *arg, struct vlapic *vlapic)
+{
+
+ panic("amdv_vlapic_cleanup: not implemented");
+}
+
struct vmm_ops vmm_ops_amd = {
amdv_init,
amdv_cleanup,
@@ -171,6 +185,8 @@ struct vmm_ops vmm_ops_amd = {
amdv_setcap,
amdv_vmspace_alloc,
amdv_vmspace_free,
+ amdv_vlapic_init,
+ amdv_vlapic_cleanup,
};
static int
Modified: stable/10/sys/amd64/vmm/intel/ept.c
==============================================================================
--- stable/10/sys/amd64/vmm/intel/ept.c Sat May 17 19:06:46 2014 (r266338)
+++ stable/10/sys/amd64/vmm/intel/ept.c Sat May 17 19:11:08 2014 (r266339)
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include "vmx_cpufunc.h"
+#include "vmm_ipi.h"
#include "vmx_msr.h"
#include "ept.h"
@@ -76,7 +77,7 @@ SYSCTL_INT(_hw_vmm_ept, OID_AUTO, pmap_f
&ept_pmap_flags, 0, NULL);
int
-ept_init(void)
+ept_init(int ipinum)
{
int use_hw_ad_bits, use_superpages, use_exec_only;
uint64_t cap;
@@ -98,6 +99,8 @@ ept_init(void)
!INVEPT_ALL_TYPES_SUPPORTED(cap))
return (EINVAL);
+ ept_pmap_flags = ipinum & PMAP_NESTED_IPIMASK;
+
use_superpages = 1;
TUNABLE_INT_FETCH("hw.vmm.ept.use_superpages", &use_superpages);
if (use_superpages && EPT_PDE_SUPERPAGE(cap))
Modified: stable/10/sys/amd64/vmm/intel/ept.h
==============================================================================
--- stable/10/sys/amd64/vmm/intel/ept.h Sat May 17 19:06:46 2014 (r266338)
+++ stable/10/sys/amd64/vmm/intel/ept.h Sat May 17 19:11:08 2014 (r266339)
@@ -31,7 +31,7 @@
struct vmx;
-int ept_init(void);
+int ept_init(int ipinum);
void ept_invalidate_mappings(u_long eptp);
struct vmspace *ept_vmspace_alloc(vm_offset_t min, vm_offset_t max);
void ept_vmspace_free(struct vmspace *vmspace);
Modified: stable/10/sys/amd64/vmm/intel/vmcs.c
==============================================================================
--- stable/10/sys/amd64/vmm/intel/vmcs.c Sat May 17 19:06:46 2014 (r266338)
+++ stable/10/sys/amd64/vmm/intel/vmcs.c Sat May 17 19:11:08 2014 (r266339)
@@ -315,11 +315,7 @@ done:
}
int
-vmcs_set_defaults(struct vmcs *vmcs,
- u_long host_rip, u_long host_rsp, uint64_t eptp,
- uint32_t pinbased_ctls, uint32_t procbased_ctls,
- uint32_t procbased_ctls2, uint32_t exit_ctls,
- uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
+vmcs_init(struct vmcs *vmcs)
{
int error, codesel, datasel, tsssel;
u_long cr0, cr4, efer;
@@ -335,22 +331,6 @@ vmcs_set_defaults(struct vmcs *vmcs,
*/
VMPTRLD(vmcs);
- /*
- * Load the VMX controls
- */
- if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
- goto done;
- if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
- goto done;
- if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
- goto done;
- if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
- goto done;
- if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
- goto done;
-
- /* Guest state */
-
/* Initialize guest IA32_PAT MSR with the default value */
pat = PAT_VALUE(0, PAT_WRITE_BACK) |
PAT_VALUE(1, PAT_WRITE_THROUGH) |
@@ -422,23 +402,7 @@ vmcs_set_defaults(struct vmcs *vmcs,
goto done;
/* instruction pointer */
- if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
- goto done;
-
- /* stack pointer */
- if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
- goto done;
-
- /* eptp */
- if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
- goto done;
-
- /* vpid */
- if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
- goto done;
-
- /* msr bitmap */
- if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
+ if ((error = vmwrite(VMCS_HOST_RIP, (u_long)vmx_exit_guest)) != 0)
goto done;
/* exception bitmap */
@@ -509,7 +473,7 @@ DB_SHOW_COMMAND(vmcs, db_show_vmcs)
switch (exit & 0x8000ffff) {
case EXIT_REASON_EXCEPTION:
case EXIT_REASON_EXT_INTR:
- val = vmcs_read(VMCS_EXIT_INTERRUPTION_INFO);
+ val = vmcs_read(VMCS_EXIT_INTR_INFO);
db_printf("Interrupt Type: ");
switch (val >> 8 & 0x7) {
case 0:
@@ -531,7 +495,7 @@ DB_SHOW_COMMAND(vmcs, db_show_vmcs)
db_printf(" Vector: %lu", val & 0xff);
if (val & 0x800)
db_printf(" Error Code: %lx",
- vmcs_read(VMCS_EXIT_INTERRUPTION_ERROR));
+ vmcs_read(VMCS_EXIT_INTR_ERRCODE));
db_printf("\n");
break;
case EXIT_REASON_EPT_FAULT:
Modified: stable/10/sys/amd64/vmm/intel/vmcs.h
==============================================================================
--- stable/10/sys/amd64/vmm/intel/vmcs.h Sat May 17 19:06:46 2014 (r266338)
+++ stable/10/sys/amd64/vmm/intel/vmcs.h Sat May 17 19:11:08 2014 (r266339)
@@ -46,12 +46,7 @@ struct msr_entry {
};
int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
-int vmcs_set_defaults(struct vmcs *vmcs, u_long host_rip, u_long host_rsp,
- uint64_t eptp,
- uint32_t pinbased_ctls, uint32_t procbased_ctls,
- uint32_t procbased_ctls2, uint32_t exit_ctls,
- uint32_t entry_ctls, u_long msr_bitmap,
- uint16_t vpid);
+int vmcs_init(struct vmcs *vmcs);
int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv);
int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val);
int vmcs_getdesc(struct vmcs *vmcs, int ident,
@@ -102,6 +97,7 @@ vmcs_write(uint32_t encoding, uint64_t v
/* 16-bit control fields */
#define VMCS_VPID 0x00000000
+#define VMCS_PIR_VECTOR 0x00000002
/* 16-bit guest-state fields */
#define VMCS_GUEST_ES_SELECTOR 0x00000800
@@ -112,6 +108,7 @@ vmcs_write(uint32_t encoding, uint64_t v
#define VMCS_GUEST_GS_SELECTOR 0x0000080A
#define VMCS_GUEST_LDTR_SELECTOR 0x0000080C
#define VMCS_GUEST_TR_SELECTOR 0x0000080E
+#define VMCS_GUEST_INTR_STATUS 0x00000810
/* 16-bit host-state fields */
#define VMCS_HOST_ES_SELECTOR 0x00000C00
@@ -133,7 +130,13 @@ vmcs_write(uint32_t encoding, uint64_t v
#define VMCS_TSC_OFFSET 0x00002010
#define VMCS_VIRTUAL_APIC 0x00002012
#define VMCS_APIC_ACCESS 0x00002014
+#define VMCS_PIR_DESC 0x00002016
#define VMCS_EPTP 0x0000201A
+#define VMCS_EOI_EXIT0 0x0000201C
+#define VMCS_EOI_EXIT1 0x0000201E
+#define VMCS_EOI_EXIT2 0x00002020
+#define VMCS_EOI_EXIT3 0x00002022
+#define VMCS_EOI_EXIT(vector) (VMCS_EOI_EXIT0 + ((vector) / 64) * 2)
/* 64-bit read-only fields */
#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
@@ -177,8 +180,8 @@ vmcs_write(uint32_t encoding, uint64_t v
/* 32-bit read-only data fields */
#define VMCS_INSTRUCTION_ERROR 0x00004400
#define VMCS_EXIT_REASON 0x00004402
-#define VMCS_EXIT_INTERRUPTION_INFO 0x00004404
-#define VMCS_EXIT_INTERRUPTION_ERROR 0x00004406
+#define VMCS_EXIT_INTR_INFO 0x00004404
+#define VMCS_EXIT_INTR_ERRCODE 0x00004406
#define VMCS_IDT_VECTORING_INFO 0x00004408
#define VMCS_IDT_VECTORING_ERROR 0x0000440A
#define VMCS_EXIT_INSTRUCTION_LENGTH 0x0000440C
@@ -315,7 +318,8 @@ vmcs_write(uint32_t encoding, uint64_t v
#define EXIT_REASON_PAUSE 40
#define EXIT_REASON_MCE 41
#define EXIT_REASON_TPR 43
-#define EXIT_REASON_APIC 44
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_VIRTUALIZED_EOI 45
#define EXIT_REASON_GDTR_IDTR 46
#define EXIT_REASON_LDTR_TR 47
#define EXIT_REASON_EPT_FAULT 48
@@ -326,13 +330,15 @@ vmcs_write(uint32_t encoding, uint64_t v
#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
/*
* VMCS interrupt information fields
*/
-#define VMCS_INTERRUPTION_INFO_VALID (1U << 31)
-#define VMCS_INTERRUPTION_INFO_HW_INTR (0 << 8)
-#define VMCS_INTERRUPTION_INFO_NMI (2 << 8)
+#define VMCS_INTR_INFO_VALID (1U << 31)
+#define VMCS_INTR_INFO_TYPE(info) (((info) >> 8) & 0x7)
+#define VMCS_INTR_INFO_HW_INTR (0 << 8)
+#define VMCS_INTR_INFO_NMI (2 << 8)
/*
* VMCS IDT-Vectoring information fields
@@ -365,4 +371,15 @@ vmcs_write(uint32_t encoding, uint64_t v
#define EPT_VIOLATION_GLA_VALID (1UL << 7)
#define EPT_VIOLATION_XLAT_VALID (1UL << 8)
+/*
+ * Exit qualification for APIC-access VM exit
+ */
+#define APIC_ACCESS_OFFSET(qual) ((qual) & 0xFFF)
+#define APIC_ACCESS_TYPE(qual) (((qual) >> 12) & 0xF)
+
+/*
+ * Exit qualification for APIC-write VM exit
+ */
+#define APIC_WRITE_OFFSET(qual) ((qual) & 0xFFF)
+
#endif
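
VMCS_EOI_EXIT(vector) selects one of the four 64-bit EOI-exit bitmap fields:
each field covers 64 vectors, and 64-bit VMCS encodings advance by 2 because
the odd encoding addresses the field's high word. A hedged helper assuming
the VMCS is already loaded; only the macro and the vmcs_read()/vmcs_write()
accessors are from this header, the function itself is illustrative:

	#include "vmcs.h"

	/*
	 * Sketch: arrange for an EOI of 'vector' to cause a VM exit
	 * (needed for level-triggered interrupts the vioapic must see).
	 */
	static void
	eoi_exit_bitmap_set(int vector)
	{
		uint32_t encoding;
		uint64_t bitmap;

		encoding = VMCS_EOI_EXIT(vector);	/* VMCS_EOI_EXIT0..3 */
		bitmap = vmcs_read(encoding);
		bitmap |= 1UL << (vector % 64);
		vmcs_write(encoding, bitmap);
	}
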
Modified: stable/10/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/10/sys/amd64/vmm/intel/vmx.c Sat May 17 19:06:46 2014 (r266338)
+++ stable/10/sys/amd64/vmm/intel/vmx.c Sat May 17 19:11:08 2014 (r266339)
@@ -45,15 +45,18 @@ __FBSDID("$FreeBSD$");
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/segments.h>
+#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include "vmm_host.h"
-#include "vmm_lapic.h"
+#include "vmm_ipi.h"
#include "vmm_msr.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"
+#include "vlapic.h"
+#include "vlapic_priv.h"
#include "vmx_msr.h"
#include "ept.h"
@@ -92,6 +95,7 @@ __FBSDID("$FreeBSD$");
#define VM_EXIT_CTLS_ONE_SETTING \
(VM_EXIT_CTLS_ONE_SETTING_NO_PAT | \
+ VM_EXIT_ACKNOWLEDGE_INTERRUPT | \
VM_EXIT_SAVE_PAT | \
VM_EXIT_LOAD_PAT)
#define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS
@@ -112,7 +116,8 @@ __FBSDID("$FreeBSD$");
#define HANDLED 1
#define UNHANDLED 0
-MALLOC_DEFINE(M_VMX, "vmx", "vmx");
+static MALLOC_DEFINE(M_VMX, "vmx", "vmx");
+static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW, NULL, NULL);
@@ -164,12 +169,33 @@ static int cap_pause_exit;
static int cap_unrestricted_guest;
static int cap_monitor_trap;
static int cap_invpcid;
-
+
+static int virtual_interrupt_delivery;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD,
+ &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support");
+
+static int posted_interrupts;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupts, CTLFLAG_RD,
+ &posted_interrupts, 0, "APICv posted interrupt support");
+
+static int pirvec;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupt_vector, CTLFLAG_RD,
+ &pirvec, 0, "APICv posted interrupt vector");
+
static struct unrhdr *vpid_unr;
static u_int vpid_alloc_failed;
SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
&vpid_alloc_failed, 0, NULL);
+/*
+ * Use the last page below 4GB as the APIC access address. This address is
+ * occupied by the boot firmware so it is guaranteed that it will not conflict
+ * with a page in system memory.
+ */
+#define APIC_ACCESS_ADDRESS 0xFFFFF000
+
+static void vmx_inject_pir(struct vlapic *vlapic);
+
#ifdef KTR
static const char *
exit_reason_to_str(int reason)
@@ -259,8 +285,8 @@ exit_reason_to_str(int reason)
return "mce";
case EXIT_REASON_TPR:
return "tpr";
- case EXIT_REASON_APIC:
- return "apic";
+ case EXIT_REASON_APIC_ACCESS:
+ return "apic-access";
case EXIT_REASON_GDTR_IDTR:
return "gdtridtr";
case EXIT_REASON_LDTR_TR:
@@ -281,6 +307,8 @@ exit_reason_to_str(int reason)
return "wbinvd";
case EXIT_REASON_XSETBV:
return "xsetbv";
+ case EXIT_REASON_APIC_WRITE:
+ return "apic-write";
default:
snprintf(reasonbuf, sizeof(reasonbuf), "%d", reason);
return (reasonbuf);
@@ -424,6 +452,9 @@ vmx_disable(void *arg __unused)
static int
vmx_cleanup(void)
{
+
+ if (pirvec != 0)
+ vmm_ipi_free(pirvec);
if (vpid_unr != NULL) {
delete_unrhdr(vpid_unr);
@@ -457,11 +488,11 @@ vmx_restore(void)
}
static int
-vmx_init(void)
+vmx_init(int ipinum)
{
- int error;
+ int error, use_tpr_shadow;
uint64_t fixed0, fixed1, feature_control;
- uint32_t tmp;
+ uint32_t tmp, procbased2_vid_bits;
/* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */
if (!(cpu_feature2 & CPUID2_VMX)) {
@@ -595,9 +626,58 @@ vmx_init(void)
MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0,
&tmp) == 0);
+ /*
+ * Check support for virtual interrupt delivery.
+ */
+ procbased2_vid_bits = (PROCBASED2_VIRTUALIZE_APIC_ACCESSES |
+ PROCBASED2_VIRTUALIZE_X2APIC_MODE |
+ PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
+ PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
+
+ use_tpr_shadow = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+ MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
+ &tmp) == 0);
+
+ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
+ procbased2_vid_bits, 0, &tmp);
+ if (error == 0 && use_tpr_shadow) {
+ virtual_interrupt_delivery = 1;
+ TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid",
+ &virtual_interrupt_delivery);
+ }
+
+ if (virtual_interrupt_delivery) {
+ procbased_ctls |= PROCBASED_USE_TPR_SHADOW;
+ procbased_ctls2 |= procbased2_vid_bits;
+ procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE;
+
+ /*
+ * Check for Posted Interrupts only if Virtual Interrupt
+ * Delivery is enabled.
+ */
+ error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS,
+ MSR_VMX_TRUE_PINBASED_CTLS, PINBASED_POSTED_INTERRUPT, 0,
+ &tmp);
+ if (error == 0) {
+ pirvec = vmm_ipi_alloc();
+ if (pirvec == 0) {
+ if (bootverbose) {
+ printf("vmx_init: unable to allocate "
+ "posted interrupt vector\n");
+ }
+ } else {
+ posted_interrupts = 1;
+ TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_pir",
+ &posted_interrupts);
+ }
+ }
+ }
+
+ if (posted_interrupts)
+ pinbased_ctls |= PINBASED_POSTED_INTERRUPT;
/* Initialize EPT */
- error = ept_init();
+ error = ept_init(ipinum);
if (error) {
printf("vmx_init: ept initialization failed (%d)\n", error);
return (error);
@@ -638,6 +718,31 @@ vmx_init(void)
return (0);
}
+static void
+vmx_trigger_hostintr(int vector)
+{
+ uintptr_t func;
+ struct gate_descriptor *gd;
+
+ gd = &idt[vector];
+
+ KASSERT(vector >= 32 && vector <= 255, ("vmx_trigger_hostintr: "
+ "invalid vector %d", vector));
+ KASSERT(gd->gd_p == 1, ("gate descriptor for vector %d not present",
+ vector));
+ KASSERT(gd->gd_type == SDT_SYSIGT, ("gate descriptor for vector %d "
+ "has invalid type %d", vector, gd->gd_type));
+ KASSERT(gd->gd_dpl == SEL_KPL, ("gate descriptor for vector %d "
+ "has invalid dpl %d", vector, gd->gd_dpl));
+ KASSERT(gd->gd_selector == GSEL(GCODE_SEL, SEL_KPL), ("gate descriptor "
+ "for vector %d has invalid selector %d", vector, gd->gd_selector));
+ KASSERT(gd->gd_ist == 0, ("gate descriptor for vector %d has invalid "
+ "IST %d", vector, gd->gd_ist));
+
+ func = ((long)gd->gd_hioffset << 16 | gd->gd_looffset);
+ vmx_call_isr(func);
+}
+
static int
vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial)
{
@@ -676,6 +781,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
uint16_t vpid[VM_MAXCPU];
int i, error, guest_msr_count;
struct vmx *vmx;
+ struct vmcs *vmcs;
vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO);
if ((uintptr_t)vmx & PAGE_MASK) {
@@ -740,27 +846,52 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vpid_alloc(vpid, VM_MAXCPU);
+ if (virtual_interrupt_delivery) {
+ error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE,
+ APIC_ACCESS_ADDRESS);
+ /* XXX this should really return an error to the caller */
+ KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error));
+ }
+
for (i = 0; i < VM_MAXCPU; i++) {
- vmx->vmcs[i].identifier = vmx_revision();
- error = vmclear(&vmx->vmcs[i]);
+ vmcs = &vmx->vmcs[i];
+ vmcs->identifier = vmx_revision();
+ error = vmclear(vmcs);
if (error != 0) {
panic("vmx_vminit: vmclear error %d on vcpu %d\n",
error, i);
}
- error = vmcs_set_defaults(&vmx->vmcs[i],
- (u_long)vmx_exit_guest,
- (u_long)&vmx->ctx[i],
- vmx->eptp,
- pinbased_ctls,
- procbased_ctls,
- procbased_ctls2,
- exit_ctls, entry_ctls,
- vtophys(vmx->msr_bitmap),
- vpid[i]);
+ error = vmcs_init(vmcs);
+ KASSERT(error == 0, ("vmcs_init error %d", error));
- if (error != 0)
- panic("vmx_vminit: vmcs_set_defaults error %d", error);
+ VMPTRLD(vmcs);
+ error = 0;
+ error += vmwrite(VMCS_HOST_RSP, (u_long)&vmx->ctx[i]);
+ error += vmwrite(VMCS_EPTP, vmx->eptp);
+ error += vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls);
+ error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls);
+ error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2);
+ error += vmwrite(VMCS_EXIT_CTLS, exit_ctls);
+ error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
+ error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
+ error += vmwrite(VMCS_VPID, vpid[i]);
+ if (virtual_interrupt_delivery) {
+ error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
+ error += vmwrite(VMCS_VIRTUAL_APIC,
+ vtophys(&vmx->apic_page[i]));
+ error += vmwrite(VMCS_EOI_EXIT0, 0);
+ error += vmwrite(VMCS_EOI_EXIT1, 0);
+ error += vmwrite(VMCS_EOI_EXIT2, 0);
+ error += vmwrite(VMCS_EOI_EXIT3, 0);
+ }
+ if (posted_interrupts) {
+ error += vmwrite(VMCS_PIR_VECTOR, pirvec);
+ error += vmwrite(VMCS_PIR_DESC,
+ vtophys(&vmx->pir_desc[i]));
+ }
+ VMCLEAR(vmcs);
+ KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
vmx->cap[i].set = 0;
vmx->cap[i].proc_ctls = procbased_ctls;
@@ -771,9 +902,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
msr_save_area_init(vmx->guest_msrs[i], &guest_msr_count);
- error = vmcs_set_msr_save(&vmx->vmcs[i],
- vtophys(vmx->guest_msrs[i]),
- guest_msr_count);
+ error = vmcs_set_msr_save(vmcs, vtophys(vmx->guest_msrs[i]),
+ guest_msr_count);
if (error != 0)
panic("vmcs_set_msr_save error %d", error);
@@ -783,16 +913,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* CR0 - 0x60000010
* CR4 - 0
*/
- error = vmx_setup_cr0_shadow(&vmx->vmcs[i], 0x60000010);
+ error = vmx_setup_cr0_shadow(vmcs, 0x60000010);
if (error != 0)
panic("vmx_setup_cr0_shadow %d", error);
- error = vmx_setup_cr4_shadow(&vmx->vmcs[i], 0);
+ error = vmx_setup_cr4_shadow(vmcs, 0);
if (error != 0)
panic("vmx_setup_cr4_shadow %d", error);
vmx->ctx[i].pmap = pmap;
- vmx->ctx[i].eptp = vmx->eptp;
}
return (vmx);
@@ -840,20 +969,20 @@ vmx_astpending_trace(struct vmx *vmx, in
#endif
}
+static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved");
+
static void
-vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu)
+vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap)
{
- int lastcpu;
struct vmxstate *vmxstate;
- struct invvpid_desc invvpid_desc = { 0 };
+ struct invvpid_desc invvpid_desc;
vmxstate = &vmx->state[vcpu];
- lastcpu = vmxstate->lastcpu;
- vmxstate->lastcpu = curcpu;
-
- if (lastcpu == curcpu)
+ if (vmxstate->lastcpu == curcpu)
return;
+ vmxstate->lastcpu = curcpu;
+
vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1);
vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase());
@@ -876,8 +1005,20 @@ vmx_set_pcpu_defaults(struct vmx *vmx, i
* for "all" EP4TAs.
*/
if (vmxstate->vpid != 0) {
- invvpid_desc.vpid = vmxstate->vpid;
- invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc);
+ if (pmap->pm_eptgen == vmx->eptgen[curcpu]) {
+ invvpid_desc._res1 = 0;
+ invvpid_desc._res2 = 0;
+ invvpid_desc.vpid = vmxstate->vpid;
+ invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc);
+ } else {
+ /*
+ * The invvpid can be skipped if an invept is going to
+ * be performed before entering the guest. The invept
+ * will invalidate combined mappings tagged with
+ * 'vmx->eptp' for all vpids.
+ */
+ vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_SAVED, 1);
+ }
}
}
@@ -935,7 +1076,7 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu
* Inject the virtual NMI. The vector must be the NMI IDT entry
* or the VMCS entry check will fail.
*/
- info = VMCS_INTERRUPTION_INFO_NMI | VMCS_INTERRUPTION_INFO_VALID;
+ info = VMCS_INTR_INFO_NMI | VMCS_INTR_INFO_VALID;
info |= IDT_NMI;
vmcs_write(VMCS_ENTRY_INTR_INFO, info);
@@ -957,7 +1098,7 @@ nmiblocked:
}
static void
-vmx_inject_interrupts(struct vmx *vmx, int vcpu)
+vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
{
int vector;
uint64_t info, rflags, interruptibility;
@@ -973,7 +1114,7 @@ vmx_inject_interrupts(struct vmx *vmx, i
* because of a pending AST.
*/
info = vmcs_read(VMCS_ENTRY_INTR_INFO);
- if (info & VMCS_INTERRUPTION_INFO_VALID)
+ if (info & VMCS_INTR_INFO_VALID)
return;
/*
@@ -982,9 +1123,13 @@ vmx_inject_interrupts(struct vmx *vmx, i
if (vmx_inject_nmi(vmx, vcpu))
return;
+ if (virtual_interrupt_delivery) {
+ vmx_inject_pir(vlapic);
+ return;
+ }
+
/* Ask the local apic for a vector to inject */
- vector = lapic_pending_intr(vmx->vm, vcpu);
- if (vector < 0)
+ if (!vlapic_pending_intr(vlapic, &vector))
return;
if (vector < 32 || vector > 255)
@@ -1000,12 +1145,12 @@ vmx_inject_interrupts(struct vmx *vmx, i
goto cantinject;
/* Inject the interrupt */
- info = VMCS_INTERRUPTION_INFO_HW_INTR | VMCS_INTERRUPTION_INFO_VALID;
+ info = VMCS_INTR_INFO_HW_INTR | VMCS_INTR_INFO_VALID;
info |= vector;
vmcs_write(VMCS_ENTRY_INTR_INFO, info);
/* Update the Local APIC ISR */
- lapic_intr_accepted(vmx->vm, vcpu, vector);
+ vlapic_intr_accepted(vlapic, vector);
VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector);
@@ -1175,11 +1320,141 @@ ept_emulation_fault(uint64_t ept_qual)
}
static int
+vmx_handle_apic_write(struct vlapic *vlapic, uint64_t qual)
+{
+ int error, handled, offset;
+ bool retu;
+
+ if (!virtual_interrupt_delivery)
+ return (UNHANDLED);
+
+ handled = 1;
+ offset = APIC_WRITE_OFFSET(qual);
+ switch (offset) {
+ case APIC_OFFSET_ID:
+ vlapic_id_write_handler(vlapic);
+ break;
+ case APIC_OFFSET_LDR:
+ vlapic_ldr_write_handler(vlapic);
+ break;
+ case APIC_OFFSET_DFR:
+ vlapic_dfr_write_handler(vlapic);
+ break;
+ case APIC_OFFSET_SVR:
+ vlapic_svr_write_handler(vlapic);
+ break;
+ case APIC_OFFSET_ESR:
+ vlapic_esr_write_handler(vlapic);
+ break;
+ case APIC_OFFSET_ICR_LOW:
+ retu = false;
+ error = vlapic_icrlo_write_handler(vlapic, &retu);
+ if (error != 0 || retu)
+ handled = 0;
+ break;
+ case APIC_OFFSET_CMCI_LVT:
+ case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
+ vlapic_lvt_write_handler(vlapic, offset);
+ break;
+ case APIC_OFFSET_TIMER_ICR:
+ vlapic_icrtmr_write_handler(vlapic);
+ break;
+ case APIC_OFFSET_TIMER_DCR:
+ vlapic_dcr_write_handler(vlapic);
+ break;
+ default:
+ handled = 0;
+ break;
+ }
+ return (handled);
+}
+
+static bool
+apic_access_fault(uint64_t gpa)
+{
+
+ if (virtual_interrupt_delivery &&
+ (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE))
+ return (true);
+ else
+ return (false);
+}
+
+static int
+vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
+{
+ uint64_t qual;
+ int access_type, offset, allowed;
+
+ if (!virtual_interrupt_delivery)
+ return (UNHANDLED);
+
+ qual = vmexit->u.vmx.exit_qualification;
+ access_type = APIC_ACCESS_TYPE(qual);
+ offset = APIC_ACCESS_OFFSET(qual);
+
+ allowed = 0;
+ if (access_type == 0) {
+ /*
+ * Read data access to the following registers is expected.
+ */
+ switch (offset) {
+ case APIC_OFFSET_APR:
+ case APIC_OFFSET_PPR:
+ case APIC_OFFSET_RRR:
+ case APIC_OFFSET_CMCI_LVT:
+ case APIC_OFFSET_TIMER_CCR:
+ allowed = 1;
+ break;
+ default:
+ break;
+ }
+ } else if (access_type == 1) {
+ /*
+ * Write data access to the following registers is expected.
+ */
+ switch (offset) {
+ case APIC_OFFSET_VER:
+ case APIC_OFFSET_APR:
+ case APIC_OFFSET_PPR:
+ case APIC_OFFSET_RRR:
+ case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
+ case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
+ case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
+ case APIC_OFFSET_CMCI_LVT:
+ case APIC_OFFSET_TIMER_CCR:
+ allowed = 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (allowed) {
+ vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+ vmexit->u.inst_emul.gpa = DEFAULT_APIC_BASE + offset;
+ vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
+ vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
+ }
+
+ /*
+ * Regardless of whether the APIC-access is allowed this handler
+ * always returns UNHANDLED:
+ * - if the access is allowed then it is handled by emulating the
+ * instruction that caused the VM-exit (outside the critical section)
+ * - if the access is not allowed then it will be converted to an
+ * exitcode of VM_EXITCODE_VMX and will be dealt with in userland.
+ */
+ return (UNHANDLED);
+}
+
+static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
int error, handled;
struct vmxctx *vmxctx;
- uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason;
+ struct vlapic *vlapic;
+ uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, reason;
uint64_t qual, gpa;
bool retu;
@@ -1203,7 +1478,7 @@ vmx_exit_process(struct vmx *vmx, int vc
switch (reason) {
case EXIT_REASON_EPT_FAULT:
case EXIT_REASON_EPT_MISCONFIG:
- case EXIT_REASON_APIC:
+ case EXIT_REASON_APIC_ACCESS:
case EXIT_REASON_TASK_SWITCH:
case EXIT_REASON_EXCEPTION:
idtvec_info = vmcs_idt_vectoring_info();
@@ -1290,6 +1565,11 @@ vmx_exit_process(struct vmx *vmx, int vc
* host interrupt handler in the VM's softc. We will inject
* this virtual interrupt during the subsequent VM enter.
*/
+ intr_info = vmcs_read(VMCS_EXIT_INTR_INFO);
+ KASSERT((intr_info & VMCS_INTR_INFO_VALID) != 0 &&
+ VMCS_INTR_INFO_TYPE(intr_info) == 0,
+ ("VM exit interruption info invalid: %#x", intr_info));
+ vmx_trigger_hostintr(intr_info & 0xff);
/*
* This is special. We want to treat this as a 'handled'
@@ -1318,24 +1598,42 @@ vmx_exit_process(struct vmx *vmx, int vc
handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
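
The truncated portion contains, among other things, the body of
vmx_inject_pir() declared earlier. For orientation, a hedged sketch of the
posted-interrupt descriptor and of posting a vector to it; the layout follows
the pir_desc added in vlapic_priv.h, but the function and the notification
policy shown here are illustrative:

	#include <sys/param.h>
	#include <machine/atomic.h>
	#include <machine/smp.h>

	static int pirvec;	/* stand-in for the vector vmx_init() allocates */

	struct pir_desc {
		uint64_t	pir[4];		/* one bit per vector, 0-255 */
		uint64_t	pending;	/* notification outstanding */
	} __aligned(64);

	/*
	 * Sketch: publish 'vector' in the descriptor, then let the first
	 * poster send the notification IPI to the host cpu running the
	 * vcpu; the processor ORs pir into the virtual APIC state when
	 * it handles the notification in guest mode.
	 */
	static void
	pir_post(struct pir_desc *pd, int vector, int hostcpu)
	{

		atomic_set_long((volatile u_long *)&pd->pir[vector / 64],
		    1UL << (vector % 64));
		if (atomic_cmpset_long((volatile u_long *)&pd->pending, 0, 1))
			ipi_cpu(hostcpu, pirvec);
	}
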