Re: git: a5a918b7a906 - main - vmm: permit some IPIs to be handled by userspace
- In reply to: Emmanuel Vadot : "git: a5a918b7a906 - main - vmm: permit some IPIs to be handled by userspace"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 07 Sep 2022 09:31:46 UTC
Am Wed, 7 Sep 2022 07:08:58 GMT Emmanuel Vadot <manu@FreeBSD.org> schrieb: > The branch main has been updated by manu: > > URL: https://cgit.FreeBSD.org/src/commit/?id=a5a918b7a906eaa88e0833eac70a15989d535b02 > > commit a5a918b7a906eaa88e0833eac70a15989d535b02 > Author: Corvin Köhne <CorvinK@beckhoff.com> > AuthorDate: 2022-09-07 07:07:03 +0000 > Commit: Emmanuel Vadot <manu@FreeBSD.org> > CommitDate: 2022-09-07 07:07:03 +0000 > > vmm: permit some IPIs to be handled by userspace > > Add VM_EXITCODE_IPI to permit returning unhandled IPIs to userland. > INIT and Startup IPIs are now returned to userland. Due to backward > compatibility reasons, a new capability is added for enabling > VM_EXITCODE_IPI. > > MFC after: 2 weeks > Differential Revision: https://reviews.freebsd.org/D35623 > Sponsored by: Beckhoff Automation GmbH & Co. KG > --- > sys/amd64/include/vmm.h | 8 ++ > sys/amd64/vmm/amd/svm.c | 10 +++ > sys/amd64/vmm/intel/vmx.c | 8 ++ > sys/amd64/vmm/io/vlapic.c | 166 ++++++++++++++++++++++++----------------- > sys/amd64/vmm/io/vlapic_priv.h | 2 + > usr.sbin/bhyve/bhyverun.c | 34 +++++++++ > usr.sbin/bhyve/spinup_ap.c | 3 + > 7 files changed, 162 insertions(+), 69 deletions(-) > > diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h > index dcf862c34264..37a74f053fb3 100644 > --- a/sys/amd64/include/vmm.h > +++ b/sys/amd64/include/vmm.h > @@ -31,6 +31,7 @@ > #ifndef _VMM_H_ > #define _VMM_H_ > > +#include <sys/cpuset.h> > #include <sys/sdt.h> > #include <x86/segments.h> > > @@ -483,6 +484,7 @@ enum vm_cap_type { > VM_CAP_BPT_EXIT, > VM_CAP_RDPID, > VM_CAP_RDTSCP, > + VM_CAP_IPI_EXIT, > VM_CAP_MAX > }; > > @@ -630,6 +632,7 @@ enum vm_exitcode { > VM_EXITCODE_DEBUG, > VM_EXITCODE_VMINSN, > VM_EXITCODE_BPT, > + VM_EXITCODE_IPI, > VM_EXITCODE_MAX > }; > > @@ -737,6 +740,11 @@ struct vm_exit { > struct { > enum vm_suspend_how how; > } suspended; > + struct { > + uint32_t mode; > + uint8_t vector; > + cpuset_t dmask; > + } ipi; > struct vm_task_switch task_switch; > } u; > }; > diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c > index 35e8d9833d0e..4195cc5bd049 100644 > --- a/sys/amd64/vmm/amd/svm.c > +++ b/sys/amd64/vmm/amd/svm.c > @@ -2315,6 +2315,7 @@ static int > svm_setcap(void *arg, int vcpu, int type, int val) > { > struct svm_softc *sc; > + struct vlapic *vlapic; > int error; > > sc = arg; > @@ -2333,6 +2334,10 @@ svm_setcap(void *arg, int vcpu, int type, int val) > if (val == 0) > error = EINVAL; > break; > + case VM_CAP_IPI_EXIT: > + vlapic = vm_lapic(sc->vm, vcpu); > + vlapic->ipi_exit = val; > + break; > default: > error = ENOENT; > break; > @@ -2344,6 +2349,7 @@ static int > svm_getcap(void *arg, int vcpu, int type, int *retval) > { > struct svm_softc *sc; > + struct vlapic *vlapic; > int error; > > sc = arg; > @@ -2361,6 +2367,10 @@ svm_getcap(void *arg, int vcpu, int type, int *retval) > case VM_CAP_UNRESTRICTED_GUEST: > *retval = 1; /* unrestricted guest is always enabled */ > break; > + case VM_CAP_IPI_EXIT: > + vlapic = vm_lapic(sc->vm, vcpu); > + *retval = vlapic->ipi_exit; > + break; > default: > error = ENOENT; > break; > diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c > index 64544a6e7955..857028dcd0f1 100644 > --- a/sys/amd64/vmm/intel/vmx.c > +++ b/sys/amd64/vmm/intel/vmx.c > @@ -3504,6 +3504,7 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval) > ret = 0; > break; > case VM_CAP_BPT_EXIT: > + case VM_CAP_IPI_EXIT: > ret = 0; > break; > default: > @@ -3521,6 +3522,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) > { > struct vmx *vmx = arg; > struct vmcs *vmcs = &vmx->vmcs[vcpu]; > + struct vlapic *vlapic; > uint32_t baseval; > uint32_t *pptr; > int error; > @@ -3599,6 +3601,12 @@ vmx_setcap(void *arg, int vcpu, int type, int val) > reg = VMCS_EXCEPTION_BITMAP; > } > break; > + case VM_CAP_IPI_EXIT: > + retval = 0; > + > + vlapic = vm_lapic(vmx->vm, vcpu); > + vlapic->ipi_exit = val; > + break; > default: > break; > } > diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c > index 9599b4b4e62c..dc9b00d2316e 100644 > --- a/sys/amd64/vmm/io/vlapic.c > +++ b/sys/amd64/vmm/io/vlapic.c > @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$"); > > static void vlapic_set_error(struct vlapic *, uint32_t, bool); > static void vlapic_callout_handler(void *arg); > +static void vlapic_reset(struct vlapic *vlapic); > > static __inline uint32_t > vlapic_get_id(struct vlapic *vlapic) > @@ -957,9 +958,9 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) > { > int i; > bool phys; > - cpuset_t dmask; > + cpuset_t dmask, ipimask; > uint64_t icrval; > - uint32_t dest, vec, mode; > + uint32_t dest, vec, mode, shorthand; > struct vlapic *vlapic2; > struct vm_exit *vmexit; > struct LAPIC *lapic; > @@ -975,97 +976,122 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) > dest = icrval >> (32 + 24); > vec = icrval & APIC_VECTOR_MASK; > mode = icrval & APIC_DELMODE_MASK; > + phys = (icrval & APIC_DESTMODE_LOG) == 0; > + shorthand = icrval & APIC_DEST_MASK; > + > + maxcpus = vm_get_maxcpus(vlapic->vm); > > - if (mode == APIC_DELMODE_FIXED && vec < 16) { > - vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false); > - VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); > - return (0); > - } > > VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); > > - if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { > - switch (icrval & APIC_DEST_MASK) { > - case APIC_DEST_DESTFLD: > - phys = ((icrval & APIC_DESTMODE_LOG) == 0); > - vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, > - x2apic(vlapic)); > - break; > - case APIC_DEST_SELF: > - CPU_SETOF(vlapic->vcpuid, &dmask); > - break; > - case APIC_DEST_ALLISELF: > - dmask = vm_active_cpus(vlapic->vm); > - break; > - case APIC_DEST_ALLESELF: > - dmask = vm_active_cpus(vlapic->vm); > - CPU_CLR(vlapic->vcpuid, &dmask); > - break; > - default: > - CPU_ZERO(&dmask); /* satisfy gcc */ > - break; > + switch (shorthand) { > + case APIC_DEST_DESTFLD: > + vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic)); > + break; > + case APIC_DEST_SELF: > + CPU_SETOF(vlapic->vcpuid, &dmask); > + break; > + case APIC_DEST_ALLISELF: > + dmask = vm_active_cpus(vlapic->vm); > + break; > + case APIC_DEST_ALLESELF: > + dmask = vm_active_cpus(vlapic->vm); > + CPU_CLR(vlapic->vcpuid, &dmask); > + break; > + default: > + __assert_unreachable(); > + } > + > + /* > + * ipimask is a set of vCPUs needing userland handling of the current > + * IPI. > + */ > + CPU_ZERO(&ipimask); > + > + switch (mode) { > + case APIC_DELMODE_FIXED: > + if (vec < 16) { > + vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, > + false); > + VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); > + return (0); > } > > CPU_FOREACH_ISSET(i, &dmask) { > - if (mode == APIC_DELMODE_FIXED) { > - lapic_intr_edge(vlapic->vm, i, vec); > - vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, > - IPIS_SENT, i, 1); > - VLAPIC_CTR2(vlapic, "vlapic sending ipi %d " > - "to vcpuid %d", vec, i); > - } else { > - vm_inject_nmi(vlapic->vm, i); > - VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi " > - "to vcpuid %d", i); > - } > + lapic_intr_edge(vlapic->vm, i, vec); > + vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, > + IPIS_SENT, i, 1); > + VLAPIC_CTR2(vlapic, > + "vlapic sending ipi %d to vcpuid %d", vec, i); > } > > - return (0); /* handled completely in the kernel */ > - } > + break; > + case APIC_DELMODE_NMI: > + CPU_FOREACH_ISSET(i, &dmask) { > + vm_inject_nmi(vlapic->vm, i); > + VLAPIC_CTR1(vlapic, > + "vlapic sending ipi nmi to vcpuid %d", i); > + } > > - maxcpus = vm_get_maxcpus(vlapic->vm); > - if (mode == APIC_DELMODE_INIT) { > + break; > + case APIC_DELMODE_INIT: > if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) > - return (0); > - > - if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) { > - vlapic2 = vm_lapic(vlapic->vm, dest); > - > - /* move from INIT to waiting-for-SIPI state */ > - if (vlapic2->boot_state == BS_INIT) { > - vlapic2->boot_state = BS_SIPI; > - } > + break; > > - return (0); > + CPU_FOREACH_ISSET(i, &dmask) { > + vlapic2 = vm_lapic(vlapic->vm, i); > + vlapic2->boot_state = BS_SIPI; > + CPU_SET(i, &ipimask); > } > - } > - > - if (mode == APIC_DELMODE_STARTUP) { > - if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) { > - vlapic2 = vm_lapic(vlapic->vm, dest); > > + break; > + case APIC_DELMODE_STARTUP: > + CPU_FOREACH_ISSET(i, &dmask) { > + vlapic2 = vm_lapic(vlapic->vm, i); > /* > * Ignore SIPIs in any state other than wait-for-SIPI > */ > if (vlapic2->boot_state != BS_SIPI) > - return (0); > - > + continue; > + /* > + * TODO: > + * This should be triggered from userspace. > + */ > + vlapic_reset(vlapic2); > vlapic2->boot_state = BS_RUNNING; > + CPU_SET(i, &ipimask); > + } > > - *retu = true; > - vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); > - vmexit->exitcode = VM_EXITCODE_SPINUP_AP; > - vmexit->u.spinup_ap.vcpu = dest; > - vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT; > + break; > + default: > + return (1); > + } > > - return (0); > + if (!CPU_EMPTY(&ipimask)) { > + vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); > + vmexit->exitcode = VM_EXITCODE_IPI; > + vmexit->u.ipi.mode = mode; > + vmexit->u.ipi.vector = vec; > + vmexit->u.ipi.dmask = dmask; > + > + *retu = true; > + > + /* > + * Old bhyve versions don't support the IPI exit. Translate it > + * into the old style. > + */ > + if (!vlapic->ipi_exit) { > + if (mode == APIC_DELMODE_STARTUP) { > + vmexit->exitcode = VM_EXITCODE_SPINUP_AP; > + vmexit->u.spinup_ap.vcpu = CPU_FFS(&ipimask) - 1; > + vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT; > + } else { > + *retu = false; > + } > } > } > > - /* > - * This will cause a return to userland. > - */ > - return (1); > + return (0); > } > > void > @@ -1467,6 +1493,8 @@ vlapic_init(struct vlapic *vlapic) > if (vlapic->vcpuid == 0) > vlapic->msr_apicbase |= APICBASE_BSP; > > + vlapic->ipi_exit = false; > + > vlapic_reset(vlapic); > } > > diff --git a/sys/amd64/vmm/io/vlapic_priv.h b/sys/amd64/vmm/io/vlapic_priv.h > index fe7965cb65d7..4b3e9009e68c 100644 > --- a/sys/amd64/vmm/io/vlapic_priv.h > +++ b/sys/amd64/vmm/io/vlapic_priv.h > @@ -183,6 +183,8 @@ struct vlapic { > */ > uint32_t svr_last; > uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1]; > + > + bool ipi_exit; > }; > > void vlapic_init(struct vlapic *vlapic); > diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c > index 550cc9d15477..a1849519996c 100644 > --- a/usr.sbin/bhyve/bhyverun.c > +++ b/usr.sbin/bhyve/bhyverun.c > @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); > #endif > > #include <amd64/vmm/intel/vmcs.h> > +#include <x86/apicreg.h> > > #include <machine/atomic.h> > #include <machine/segments.h> > @@ -935,6 +936,35 @@ vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) > return (VMEXIT_CONTINUE); > } > > +static int > +vmexit_ipi(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) > +{ > + int error = -1; > + int i; > + switch (vmexit->u.ipi.mode) { > + case APIC_DELMODE_INIT: > + CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) { > + error = vm_suspend_cpu(ctx, i); > + if (error) { > + warnx("%s: failed to suspend cpu %d\n", > + __func__, i); > + break; > + } > + } > + break; > + case APIC_DELMODE_STARTUP: > + CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) { > + spinup_ap(ctx, i, vmexit->u.ipi.vector << PAGE_SHIFT); > + } > + error = 0; > + break; > + default: > + break; > + } > + > + return (error); > +} > + > static vmexit_handler_t handler[VM_EXITCODE_MAX] = { > [VM_EXITCODE_INOUT] = vmexit_inout, > [VM_EXITCODE_INOUT_STR] = vmexit_inout, > @@ -951,6 +981,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = { > [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, > [VM_EXITCODE_DEBUG] = vmexit_debug, > [VM_EXITCODE_BPT] = vmexit_breakpoint, > + [VM_EXITCODE_IPI] = vmexit_ipi, > }; > > static void > @@ -1151,6 +1182,9 @@ spinup_vcpu(struct vmctx *ctx, int vcpu, bool suspend) > error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1); > assert(error == 0); > > + error = vm_set_capability(ctx, vcpu, VM_CAP_IPI_EXIT, 1); > + assert(error == 0); > + > fbsdrun_addcpu(ctx, vcpu, rip, suspend); > } > > diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c > index 2b7e602f8003..438091e564e7 100644 > --- a/usr.sbin/bhyve/spinup_ap.c > +++ b/usr.sbin/bhyve/spinup_ap.c > @@ -98,6 +98,9 @@ spinup_ap(struct vmctx *ctx, int newcpu, uint64_t rip) > error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1); > assert(error == 0); > > + error = vm_set_capability(ctx, newcpu, VM_CAP_IPI_EXIT, 1); > + assert(error == 0); > + > spinup_ap_realmode(ctx, newcpu, &rip); > > vm_resume_cpu(ctx, newcpu); > Buildkernel dies with: [...] /usr/src/sys/amd64/vmm/io/vlapic.c:967:11: error: variable 'maxcpus' set but not used [-Werror,-Wunused-but-set-variable] uint16_t maxcpus; Regards oh -- O. Hartmann