git: 67b69e76e8ee - main - vmm: Use an sx lock to protect the memory map.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 18 Nov 2022 18:26:52 UTC
The branch main has been updated by jhb: URL: https://cgit.FreeBSD.org/src/commit/?id=67b69e76e8eecfd204f6de636d622a1d681c8d7e commit 67b69e76e8eecfd204f6de636d622a1d681c8d7e Author: John Baldwin <jhb@FreeBSD.org> AuthorDate: 2022-11-18 18:04:37 +0000 Commit: John Baldwin <jhb@FreeBSD.org> CommitDate: 2022-11-18 18:25:38 +0000 vmm: Use an sx lock to protect the memory map. Previously bhyve obtained a "read lock" on the memory map for ioctls needing to read the map by locking the last vCPU. This is now replaced by a new per-VM sx lock. Modifying the map requires exclusively locking the sx lock as well as locking all existing vCPUs. Reading the map requires either locking one vCPU or the sx lock. This permits safely modifying or querying the memory map while some vCPUs do not exist which will be true in a future commit. Reviewed by: corvink, markj Differential Revision: https://reviews.freebsd.org/D37172 --- sys/amd64/include/vmm.h | 3 +++ sys/amd64/vmm/vmm.c | 55 +++++++++++++++++++++++++++-------------- sys/amd64/vmm/vmm_dev.c | 66 ++++++++++++++++++++++++------------------------- 3 files changed, 72 insertions(+), 52 deletions(-) diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 23ab8427cb74..b4f3312794dd 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -231,6 +231,9 @@ int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, /* * APIs that modify the guest memory map require all vcpus to be frozen. */ +void vm_slock_memsegs(struct vm *vm); +void vm_xlock_memsegs(struct vm *vm); +void vm_unlock_memsegs(struct vm *vm); int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, size_t len, int prot, int flags); int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index c8a45c7aa811..89e406efcdb0 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include <sys/rwlock.h> #include <sys/sched.h> #include <sys/smp.h> +#include <sys/sx.h> #include <sys/vnode.h> #include <vm/vm.h> @@ -155,6 +156,11 @@ struct mem_map { * (o) initialized the first time the VM is created * (i) initialized when VM is created and when it is reinitialized * (x) initialized before use + * + * Locking: + * [m] mem_segs_lock + * [r] rendezvous_mtx + * [v] reads require one frozen vcpu, writes require freezing all vcpus */ struct vm { void *cookie; /* (i) cpu-specific data */ @@ -170,13 +176,13 @@ struct vm { int suspend; /* (i) stop VM execution */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ - cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ - cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ - void *rendezvous_arg; /* (x) rendezvous func/arg */ + cpuset_t rendezvous_req_cpus; /* (x) [r] rendezvous requested */ + cpuset_t rendezvous_done_cpus; /* (x) [r] rendezvous finished */ + void *rendezvous_arg; /* (x) [r] rendezvous func/arg */ vm_rendezvous_func_t rendezvous_func; struct mtx rendezvous_mtx; /* (o) rendezvous lock */ - struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ - struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ + struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) [m+v] guest address space */ + struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) [m+v] guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */ struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ @@ -185,6 +191,7 @@ struct vm { uint16_t cores; /* (o) num of cores/socket */ uint16_t threads; /* (o) num of threads/core */ uint16_t maxcpus; /* (o) max pluggable cpus */ + struct sx mem_segs_lock; /* (o) */ }; #define VMM_CTR0(vcpu, format) \ @@ -518,6 +525,7 @@ vm_create(const char *name, struct vm **retvm) strcpy(vm->name, name); vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); + sx_init(&vm->mem_segs_lock, "vm mem_segs"); vm->sockets = 1; vm->cores = cores_per_package; /* XXX backwards compatibility */ @@ -609,6 +617,7 @@ vm_cleanup(struct vm *vm, bool destroy) vmmops_vmspace_free(vm->vmspace); vm->vmspace = NULL; + sx_destroy(&vm->mem_segs_lock); mtx_destroy(&vm->rendezvous_mtx); } } @@ -645,6 +654,24 @@ vm_name(struct vm *vm) return (vm->name); } +void +vm_slock_memsegs(struct vm *vm) +{ + sx_slock(&vm->mem_segs_lock); +} + +void +vm_xlock_memsegs(struct vm *vm) +{ + sx_xlock(&vm->mem_segs_lock); +} + +void +vm_unlock_memsegs(struct vm *vm) +{ + sx_unlock(&vm->mem_segs_lock); +} + int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { @@ -702,6 +729,8 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) struct mem_seg *seg; vm_object_t obj; + sx_assert(&vm->mem_segs_lock, SX_XLOCKED); + if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); @@ -732,6 +761,8 @@ vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, { struct mem_seg *seg; + sx_assert(&vm->mem_segs_lock, SX_LOCKED); + if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); @@ -1075,19 +1106,7 @@ void * vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { -#ifdef INVARIANTS - /* - * All vcpus are frozen by ioctls that modify the memory map - * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is - * guaranteed if at least one vcpu is in the VCPU_FROZEN state. - */ - int state; - for (int i = 0; i < vm->maxcpus; i++) { - state = vcpu_get_state(vm_vcpu(vm, i), NULL); - KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", - __func__, state)); - } -#endif + sx_assert(&vm->mem_segs_lock, SX_LOCKED); return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); } diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 249131b16464..1b8b1e6d388f 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -221,8 +221,6 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) vm_paddr_t gpa, maxaddr; void *hpa, *cookie; struct vmmdev_softc *sc; - struct vcpu *vcpu; - uint16_t lastcpu; error = vmm_priv_check(curthread->td_ucred); if (error) @@ -233,13 +231,9 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) return (ENXIO); /* - * Get a read lock on the guest memory map by freezing any vcpu. + * Get a read lock on the guest memory map. */ - lastcpu = vm_get_maxcpus(sc->vm) - 1; - error = vcpu_lock_one(sc, lastcpu); - if (error) - return (error); - vcpu = vm_vcpu(sc->vm, lastcpu); + vm_slock_memsegs(sc->vm); prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); maxaddr = vmm_sysmem_maxaddr(sc->vm); @@ -256,7 +250,7 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) * Since this device does not support lseek(2), dd(1) will * read(2) blocks of data to simulate the lseek(2). */ - hpa = vm_gpa_hold(vcpu, gpa, c, prot, &cookie); + hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie); if (hpa == NULL) { if (uio->uio_rw == UIO_READ && gpa < maxaddr) error = uiomove(__DECONST(void *, zero_region), @@ -268,7 +262,7 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) vm_gpa_release(cookie); } } - vcpu_unlock_one(sc, lastcpu); + vm_unlock_memsegs(sc->vm); return (error); } @@ -420,6 +414,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_readwrite_kernemu_device *kernemu; uint64_t *regvals; int *regnums; + bool memsegs_locked; #ifdef BHYVE_SNAPSHOT struct vm_snapshot_meta *snapshot_meta; #endif @@ -435,6 +430,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, vcpuid = -1; vcpu = NULL; state_changed = 0; + memsegs_locked = false; /* * For VMM ioctls that operate on a single vCPU, lookup the @@ -476,10 +472,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, vcpu = vm_vcpu(sc->vm, vcpuid); break; - case VM_MAP_PPTDEV_MMIO: - case VM_UNMAP_PPTDEV_MMIO: - case VM_BIND_PPTDEV: - case VM_UNBIND_PPTDEV: #ifdef COMPAT_FREEBSD12 case VM_ALLOC_MEMSEG_FBSD12: #endif @@ -487,6 +479,21 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_MMAP_MEMSEG: case VM_MUNMAP_MEMSEG: case VM_REINIT: + /* + * ioctls that modify the memory map must lock memory + * segments exclusively. + */ + vm_xlock_memsegs(sc->vm); + memsegs_locked = true; + /* FALLTHROUGH */ + case VM_MAP_PPTDEV_MMIO: + case VM_UNMAP_PPTDEV_MMIO: + case VM_BIND_PPTDEV: + case VM_UNBIND_PPTDEV: +#ifdef BHYVE_SNAPSHOT + case VM_SNAPSHOT_REQ: + case VM_RESTORE_TIME: +#endif /* * ioctls that operate on the entire virtual machine must * prevent all vcpus from running. @@ -503,14 +510,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_GET_MEMSEG: case VM_MMAP_GETNEXT: /* - * Lock a vcpu to make sure that the memory map cannot be - * modified while it is being inspected. + * Lock the memory map while it is being inspected. */ - vcpuid = vm_get_maxcpus(sc->vm) - 1; - error = vcpu_lock_one(sc, vcpuid); - if (error) - goto done; - state_changed = 1; + vm_slock_memsegs(sc->vm); + memsegs_locked = true; break; #ifdef COMPAT_FREEBSD13 @@ -958,6 +961,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, vcpu_unlock_one(sc, vcpuid); else if (state_changed == 2) vcpu_unlock_all(sc); + if (memsegs_locked) + vm_unlock_memsegs(sc->vm); done: /* @@ -978,7 +983,6 @@ vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, size_t len; vm_ooffset_t segoff, first, last; int error, found, segid; - uint16_t lastcpu; bool sysmem; error = vmm_priv_check(curthread->td_ucred); @@ -997,12 +1001,9 @@ vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, } /* - * Get a read lock on the guest memory map by freezing any vcpu. + * Get a read lock on the guest memory map. */ - lastcpu = vm_get_maxcpus(sc->vm) - 1; - error = vcpu_lock_one(sc, lastcpu); - if (error) - return (error); + vm_slock_memsegs(sc->vm); gpa = 0; found = 0; @@ -1029,7 +1030,7 @@ vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, error = EINVAL; } } - vcpu_unlock_one(sc, lastcpu); + vm_unlock_memsegs(sc->vm); return (error); } @@ -1242,7 +1243,6 @@ devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, vm_ooffset_t first, last; size_t seglen; int error; - uint16_t lastcpu; bool sysmem; dsc = cdev->si_drv1; @@ -1256,16 +1256,14 @@ devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, if ((nprot & PROT_EXEC) || first < 0 || first >= last) return (EINVAL); - lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1; - error = vcpu_lock_one(dsc->sc, lastcpu); - if (error) - return (error); + vm_slock_memsegs(dsc->sc->vm); error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); KASSERT(error == 0 && !sysmem && *objp != NULL, ("%s: invalid devmem segment %d", __func__, dsc->segid)); - vcpu_unlock_one(dsc->sc, lastcpu); + + vm_unlock_memsegs(dsc->sc->vm); if (seglen >= last) { vm_object_reference(*objp);