git: a97f683fe3c4 - main - vmm: Add a device file interface for creating and destroying VMs

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Tue, 05 Nov 2024 01:40:50 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=a97f683fe3c425b425cf8cc466319f54ea957c20

commit a97f683fe3c425b425cf8cc466319f54ea957c20
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-11-05 01:36:06 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-11-05 01:40:41 +0000

    vmm: Add a device file interface for creating and destroying VMs
    
    This supersedes the sysctl interface, which has the limitations of being
    root-only and not supporting automatic resource destruction, i.e., we
    cannot easily destroy VMs automatically when bhyve terminates.
    
    For now, two ioctls are implemented: VMMCTL_VM_CREATE and
    VMMCTL_VM_DESTROY.  Eventually I would like to support tying a VM's
    lifetime to that of the descriptor, so that it is automatically
    destroyed when the descriptor is closed.  However, this will require
    some work in bhyve: when the guest wants to reboot, bhyve exits with a
    status that indicates that it is to be restarted.  This is incompatible
    with the idea of tying a VM's lifetime to that of a descriptor, since we
    want to avoid creating and destroying a VM across each reboot (as this
    involves freeing all of the guest memory, among other things).  One
    possible design would be to decompose bhyve into two processes, a parent
    which handles reboots, and a child which runs in capability mode and
    handles guest execution.
    
    In any case, this gets us closer to addressing the shortcomings
    mentioned above.
    
    Reviewed by:    jhb
    Differential Revision:  https://reviews.freebsd.org/D47028
---
 sys/amd64/vmm/vmm.c   |  4 ++-
 sys/arm64/vmm/vmm.c   |  4 ++-
 sys/dev/vmm/vmm_dev.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 sys/dev/vmm/vmm_dev.h | 15 +++++++++-
 4 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 07d3f74b8365..77e0adda86f5 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -465,7 +465,9 @@ vmm_handler(module_t mod, int what, void *arg)
 	switch (what) {
 	case MOD_LOAD:
 		if (vmm_is_hw_supported()) {
-			vmmdev_init();
+			error = vmmdev_init();
+			if (error != 0)
+				break;
 			error = vmm_init();
 			if (error == 0)
 				vmm_initialized = 1;
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index 4127fad5cd59..2f4f3a2b59ea 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -362,7 +362,9 @@ vmm_handler(module_t mod, int what, void *arg)
 	switch (what) {
 	case MOD_LOAD:
 		/* TODO: if (vmm_is_hw_supported()) { */
-		vmmdev_init();
+		error = vmmdev_init();
+		if (error != 0)
+			break;
 		error = vmm_init();
 		if (error == 0)
 			vmm_initialized = true;
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index a43d642b3925..4bea4360a51c 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -8,6 +8,7 @@
 
 #include <sys/param.h>
 #include <sys/conf.h>
+#include <sys/fcntl.h>
 #include <sys/ioccom.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
@@ -917,11 +918,88 @@ SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
     NULL, 0, sysctl_vmm_create, "A",
     NULL);
 
-void
+static int
+vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
+{
+	int error;
+
+	error = vmm_priv_check(td->td_ucred);
+	if (error != 0)
+		return (error);
+
+	if ((flags & FWRITE) == 0)
+		return (EPERM);
+
+	return (0);
+}
+
+static int
+vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+    struct thread *td)
+{
+	int error;
+
+	switch (cmd) {
+	case VMMCTL_VM_CREATE: {
+		struct vmmctl_vm_create *vmc;
+
+		vmc = (struct vmmctl_vm_create *)data;
+		vmc->name[VM_MAX_NAMELEN] = '\0';
+		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
+			if (vmc->reserved[i] != 0) {
+				error = EINVAL;
+				return (error);
+			}
+		}
+
+		error = vmmdev_create(vmc->name, td->td_ucred);
+		break;
+	}
+	case VMMCTL_VM_DESTROY: {
+		struct vmmctl_vm_destroy *vmd;
+
+		vmd = (struct vmmctl_vm_destroy *)data;
+		vmd->name[VM_MAX_NAMELEN] = '\0';
+		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
+			if (vmd->reserved[i] != 0) {
+				error = EINVAL;
+				return (error);
+			}
+		}
+
+		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
+		break;
+	}
+	default:
+		error = ENOTTY;
+		break;
+	}
+
+	return (error);
+}
+
+static struct cdevsw vmmctlsw = {
+	.d_name		= "vmmctl",
+	.d_version	= D_VERSION,
+	.d_open		= vmmctl_open,
+	.d_ioctl	= vmmctl_ioctl,
+};
+
+int
 vmmdev_init(void)
 {
+	struct cdev *cdev;
+	int error;
+
+	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmctlsw, NULL,
+	    UID_ROOT, GID_WHEEL, 0600, "vmmctl");
+	if (error)
+		return (error);
+
 	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
 	    "Allow use of vmm in a jail.");
+
+	return (0);
 }
 
 int
diff --git a/sys/dev/vmm/vmm_dev.h b/sys/dev/vmm/vmm_dev.h
index a2dc4d11f359..410066c49cf2 100644
--- a/sys/dev/vmm/vmm_dev.h
+++ b/sys/dev/vmm/vmm_dev.h
@@ -18,7 +18,7 @@ struct thread;
 struct vm;
 struct vcpu;
 
-void	vmmdev_init(void);
+int	vmmdev_init(void);
 int	vmmdev_cleanup(void);
 int	vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd,
 	    caddr_t data, int fflag, struct thread *td);
@@ -54,4 +54,17 @@ extern const size_t vmmdev_machdep_ioctl_count;
 
 #endif /* _KERNEL */
 
+struct vmmctl_vm_create {
+	char name[VM_MAX_NAMELEN + 1];
+	int reserved[16];
+};
+
+struct vmmctl_vm_destroy {
+	char name[VM_MAX_NAMELEN + 1];
+	int reserved[16];
+};
+
+#define	VMMCTL_VM_CREATE	_IOWR('V', 0, struct vmmctl_vm_create)
+#define	VMMCTL_VM_DESTROY	_IOWR('V', 1, struct vmmctl_vm_destroy)
+
 #endif /* _DEV_VMM_DEV_H_ */