git: ee98f99d7a68 - main - vmm: Convert VM_MAXCPU into a loader tunable hw.vmm.maxcpu.

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Fri, 18 Nov 2022 18:26:57 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=ee98f99d7a68b284a669fefb969cbfc31df2d0ab

commit ee98f99d7a68b284a669fefb969cbfc31df2d0ab
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2022-11-18 18:06:08 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2022-11-18 18:25:39 +0000

    vmm: Convert VM_MAXCPU into a loader tunable hw.vmm.maxcpu.
    
    The default is now the number of CPUs in the host system (mp_ncpus)
    rather than 16.
    
    Reviewed by:    corvink, markj
    Differential Revision:  https://reviews.freebsd.org/D37175
---
 sys/amd64/include/vmm.h   |  6 ++----
 sys/amd64/vmm/intel/vmx.c | 10 +++++-----
 sys/amd64/vmm/vmm.c       | 29 +++++++++++++++++++++++++++--
 sys/amd64/vmm/vmm_stat.c  |  2 +-
 4 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index c6194c32b095..fddc15d2f17c 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -218,6 +218,8 @@ struct vmm_ops {
 extern const struct vmm_ops vmm_ops_intel;
 extern const struct vmm_ops vmm_ops_amd;
 
+extern u_int vm_maxcpu;			/* maximum virtual cpus */
+
 int vm_create(const char *name, struct vm **retvm);
 struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
 void vm_disable_vcpu_creation(struct vm *vm);
@@ -481,10 +483,6 @@ int vcpu_trace_exceptions(struct vcpu *vcpu);
 int vcpu_trap_wbinvd(struct vcpu *vcpu);
 #endif	/* KERNEL */
 
-#ifdef _KERNEL
-#define	VM_MAXCPU	16			/* maximum virtual cpus */
-#endif
-
 /*
  * Identifiers for optional vmm capabilities
  */
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 52573416ded7..baf62c1f8e8a 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -519,11 +519,11 @@ vpid_free(int vpid)
 		panic("vpid_free: invalid vpid %d", vpid);
 
 	/*
-	 * VPIDs [0,VM_MAXCPU] are special and are not allocated from
+	 * VPIDs [0,vm_maxcpu] are special and are not allocated from
 	 * the unit number allocator.
 	 */
 
-	if (vpid > VM_MAXCPU)
+	if (vpid > vm_maxcpu)
 		free_unr(vpid_unr, vpid);
 }
 
@@ -550,7 +550,7 @@ vpid_alloc(int vcpuid)
 
 		/*
 		 * If the unit number allocator does not have enough unique
-		 * VPIDs then we need to allocate from the [1,VM_MAXCPU] range.
+		 * VPIDs then we need to allocate from the [1,vm_maxcpu] range.
 		 *
 		 * These VPIDs are not unique across VMs but this does not
 		 * affect correctness because the combined mappings are also
@@ -572,13 +572,13 @@ vpid_init(void)
 	 * VPID 0 is required when the "enable VPID" execution control is
 	 * disabled.
 	 *
-	 * VPIDs [1,VM_MAXCPU] are used as the "overflow namespace" when the
+	 * VPIDs [1,vm_maxcpu] are used as the "overflow namespace" when the
 	 * unit number allocator does not have sufficient unique VPIDs to
 	 * satisfy the allocation.
 	 *
 	 * The remaining VPIDs are managed by the unit number allocator.
 	 */
-	vpid_unr = new_unrhdr(VM_MAXCPU + 1, 0xffff, NULL);
+	vpid_unr = new_unrhdr(vm_maxcpu + 1, 0xffff, NULL);
 }
 
 static void
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 7cde7b4005c4..0ebf80e94131 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -186,7 +186,7 @@ struct vm {
 	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) [m+v] guest memory regions */
 	struct vmspace	*vmspace;		/* (o) guest's address space */
 	char		name[VM_MAX_NAMELEN+1];	/* (o) virtual machine name */
-	struct vcpu	*vcpu[VM_MAXCPU];	/* (x) guest vcpus */
+	struct vcpu	**vcpu;			/* (o) guest vcpus */
 	/* The following describe the vm cpu topology */
 	uint16_t	sockets;		/* (o) num of sockets */
 	uint16_t	cores;			/* (o) num of cores/socket */
@@ -295,10 +295,22 @@ static int trap_wbinvd;
 SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0,
     "WBINVD triggers a VM-exit");
 
+u_int vm_maxcpu;
+SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+    &vm_maxcpu, 0, "Maximum number of vCPUs");
+
 static void vm_free_memmap(struct vm *vm, int ident);
 static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
 static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
 
+/*
+ * Upper limit on vm_maxcpu.  Limited by use of uint16_t types for CPU
+ * counts as well as range of vpid values for VT-x and by the capacity
+ * of cpuset_t masks.  The call to new_unrhdr() in vpid_init() in
+ * vmx.c requires 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below.
+ */
+#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)
+
 #ifdef KTR
 static const char *
 vcpu_state2str(enum vcpu_state state)
@@ -395,6 +407,16 @@ vmm_init(void)
 	if (!vmm_is_hw_supported())
 		return (ENXIO);
 
+	vm_maxcpu = mp_ncpus;
+	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
+
+	if (vm_maxcpu > VM_MAXCPU) {
+		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
+		vm_maxcpu = VM_MAXCPU;
+	}
+	if (vm_maxcpu == 0)
+		vm_maxcpu = 1;
+
 	vmm_host_state_init();
 
 	vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) :
@@ -579,11 +601,13 @@ vm_create(const char *name, struct vm **retvm)
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
 	sx_init(&vm->mem_segs_lock, "vm mem_segs");
 	sx_init(&vm->vcpus_init_lock, "vm vcpus");
+	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK |
+	    M_ZERO);
 
 	vm->sockets = 1;
 	vm->cores = cores_per_package;	/* XXX backwards compatibility */
 	vm->threads = threads_per_core;	/* XXX backwards compatibility */
-	vm->maxcpus = VM_MAXCPU;	/* XXX temp to keep code working */
+	vm->maxcpus = vm_maxcpu;
 
 	vm_init(vm, true);
 
@@ -669,6 +693,7 @@ vm_cleanup(struct vm *vm, bool destroy)
 		vmmops_vmspace_free(vm->vmspace);
 		vm->vmspace = NULL;
 
+		free(vm->vcpu, M_VM);
 		sx_destroy(&vm->vcpus_init_lock);
 		sx_destroy(&vm->mem_segs_lock);
 		mtx_destroy(&vm->rendezvous_mtx);
diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c
index 168a380b221b..2750982185aa 100644
--- a/sys/amd64/vmm/vmm_stat.c
+++ b/sys/amd64/vmm/vmm_stat.c
@@ -71,7 +71,7 @@ vmm_stat_register(void *arg)
 		return;
 
 	if (vst->nelems == VMM_STAT_NELEMS_VCPU)
-		vst->nelems = VM_MAXCPU;
+		vst->nelems = vm_maxcpu;
 
 	if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
 		printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc);