git: 0b32ef71f9f1 - main - vmm: Correctly suspend and resume the vmm driver.

From: Warner Losh <imp_at_FreeBSD.org>
Date: Fri, 29 Nov 2024 21:39:24 UTC
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=0b32ef71f9f154f4da5037bfcbb4916960d38452

commit 0b32ef71f9f154f4da5037bfcbb4916960d38452
Author:     Joshua Rogers <Joshua@Joshua.Hu>
AuthorDate: 2024-09-11 17:42:25 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2024-11-29 21:38:35 +0000

    vmm: Correctly suspend and resume the vmm driver.
    
    Previously, VMXON would be executed on a resume, contrary to proper
    initialization. The contents of MSR_IA32_FEATURE_CONTROL may be lost on
    suspension and therefore must be restored. Likewise, the VMX Enable bit
    may be cleared upon suspend, requiring it to be re-set.
    
    Concretely disable VMX on suspend, and re-enable it on resume.
    
    Note: any IOMMU context will remain lost for any enabled vmm devices.
    
    Signed-off-by: Joshua Rogers <Joshua@Joshua.Hu>
    Reviewed by: jhb,imp
    Pull Request: https://github.com/freebsd/freebsd-src/pull/1419
---
 sys/amd64/acpica/acpi_wakeup.c |  3 +++
 sys/amd64/amd64/machdep.c      |  1 +
 sys/amd64/include/cpu.h        |  3 ++-
 sys/amd64/include/vmm.h        |  2 ++
 sys/amd64/vmm/amd/svm.c        |  7 +++++++
 sys/amd64/vmm/intel/vmx.c      | 11 ++++++++++-
 sys/amd64/vmm/vmm.c            |  3 +++
 sys/x86/x86/mp_x86.c           |  5 +++++
 8 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c
index 351cd16ff9ca..51d6d5e36840 100644
--- a/sys/amd64/acpica/acpi_wakeup.c
+++ b/sys/amd64/acpica/acpi_wakeup.c
@@ -202,6 +202,9 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
 
 	intr_suspend();
 
+	if (vmm_suspend_p != NULL)
+		vmm_suspend_p();
+
 	pcb = &susppcbs[0]->sp_pcb;
 	if (savectx(pcb)) {
 		fpususpend(susppcbs[0]->sp_fpususpend);
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index e2ae3843a119..73bfdd4d07f5 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -213,6 +213,7 @@ struct mem_range_softc mem_range_softc;
 
 struct mtx dt_lock;	/* lock for GDT and LDT */
 
+void (*vmm_suspend_p)(void);
 void (*vmm_resume_p)(void);
 
 bool efi_boot;
diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h
index 99b8806ba0ba..57f2a0b59bbb 100644
--- a/sys/amd64/include/cpu.h
+++ b/sys/amd64/include/cpu.h
@@ -70,7 +70,8 @@ extern char	btext[];
 extern char	_end[];
 extern char	etext[];
 
-/* Resume hook for VMM. */
+/* Suspend and resume hooks for VMM. */
+extern	void (*vmm_suspend_p)(void);
 extern	void (*vmm_resume_p)(void);
 
 void	cpu_halt(void);
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index be88fc867e98..dd8e76962caf 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -170,6 +170,7 @@ struct vm_eventinfo {
 
 typedef int	(*vmm_init_func_t)(int ipinum);
 typedef int	(*vmm_cleanup_func_t)(void);
+typedef void	(*vmm_suspend_func_t)(void);
 typedef void	(*vmm_resume_func_t)(void);
 typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
 typedef int	(*vmi_run_func_t)(void *vcpui, register_t rip,
@@ -194,6 +195,7 @@ typedef int	(*vmi_restore_tsc_t)(void *vcpui, uint64_t now);
 struct vmm_ops {
 	vmm_init_func_t		modinit;	/* module wide initialization */
 	vmm_cleanup_func_t	modcleanup;
+	vmm_suspend_func_t	modsuspend;	/* run on suspend via vmm_suspend_p */
 	vmm_resume_func_t	modresume;
 
 	vmi_init_func_t		init;		/* vm-specific initialization */
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index cc0b1c0c8725..2d40d3756f4a 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -277,6 +277,13 @@ svm_modinit(int ipinum)
 	return (0);
 }
 
+static void
+svm_modsuspend(void)
+{
+
+	return;
+}
+
 static void
 svm_modresume(void)
 {
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 34b5fecc149c..13a53fa8eed6 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -648,12 +648,20 @@ vmx_enable(void *arg __unused)
 		vmxon_enabled[curcpu] = 1;
 }
 
+static void
+vmx_modsuspend(void)
+{
+
+	if (vmxon_enabled[curcpu])
+		vmx_disable(NULL);
+}
+
 static void
 vmx_modresume(void)
 {
 
 	if (vmxon_enabled[curcpu])
-		vmxon(&vmxon_region[curcpu * PAGE_SIZE]);
+		vmx_enable(NULL);
 }
 
 static int
@@ -4271,6 +4279,7 @@ vmx_restore_tsc(void *vcpui, uint64_t offset)
 const struct vmm_ops vmm_ops_intel = {
 	.modinit	= vmx_modinit,
 	.modcleanup	= vmx_modcleanup,
+	.modsuspend	= vmx_modsuspend,
 	.modresume	= vmx_modresume,
 	.init		= vmx_init,
 	.run		= vmx_run,
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 9569f8ace909..d1f57a717fdf 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -232,6 +232,7 @@ vmmops_panic(void)
 
 DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum))
 DEFINE_VMMOPS_IFUNC(int, modcleanup, (void))
+DEFINE_VMMOPS_IFUNC(void, modsuspend, (void))
 DEFINE_VMMOPS_IFUNC(void, modresume, (void))
 DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap))
 DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t rip, struct pmap *pmap,
@@ -452,6 +453,7 @@ vmm_init(void)
 	if (error)
 		return (error);
 
+	vmm_suspend_p = vmmops_modsuspend;
 	vmm_resume_p = vmmops_modresume;
 
 	return (vmmops_modinit(vmm_ipinum));
@@ -479,6 +481,7 @@ vmm_handler(module_t mod, int what, void *arg)
 		if (vmm_is_hw_supported()) {
 			error = vmmdev_cleanup();
 			if (error == 0) {
+				vmm_suspend_p = NULL;
 				vmm_resume_p = NULL;
 				iommu_cleanup();
 				if (vmm_ipinum != IPI_AST)
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
index 0a683b3e841a..493017e303e3 100644
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -1591,6 +1591,11 @@ cpususpend_handler(void)
 
 	mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
 
+#ifdef __amd64__
+	if (vmm_suspend_p != NULL)
+		vmm_suspend_p();
+#endif /* __amd64__ */
+
 	cpu = PCPU_GET(cpuid);
 
 #ifdef XENHVM