git: fd67ff5c7a6c - main - Use the correct idle routine on recent AMD EPYC servers

From: Andrew Gallatin <gallatin_at_FreeBSD.org>
Date: Fri, 08 Nov 2024 22:12:48 UTC
The branch main has been updated by gallatin:

URL: https://cgit.FreeBSD.org/src/commit/?id=fd67ff5c7a6cd9a2e82e6a02ea249cec76a4c030

commit fd67ff5c7a6cd9a2e82e6a02ea249cec76a4c030
Author:     Andrew Gallatin <gallatin@FreeBSD.org>
AuthorDate: 2024-11-08 21:37:32 +0000
Commit:     Andrew Gallatin <gallatin@FreeBSD.org>
CommitDate: 2024-11-08 22:10:44 +0000

    Use the correct idle routine on recent AMD EPYC servers
    
    We have been incorrectly choosing the "hlt" idle method on modern AMD
    EPYC servers for C1 idle. This is because AMD also uses the Functional
    Fixed Hardware interface. Due to not parsing the table properly for
    AMD, and due to a weird quirk where the mwait latency for C1 is
    mis-interpreted as the latency for hlt, we wind up choosing hlt for
    C1, which has a far higher wake up latency (similar to IO) of roughly
    400us on my test system (AMD 7502P).
    
    This patch fixes this by:
    
    - Looking for AMD in addition to Intel in the FFH
     (Note the vendor id of "2" for AMD is not publicly documented, but
     AMD has confirmed they are using "2" and has promised to document it.)
    
    - Using mwait on AMD when specified in the table, and when CPUID says
     it's supported
    
    - Fixing a weird issue where we copy the contents of cx_ptr for C1 and
     when moving to C2, we do not reinitialize cx_ptr. This leads to
     mwait being selected, and the specified I/O halt method being ignored,
     unless we clear do_mwait before looking at the table for C2.
    
    Differential Revision: https://reviews.freebsd.org/D47444
    Reviewed by: dab, kib, vangyzen
    Sponsored by: Netflix
---
 sys/dev/acpica/acpi_cpu.c | 9 +++++++--
 sys/x86/include/x86_var.h | 1 +
 sys/x86/x86/identcpu.c    | 2 ++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/sys/dev/acpica/acpi_cpu.c b/sys/dev/acpica/acpi_cpu.c
index 80855cf168e9..63e17de1ff28 100644
--- a/sys/dev/acpica/acpi_cpu.c
+++ b/sys/dev/acpica/acpi_cpu.c
@@ -131,6 +131,7 @@ struct acpi_cpu_device {
 #define PIIX4_PCNTRL_BST_EN	(1<<10)
 
 #define	CST_FFH_VENDOR_INTEL	1
+#define	CST_FFH_VENDOR_AMD	2
 #define	CST_FFH_INTEL_CL_C1IO	1
 #define	CST_FFH_INTEL_CL_MWAIT	2
 #define	CST_FFH_MWAIT_HW_COORD	0x0001
@@ -855,7 +856,8 @@ acpi_cpu_cx_cst(struct acpi_cpu_softc *sc)
 	    acpi_cpu_cx_cst_free_plvlx(sc->cpu_dev, cx_ptr);
 #if defined(__i386__) || defined(__amd64__)
 	    if (acpi_PkgFFH_IntelCpu(pkg, 0, &vendor, &class, &address,
-	      &accsize) == 0 && vendor == CST_FFH_VENDOR_INTEL) {
+	      &accsize) == 0 &&
+		(vendor == CST_FFH_VENDOR_INTEL || vendor == CST_FFH_VENDOR_AMD)) {
 		if (class == CST_FFH_INTEL_CL_C1IO) {
 		    /* C1 I/O then Halt */
 		    cx_ptr->res_rid = sc->cpu_cx_count;
@@ -872,7 +874,9 @@ acpi_cpu_cx_cst(struct acpi_cpu_softc *sc)
 			  "degrading to C1 Halt", (int)address);
 		    }
 		} else if (class == CST_FFH_INTEL_CL_MWAIT) {
-		    acpi_cpu_cx_cst_mwait(cx_ptr, address, accsize);
+		    if (vendor == CST_FFH_VENDOR_INTEL ||
+			(vendor == CST_FFH_VENDOR_AMD && cpu_mon_mwait_edx != 0))
+		        acpi_cpu_cx_cst_mwait(cx_ptr, address, accsize);
 		}
 	    }
 #endif
@@ -922,6 +926,7 @@ acpi_cpu_cx_cst(struct acpi_cpu_softc *sc)
 	    acpi_PkgGas(sc->cpu_dev, pkg, 0, &cx_ptr->res_type,
 		&cx_ptr->res_rid, &cx_ptr->p_lvlx, RF_SHAREABLE);
 	    if (cx_ptr->p_lvlx) {
+		cx_ptr->do_mwait = false;
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 		     "acpi_cpu%d: Got C%d - %d latency\n",
 		     device_get_unit(sc->cpu_dev), cx_ptr->type,
diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h
index f19c557e270b..6609871bf89e 100644
--- a/sys/x86/include/x86_var.h
+++ b/sys/x86/include/x86_var.h
@@ -62,6 +62,7 @@ extern	char	cpu_vendor[];
 extern	char	cpu_model[];
 extern	u_int	cpu_vendor_id;
 extern	u_int	cpu_mon_mwait_flags;
+extern	u_int	cpu_mon_mwait_edx;
 extern	u_int	cpu_mon_min_size;
 extern	u_int	cpu_mon_max_size;
 extern	u_int	cpu_maxphyaddr;
diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c
index d3aec5b5e0c6..3f8f11fda011 100644
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@@ -117,6 +117,7 @@ u_int	cpu_stdext_feature3;	/* %edx */
 uint64_t cpu_ia32_arch_caps;
 u_int	cpu_max_ext_state_size;
 u_int	cpu_mon_mwait_flags;	/* MONITOR/MWAIT flags (CPUID.05H.ECX) */
+u_int	cpu_mon_mwait_edx;	/* MONITOR/MWAIT supported on AMD (CPUID.05H.EDX) */
 u_int	cpu_mon_min_size;	/* MONITOR minimum range size, bytes */
 u_int	cpu_mon_max_size;	/* MONITOR minimum range size, bytes */
 u_int	cpu_maxphyaddr;		/* Max phys addr width in bits */
@@ -1634,6 +1635,7 @@ finishidentcpu(void)
 	if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) {
 		do_cpuid(5, regs);
 		cpu_mon_mwait_flags = regs[2];
+		cpu_mon_mwait_edx = regs[3];
 		cpu_mon_min_size = regs[0] &  CPUID5_MON_MIN_SIZE;
 		cpu_mon_max_size = regs[1] &  CPUID5_MON_MAX_SIZE;
 	}