git: 459dc427873c - main - x86: Refactor kernel-mode NMI handling

From: Bojan Novković <bnovkov_at_FreeBSD.org>
Date: Sun, 15 Dec 2024 15:40:34 UTC
The branch main has been updated by bnovkov:

URL: https://cgit.FreeBSD.org/src/commit/?id=459dc427873c9a294387ec74a96e6f7824de7435

commit 459dc427873c9a294387ec74a96e6f7824de7435
Author:     Bojan Novković <bnovkov@FreeBSD.org>
AuthorDate: 2024-12-15 13:56:40 +0000
Commit:     Bojan Novković <bnovkov@FreeBSD.org>
CommitDate: 2024-12-15 15:39:36 +0000

    x86: Refactor kernel-mode NMI handling
    
    Refactor the kernel-mode NMI handler so that performance counter
    interrupts can be shared. The handler now allows multiple drivers
    to service the same interrupt (e.g. hwpmc(4) and hwt(4)'s Intel
    Processor Trace backend).
    
    Reviewed by:    kib, avg
    Differential Revision:  https://reviews.freebsd.org/D46421
---
 sys/amd64/amd64/trap.c    | 32 ++--------------
 sys/i386/i386/trap.c      | 26 +++----------
 sys/x86/include/x86_var.h |  4 +-
 sys/x86/x86/cpu_machdep.c | 95 +++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 104 insertions(+), 53 deletions(-)

diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 6ceeea41ea91..4590be501d64 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -230,38 +230,22 @@ trap(struct trapframe *frame)
 	VM_CNT_INC(v_trap);
 	type = frame->tf_trapno;
 
-#ifdef SMP
-	/* Handler for NMI IPIs used for stopping CPUs. */
-	if (type == T_NMI && ipi_nmi_handler() == 0)
-		return;
-#endif
-
 #ifdef KDB
 	if (kdb_active) {
 		kdb_reenter();
 		return;
 	}
 #endif
+	if (type == T_NMI) {
+		nmi_handle_intr(frame);
+		return;
+	}
 
 	if (type == T_RESERVED) {
 		trap_fatal(frame, 0);
 		return;
 	}
 
-	if (type == T_NMI) {
-#ifdef HWPMC_HOOKS
-		/*
-		 * CPU PMCs interrupt using an NMI.  If the PMC module is
-		 * active, pass the 'rip' value to the PMC module's interrupt
-		 * handler.  A non-zero return value from the handler means that
-		 * the NMI was consumed by it and we can return immediately.
-		 */
-		if (pmc_intr != NULL &&
-		    (*pmc_intr)(frame) != 0)
-			return;
-#endif
-	}
-
 	if ((frame->tf_rflags & PSL_I) == 0) {
 		/*
 		 * Buggy application or kernel code has disabled
@@ -392,10 +376,6 @@ trap(struct trapframe *frame)
 			signo = SIGFPE;
 			break;
 
-		case T_NMI:
-			nmi_handle_intr(type, frame);
-			return;
-
 		case T_OFLOW:		/* integer overflow fault */
 			ucode = FPE_INTOVF;
 			signo = SIGFPE;
@@ -619,10 +599,6 @@ trap(struct trapframe *frame)
 				return;
 #endif
 			break;
-
-		case T_NMI:
-			nmi_handle_intr(type, frame);
-			return;
 		}
 
 		trap_fatal(frame, 0);
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 693e3a2f94b4..9e310c049daa 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -237,12 +237,6 @@ trap(struct trapframe *frame)
 	KASSERT((read_eflags() & PSL_I) == 0,
 	    ("trap: interrupts enabled, type %d frame %p", type, frame));
 
-#ifdef SMP
-	/* Handler for NMI IPIs used for stopping CPUs. */
-	if (type == T_NMI && ipi_nmi_handler() == 0)
-		return;
-#endif /* SMP */
-
 #ifdef KDB
 	if (kdb_active) {
 		kdb_reenter();
@@ -251,24 +245,14 @@ trap(struct trapframe *frame)
 #endif
 	trap_check_kstack();
 
-	if (type == T_RESERVED) {
-		trap_fatal(frame, 0);
+	if (type == T_NMI) {
+		nmi_handle_intr(frame);
 		return;
 	}
 
-	if (type == T_NMI) {
-#ifdef HWPMC_HOOKS
-		/*
-		 * CPU PMCs interrupt using an NMI so we check for that first.
-		 * If the HWPMC module is active, 'pmc_hook' will point to
-		 * the function to be called.  A non-zero return value from the
-		 * hook means that the NMI was consumed by it and that we can
-		 * return immediately.
-		 */
-		if (pmc_intr != NULL &&
-		    (*pmc_intr)(frame) != 0)
-			return;
-#endif
+	if (type == T_RESERVED) {
+		trap_fatal(frame, 0);
+		return;
 	}
 
 	if (type == T_MCHK) {
diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h
index 6609871bf89e..dbb4e9557ed0 100644
--- a/sys/x86/include/x86_var.h
+++ b/sys/x86/include/x86_var.h
@@ -148,7 +148,9 @@ void	zenbleed_sanitize_enable(void);
 void	zenbleed_check_and_apply(bool all_cpus);
 void	nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame);
 void	nmi_call_kdb_smp(u_int type, struct trapframe *frame);
-void	nmi_handle_intr(u_int type, struct trapframe *frame);
+void	nmi_register_handler(int (*handler)(struct trapframe *));
+void	nmi_remove_handler(int (*handler)(struct trapframe *));
+void	nmi_handle_intr(struct trapframe *frame);
 void	pagecopy(void *from, void *to);
 void	printcpuinfo(void);
 int	pti_get_default(void);
diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c
index 5f6cc35dda6f..4df652f1f2a8 100644
--- a/sys/x86/x86/cpu_machdep.c
+++ b/sys/x86/x86/cpu_machdep.c
@@ -76,6 +76,7 @@
 #include <machine/cputypes.h>
 #include <machine/specialreg.h>
 #include <machine/md_var.h>
+#include <machine/trap.h>
 #include <machine/tss.h>
 #ifdef SMP
 #include <machine/smp.h>
@@ -885,17 +886,105 @@ nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame)
 		panic("NMI");
 }
 
+/*
+ * Dynamically registered NMI handlers.
+ */
+struct nmi_handler {
+	int running;	/* raised around each func call in the NMI path */
+	int (*func)(struct trapframe *);	/* NULL marks an unused slot */
+	struct nmi_handler *next;	/* new entries are pushed at the head */
+};
+static struct nmi_handler *nmi_handlers_head = NULL;
+MALLOC_DEFINE(M_NMI, "NMI handlers",
+    "List entries for dynamically registered NMI handlers");
+
 void
-nmi_handle_intr(u_int type, struct trapframe *frame)
+nmi_register_handler(int (*handler)(struct trapframe *))
 {
+	struct nmi_handler *hp;	/* list cursor, later the new entry */
+	int (*hpf)(struct trapframe *);
+
+	hp = (struct nmi_handler *)atomic_load_acq_ptr(
+	    (uintptr_t *)&nmi_handlers_head);
+	while (hp != NULL) {
+		hpf = hp->func;
+		MPASS(hpf != handler);	/* double registration is a bug */
+		if (hpf == NULL &&	/* unused slot: try to claim it */
+		    atomic_cmpset_ptr((volatile uintptr_t *)&hp->func,
+		    (uintptr_t)NULL, (uintptr_t)handler) != 0) {
+			hp->running = 0;
+			return;	/* reused an existing entry */
+		}
+		hp = (struct nmi_handler *)atomic_load_acq_ptr(
+		    (uintptr_t *)&hp->next);
+	}
+	hp = malloc(sizeof(struct nmi_handler), M_NMI, M_WAITOK | M_ZERO);
+	hp->func = handler;	/* set before the entry is published */
+	hp->next = nmi_handlers_head;
+	while (atomic_fcmpset_rel_ptr(
+	    (volatile uintptr_t *)&nmi_handlers_head,
+	    (uintptr_t *)&hp->next, (uintptr_t)hp) == 0)
+	        ;	/* retry until the entry is linked at the head */
+}
 
+void
+nmi_remove_handler(int (*handler)(struct trapframe *))
+{	/* Clear the handler's slot, then wait out in-flight calls. */
+	struct nmi_handler *hp;
+
+	hp = (struct nmi_handler *)atomic_load_acq_ptr(
+	    (uintptr_t *)&nmi_handlers_head);
+	while (hp != NULL) {
+		if (hp->func == handler) {
+			hp->func = NULL;	/* entry stays on the list */
+			/* Wait for the handler to exit before returning. */
+			while (atomic_load_int(&hp->running) != 0)
+				cpu_spinwait();
+			return;
+		}
+		hp = (struct nmi_handler *)atomic_load_acq_ptr(
+		    (uintptr_t *)&hp->next);
+	}	/* no entry matched: fall through to panic */
+
+	panic("%s: attempting to remove an unregistered NMI handler %p\n",
+	    __func__, handler);
+}
+
+void
+nmi_handle_intr(struct trapframe *frame)
+{	/* IPI check first, then every registered handler. */
+	int (*func)(struct trapframe *);
+	struct nmi_handler *hp;
+	bool handled;
+
+#ifdef SMP
+	/* Handler for NMI IPIs used for stopping CPUs. */
+	if (ipi_nmi_handler() == 0)
+		return;
+#endif
+	handled = false;
+	hp = (struct nmi_handler *)atomic_load_acq_ptr(
+	    (uintptr_t *)&nmi_handlers_head);
+	while (hp != NULL) {
+		func = hp->func;	/* NULL means the slot is unused */
+		if (func != NULL) {
+			atomic_add_int(&hp->running, 1);
+			if (func(frame) != 0)	/* nonzero: NMI claimed */
+				handled = true;	/* no break: keep going */
+			atomic_subtract_int(&hp->running, 1);
+		}
+		hp = (struct nmi_handler *)atomic_load_acq_ptr(
+		    (uintptr_t *)&hp->next);
+	}
+	if (handled)
+		return;	/* unclaimed NMIs go on to nmi_call_kdb*() */
 #ifdef SMP
 	if (nmi_is_broadcast) {
-		nmi_call_kdb_smp(type, frame);
+		nmi_call_kdb_smp(T_NMI, frame);
 		return;
 	}
 #endif
-	nmi_call_kdb(PCPU_GET(cpuid), type, frame);
+	nmi_call_kdb(PCPU_GET(cpuid), T_NMI, frame);
 }
 
 static int hw_ibrs_active;