git: 6b3531017348 - main - SCHEDULER_STOPPED(): Rely on a global variable

From: Olivier Certner <olce@FreeBSD.org>
Date: Fri, 26 Jan 2024 21:11:30 UTC
The branch main has been updated by olce:

URL: https://cgit.FreeBSD.org/src/commit/?id=6b35310173482a100304bf9129e1c9ac9e0e6d06

commit 6b35310173482a100304bf9129e1c9ac9e0e6d06
Author:     Olivier Certner <olce@FreeBSD.org>
AuthorDate: 2024-01-18 13:10:18 +0000
Commit:     Olivier Certner <olce@FreeBSD.org>
CommitDate: 2024-01-26 21:09:38 +0000

    SCHEDULER_STOPPED(): Rely on a global variable
    
    A commit from 2012 (5d7380f8e34f0083, r228424) introduced
    'td_stopsched', on the grounds that a global variable would cause all
    CPUs to have a copy of it in their cache, and consequently of all other
    variables sharing the same cache line.
    
    This is really a problem only if that cache line sees relatively
    frequent modifications.  This was unlikely to be the case back then,
    since nearby variables are themselves almost never modified.  In any
    case, today we have a new tool at our disposal to ensure that this
    variable goes into a read-mostly section containing frequently-accessed
    variables ('__read_frequently').  Most of the cache lines covering this
    section are likely to always be in every CPU cache.  This makes the
    second reason stated in the commit message (ensuring the field is in the
    same cache line as some lock-related fields, since these are accessed in
    close proximity) moot, as well as the second-order effect of requiring
    an additional line to be present in the cache (the one containing the
    new 'scheduler_stopped' boolean, see below).
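    
    For illustration, '__read_frequently' works by placing the annotated
    variable into a dedicated linker section that groups frequently-read
    globals together.  A minimal sketch of the mechanism (paraphrasing
    the definitions found in the kernel headers, not quoting them
    exactly):
    
        /* Section attributes gather read-mostly/read-frequently globals
         * together, so the cache lines covering them are rarely
         * dirtied. */
        #define __read_mostly       __section(".data.read_mostly")
        #define __read_frequently   __section(".data.read_frequently")
    
        /* The new global introduced by this commit (see the
         * kern_shutdown.c hunk below). */
        bool scheduler_stopped __read_frequently;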
    
    From a purely logical point of view, whether the scheduler is
    stopped is a global state, certainly not a per-thread property.
    
    Consequently, remove 'td_stopsched', which immediately frees a byte in
    'struct thread'.  Currently, the latter's size (and layout) stays
    unchanged, but some of the later re-orderings will probably benefit from
    this removal.  The available bytes at the original position of
    'td_stopsched' have been made explicit with the addition of the
    '_td_pad0' member.
    
    Store the global state in the new 'scheduler_stopped' boolean, which is
    annotated with '__read_frequently'.
    
    Replace uses of SCHEDULER_STOPPED_TD() with SCHEDULER_STOPPED() and
    remove the former, as it is now unnecessary.
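    
    With the consolidated macro, the check is a single read of one
    global byte.  A typical caller (this exact pattern recurs throughout
    the hunks below) looks like:
    
        if (SCHEDULER_STOPPED())
                return;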
    
    Reviewed by:            markj, kib
    Approved by:            markj (mentor)
    MFC after:              2 weeks
    Sponsored by:           The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D43572
---
 sys/kern/kern_condvar.c  | 10 +++++-----
 sys/kern/kern_mutex.c    |  4 ++--
 sys/kern/kern_rwlock.c   |  4 ++--
 sys/kern/kern_shutdown.c |  3 ++-
 sys/kern/kern_sx.c       |  2 +-
 sys/kern/kern_synch.c    |  6 +++---
 sys/kern/subr_kdb.c      |  4 ++--
 sys/sys/proc.h           |  4 ++--
 sys/sys/systm.h          |  8 +++-----
 9 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c
index 2731f581a29f..0d470aeafcd5 100644
--- a/sys/kern/kern_condvar.c
+++ b/sys/kern/kern_condvar.c
@@ -120,7 +120,7 @@ _cv_wait(struct cv *cvp, struct lock_object *lock)
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return;
 
 #ifdef KTRACE
@@ -184,7 +184,7 @@ _cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
 	    ("cv_wait_unlock cannot be used with Giant"));
 	class = LOCK_CLASS(lock);
 
-	if (SCHEDULER_STOPPED_TD(td)) {
+	if (SCHEDULER_STOPPED()) {
 		class->lc_unlock(lock);
 		return;
 	}
@@ -241,7 +241,7 @@ _cv_wait_sig(struct cv *cvp, struct lock_object *lock)
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (0);
 
 #ifdef KTRACE
@@ -309,7 +309,7 @@ _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt,
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (0);
 
 #ifdef KTRACE
@@ -379,7 +379,7 @@ _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock,
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (0);
 
 #ifdef KTRACE
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 6071ac7fd6f1..92be72546b46 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -424,7 +424,7 @@ _mtx_trylock_flags_int(struct mtx *m, int opts LOCK_FILE_LINE_ARG_DEF)
 
 	td = curthread;
 	tid = (uintptr_t)td;
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
@@ -534,7 +534,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 	doing_lockprof = 1;
 #endif
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return;
 
 	if (__predict_false(v == MTX_UNOWNED))
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index 83d5862a6667..28dddb950966 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -307,7 +307,7 @@ __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 
 	td = curthread;
 	tid = (uintptr_t)td;
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
@@ -666,7 +666,7 @@ __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 
 	td = curthread;
 
-	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) ||
+	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(td),
 	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
 	    td, rw->lock_object.lo_name, file, line));
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 17d40ff0429c..ee666281418f 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -225,6 +225,7 @@ SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN,
  * to indicate that the kernel has already called panic.
  */
 const char *panicstr __read_mostly;
+bool scheduler_stopped __read_frequently;
 
 int dumping __read_mostly;		/* system is dumping */
 int rebooting __read_mostly;		/* system is rebooting */
@@ -926,7 +927,7 @@ vpanic(const char *fmt, va_list ap)
 	 * Ensure that the scheduler is stopped while panicking, even if panic
 	 * has been entered from kdb.
 	 */
-	td->td_stopsched = 1;
+	scheduler_stopped = true;
 
 	bootopt = RB_AUTOBOOT;
 	newpanic = 0;
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 706ae90ef9af..d302fa45161e 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -350,7 +350,7 @@ sx_try_xlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 
 	td = curthread;
 	tid = (uintptr_t)td;
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 8cb847fe2a2d..f12054a04b23 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -158,7 +158,7 @@ _sleep(const void *ident, struct lock_object *lock, int priority,
 	else
 		class = NULL;
 
-	if (SCHEDULER_STOPPED_TD(td)) {
+	if (SCHEDULER_STOPPED()) {
 		if (lock != NULL && priority & PDROP)
 			class->lc_unlock(lock);
 		return (0);
@@ -247,7 +247,7 @@ msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg,
 	KASSERT(ident != NULL, ("msleep_spin_sbt: NULL ident"));
 	KASSERT(TD_IS_RUNNING(td), ("msleep_spin_sbt: curthread not running"));
 
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return (0);
 
 	sleepq_lock(ident);
@@ -511,7 +511,7 @@ mi_switch(int flags)
 	 */
 	if (kdb_active)
 		kdb_switch();
-	if (SCHEDULER_STOPPED_TD(td))
+	if (SCHEDULER_STOPPED())
 		return;
 	if (flags & SW_VOL) {
 		td->td_ru.ru_nvcsw++;
diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c
index 86f392485a4b..a7fc2284cbcf 100644
--- a/sys/kern/subr_kdb.c
+++ b/sys/kern/subr_kdb.c
@@ -764,7 +764,7 @@ kdb_trap(int type, int code, struct trapframe *tf)
 		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 		stop_cpus_hard(other_cpus);
 #endif
-		curthread->td_stopsched = 1;
+		scheduler_stopped = true;
 		did_stop_cpus = 1;
 	} else
 		did_stop_cpus = 0;
@@ -801,7 +801,7 @@ kdb_trap(int type, int code, struct trapframe *tf)
 	kdb_active--;
 
 	if (did_stop_cpus) {
-		curthread->td_stopsched = 0;
+		scheduler_stopped = false;
 #ifdef SMP
 		CPU_AND(&other_cpus, &other_cpus, &stopped_cpus);
 		restart_cpus(other_cpus);
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 33a878dc46aa..b08226c89dfd 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -270,7 +270,7 @@ struct thread {
 	const char	*td_wmesg;	/* (t) Reason for sleep. */
 	volatile u_char td_owepreempt;  /* (k*) Preempt on last critical_exit */
 	u_char		td_tsqueue;	/* (t) Turnstile queue blocked on. */
-	u_char		td_stopsched;	/* (k) Scheduler stopped. */
+	u_char		_td_pad0[2];	/* Available. */
 	int		td_locks;	/* (k) Debug: count of non-spin locks */
 	int		td_rw_rlocks;	/* (k) Count of rwlock read locks. */
 	int		td_sx_slocks;	/* (k) Count of sx shared locks. */
@@ -429,7 +429,7 @@ do {									\
 
 #define	TD_LOCKS_INC(td)	((td)->td_locks++)
 #define	TD_LOCKS_DEC(td) do {						\
-	KASSERT(SCHEDULER_STOPPED_TD(td) || (td)->td_locks > 0,		\
+	KASSERT(SCHEDULER_STOPPED() || (td)->td_locks > 0,		\
 	    ("Thread %p owns no locks", (td)));				\
 	(td)->td_locks--;						\
 } while (0)
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 0d3f9fe98893..29c8bfc3c768 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -99,17 +99,15 @@ struct ucred;
 #include <sys/pcpu.h>		/* curthread */
 #include <sys/kpilite.h>
 
+extern bool scheduler_stopped;
+
 /*
  * If we have already panic'd and this is the thread that called
  * panic(), then don't block on any mutexes but silently succeed.
  * Otherwise, the kernel will deadlock since the scheduler isn't
  * going to run the thread that holds any lock we need.
  */
-#define	SCHEDULER_STOPPED_TD(td)  ({					\
-	MPASS((td) == curthread);					\
-	__predict_false((td)->td_stopsched);				\
-})
-#define	SCHEDULER_STOPPED() SCHEDULER_STOPPED_TD(curthread)
+#define	SCHEDULER_STOPPED()	__predict_false(scheduler_stopped)
 
 extern int osreldate;