git: 589aed00e36c - main - sched: separate out schedinit_ap()
Date: Wed, 03 Nov 2021 20:55:21 UTC
The branch main has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=589aed00e36c22733d3fd9c9016deccf074830b1 commit 589aed00e36c22733d3fd9c9016deccf074830b1 Author: Kyle Evans <kevans@FreeBSD.org> AuthorDate: 2021-11-02 18:06:47 +0000 Commit: Kyle Evans <kevans@FreeBSD.org> CommitDate: 2021-11-03 20:54:59 +0000 sched: separate out schedinit_ap() schedinit_ap() sets up an AP for a later call to sched_throw(NULL). Currently, ULE sets up some pcpu bits and fixes the idlethread lock with a call to sched_throw(NULL); this results in a window where curthread is setup in platforms' init_secondary(), but it has the wrong td_lock. Typical platform AP startup procedure looks something like: - Setup curthread - ... other stuff, including cpu_initclocks_ap() - Signal smp_started - sched_throw(NULL) to enter the scheduler cpu_initclocks_ap() may have callouts to process (e.g., nvme) and attempt to sched_add() for this AP, but this attempt fails because of the noted violated assumption leading to locking heartburn in sched_setpreempt(). Interrupts are still disabled until cpu_throw() so we're not really at risk of being preempted -- just let the scheduler in on it a little earlier as part of setting up curthread. Reviewed by: alfredo, kib, markj Triage help from: andrew, markj Smoke-tested by: alfredo (ppc), kevans (arm64, x86), mhorne (arm) Differential Revision: https://reviews.freebsd.org/D32797 --- sys/arm/arm/mp_machdep.c | 1 + sys/arm64/arm64/mp_machdep.c | 1 + sys/kern/sched_4bsd.c | 7 +++++++ sys/kern/sched_ule.c | 29 ++++++++++++++++++++++------- sys/mips/mips/mp_machdep.c | 1 + sys/powerpc/aim/mp_cpudep.c | 2 ++ sys/powerpc/booke/mp_cpudep.c | 2 ++ sys/riscv/riscv/mp_machdep.c | 1 + sys/sys/sched.h | 5 +++++ sys/x86/x86/mp_x86.c | 1 + 10 files changed, 43 insertions(+), 7 deletions(-) diff --git a/sys/arm/arm/mp_machdep.c b/sys/arm/arm/mp_machdep.c index 6368f7b72da9..4089af5929eb 100644 --- a/sys/arm/arm/mp_machdep.c +++ b/sys/arm/arm/mp_machdep.c @@ -182,6 +182,7 @@ init_secondary(int cpu) pc->pc_curthread = pc->pc_idlethread; pc->pc_curpcb = pc->pc_idlethread->td_pcb; set_curthread(pc->pc_idlethread); + schedinit_ap(); #ifdef VFP vfp_init(); #endif diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c index 15e05ef46262..b42f65b9e399 100644 --- a/sys/arm64/arm64/mp_machdep.c +++ b/sys/arm64/arm64/mp_machdep.c @@ -255,6 +255,7 @@ init_secondary(uint64_t cpu) /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; + schedinit_ap(); /* Initialize curpmap to match TTBR0's current setting. */ pmap0 = vmspace_pmap(&vmspace0); diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index ddd65b94f0ff..6ba41eb80dcc 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -678,6 +678,13 @@ schedinit(void) mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN); } +void +schedinit_ap(void) +{ + + /* Nothing needed. */ +} + int sched_runnable(void) { diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index 1b9473b93773..ce7ce4cd2bd8 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -1743,6 +1743,26 @@ schedinit(void) ts0->ts_cpu = curcpu; /* set valid CPU number */ } +/* + * schedinit_ap() is needed prior to calling sched_throw(NULL) to ensure that + * the pcpu requirements are met for any calls in the period between curthread + * initialization and sched_throw(). One can safely add threads to the queue + * before sched_throw(), for instance, as long as the thread lock is setup + * correctly. + * + * TDQ_SELF() relies on the below sched pcpu setting; it may be used only + * after schedinit_ap(). + */ +void +schedinit_ap(void) +{ + +#ifdef SMP + PCPU_SET(sched, DPCPU_PTR(tdq)); +#endif + PCPU_GET(idlethread)->td_lock = TDQ_LOCKPTR(TDQ_SELF()); +} + /* * This is only somewhat accurate since given many processes of the same * priority they will switch when their slices run out, which will be @@ -2973,19 +2993,14 @@ sched_throw(struct thread *td) struct thread *newtd; struct tdq *tdq; + tdq = TDQ_SELF(); if (__predict_false(td == NULL)) { -#ifdef SMP - PCPU_SET(sched, DPCPU_PTR(tdq)); -#endif - /* Correct spinlock nesting and acquire the correct lock. */ - tdq = TDQ_SELF(); TDQ_LOCK(tdq); + /* Correct spinlock nesting. */ spinlock_exit(); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); - PCPU_GET(idlethread)->td_lock = TDQ_LOCKPTR(tdq); } else { - tdq = TDQ_SELF(); THREAD_LOCK_ASSERT(td, MA_OWNED); THREAD_LOCKPTR_ASSERT(td, TDQ_LOCKPTR(tdq)); tdq_load_rem(tdq, td); diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c index 1a5a023db381..dc089db1d189 100644 --- a/sys/mips/mips/mp_machdep.c +++ b/sys/mips/mips/mp_machdep.c @@ -311,6 +311,7 @@ smp_init_secondary(u_int32_t cpuid) /* Initialize curthread. */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); + schedinit_ap(); mtx_lock_spin(&ap_boot_mtx); diff --git a/sys/powerpc/aim/mp_cpudep.c b/sys/powerpc/aim/mp_cpudep.c index 33aae520c4b2..a73246487683 100644 --- a/sys/powerpc/aim/mp_cpudep.c +++ b/sys/powerpc/aim/mp_cpudep.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/pcpu.h> #include <sys/proc.h> +#include <sys/sched.h> #include <sys/smp.h> #include <machine/bus.h> @@ -134,6 +135,7 @@ cpudep_ap_bootstrap(void) #endif pcpup->pc_curpcb = pcpup->pc_curthread->td_pcb; sp = pcpup->pc_curpcb->pcb_sp; + schedinit_ap(); return (sp); } diff --git a/sys/powerpc/booke/mp_cpudep.c b/sys/powerpc/booke/mp_cpudep.c index c07e8c06ce05..709578d8e1b4 100644 --- a/sys/powerpc/booke/mp_cpudep.c +++ b/sys/powerpc/booke/mp_cpudep.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/pcpu.h> #include <sys/proc.h> +#include <sys/sched.h> #include <sys/smp.h> #include <machine/pcb.h> @@ -85,6 +86,7 @@ cpudep_ap_bootstrap() #endif pcpup->pc_curpcb = pcpup->pc_curthread->td_pcb; sp = pcpup->pc_curpcb->pcb_sp; + schedinit_ap(); /* XXX shouldn't the pcb_sp be checked/forced for alignment here?? */ diff --git a/sys/riscv/riscv/mp_machdep.c b/sys/riscv/riscv/mp_machdep.c index 1dadf19ce51f..57d5606a3b88 100644 --- a/sys/riscv/riscv/mp_machdep.c +++ b/sys/riscv/riscv/mp_machdep.c @@ -248,6 +248,7 @@ init_secondary(uint64_t hart) /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; + schedinit_ap(); /* * Identify current CPU. This is necessary to setup diff --git a/sys/sys/sched.h b/sys/sys/sched.h index 64651ffa9c90..8041a2bc12d4 100644 --- a/sys/sys/sched.h +++ b/sys/sys/sched.h @@ -226,6 +226,11 @@ SYSINIT(name, SI_SUB_LAST, SI_ORDER_MIDDLE, name ## _add_proc, NULL); * Fixup scheduler state for proc0 and thread0 */ void schedinit(void); + +/* + * Fixup scheduler state for secondary APs + */ +void schedinit_ap(void); #endif /* _KERNEL */ /* POSIX 1003.1b Process Scheduling */ diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c index ca1125886619..1fac244cbed7 100644 --- a/sys/x86/x86/mp_x86.c +++ b/sys/x86/x86/mp_x86.c @@ -1040,6 +1040,7 @@ init_secondary_tail(void) /* Initialize curthread. */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); + schedinit_ap(); mtx_lock_spin(&ap_boot_mtx);