PERFORCE change 129909 for review
Peter Wemm
peter at FreeBSD.org
Sat Dec 1 14:32:50 PST 2007
http://perforce.freebsd.org/chv.cgi?CH=129909
Change 129909 by peter at peter_daintree on 2007/12/01 22:30:49
Revert to vendor. too painful to merge, will redo.
Affected files ...
.. //depot/projects/bike_sched/sys/kern/sched_4bsd.c#8 integrate
.. //depot/projects/bike_sched/sys/kern/sched_ule.c#6 integrate
Differences ...
==== //depot/projects/bike_sched/sys/kern/sched_4bsd.c#8 (text+ko) ====
@@ -33,12 +33,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.86 2006/07/02 20:53:52 maxim Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.110 2007/11/14 06:21:22 julian Exp $");
#include "opt_hwpmc_hooks.h"
-#define kse td_sched
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -53,6 +51,7 @@
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/turnstile.h>
+#include <sys/umtx.h>
#include <machine/pcb.h>
#include <machine/smp.h>
@@ -76,56 +75,40 @@
/*
* The schedulable entity that runs a context.
- * A process may have several of these. Probably one per processor
- * but possibly a few more.
+ * This is an extension to the thread structure and is tailored to
+ * the requirements of this scheduler
*/
-struct kse {
- TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */
- struct thread *ke_thread; /* (*) Active associated thread. */
- fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */
- u_char ke_rqindex; /* (j) Run queue index. */
- enum {
- KES_THREAD = 0x0, /* slaved to thread state */
- KES_ONRUNQ
- } ke_state; /* (j) KSE status. */
- int ke_cpticks; /* (j) Ticks of cpu time. */
- struct runq *ke_runq; /* runq the kse is currently on */
+struct td_sched {
+ TAILQ_ENTRY(td_sched) ts_procq; /* (j/z) Run queue. */
+ struct thread *ts_thread; /* (*) Active associated thread. */
+ fixpt_t ts_pctcpu; /* (j) %cpu during p_swtime. */
+ u_char ts_rqindex; /* (j) Run queue index. */
+ int ts_cpticks; /* (j) Ticks of cpu time. */
+ int ts_slptime; /* (j) Seconds !RUNNING. */
+ struct runq *ts_runq; /* runq the thread is currently on */
};
-#define td_kse td_sched
-
/* flags kept in td_flags */
-#define TDF_DIDRUN TDF_SCHED0 /* KSE actually ran. */
-#define TDF_EXIT TDF_SCHED1 /* KSE is being killed. */
+#define TDF_DIDRUN TDF_SCHED0 /* thread actually ran. */
+#define TDF_EXIT TDF_SCHED1 /* thread is being killed. */
#define TDF_BOUND TDF_SCHED2
-#define ke_flags ke_thread->td_flags
-#define KEF_DIDRUN TDF_DIDRUN /* KSE actually ran. */
-#define KEF_EXIT TDF_EXIT /* KSE is being killed. */
-#define KEF_BOUND TDF_BOUND /* stuck to one CPU */
+#define ts_flags ts_thread->td_flags
+#define TSF_DIDRUN TDF_DIDRUN /* thread actually ran. */
+#define TSF_EXIT TDF_EXIT /* thread is being killed. */
+#define TSF_BOUND TDF_BOUND /* stuck to one CPU */
-#define SKE_RUNQ_PCPU(ke) \
- ((ke)->ke_runq != 0 && (ke)->ke_runq != &runq)
+#define SKE_RUNQ_PCPU(ts) \
+ ((ts)->ts_runq != 0 && (ts)->ts_runq != &runq)
-/*
- * KSE_CAN_MIGRATE macro returns true if the kse can migrate between
- * cpus.
- */
-#define KSE_CAN_MIGRATE(ke) \
- ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
-
-static struct kse kse0;
+static struct td_sched td_sched0;
+struct mtx sched_lock;
static int sched_tdcnt; /* Total runnable threads in the system. */
static int sched_quantum; /* Roundrobin scheduling quantum in ticks. */
#define SCHED_QUANTUM (hz / 10) /* Default sched quantum */
-static struct callout roundrobin_callout;
-
-static struct thread *sched_choose(void);
-
static void setup_runqs(void);
-static void roundrobin(void *arg);
static void schedcpu(void);
static void schedcpu_thread(void);
static void sched_priority(struct thread *td, u_char prio);
@@ -236,6 +219,12 @@
"account for htt");
#endif
+#if 0
+static int sched_followon = 0;
+SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW,
+ &sched_followon, 0,
+ "allow threads to share a quantum");
+#endif
static __inline void
sched_load_add(void)
@@ -258,36 +247,15 @@
maybe_resched(struct thread *td)
{
- mtx_assert(&sched_lock, MA_OWNED);
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
if (td->td_priority < curthread->td_priority)
curthread->td_flags |= TDF_NEEDRESCHED;
}
/*
- * Force switch among equal priority processes every 100ms.
- * We don't actually need to force a context switch of the current process.
- * The act of firing the event triggers a context switch to softclock() and
- * then switching back out again which is equivalent to a preemption, thus
- * no further work is needed on the local CPU.
- */
-/* ARGSUSED */
-static void
-roundrobin(void *arg)
-{
-
-#ifdef SMP
- mtx_lock_spin(&sched_lock);
- forward_roundrobin();
- mtx_unlock_spin(&sched_lock);
-#endif
-
- callout_reset(&roundrobin_callout, sched_quantum, roundrobin, NULL);
-}
-
-/*
* Constants for digital decay and forget:
* 90% of (td_estcpu) usage in 5 * loadav time
- * 95% of (ke_pctcpu) usage in 60 seconds (load insensitive)
+ * 95% of (ts_pctcpu) usage in 60 seconds (load insensitive)
* Note that, as ps(1) mentions, this can let percentages
* total over 100% (I've seen 137.9% for 3 processes).
*
@@ -352,7 +320,7 @@
#define loadfactor(loadav) (2 * (loadav))
#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
-/* decay 95% of `ke_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
+/* decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
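[Aside, not part of the diff: a tiny userland sketch of the decay math above. It assumes FSHIFT is 11 (as in sys/param.h) and shows that applying the exp(-1/20) factor once per second, the way schedcpu() does, leaves roughly 5% of ts_pctcpu after 60 seconds, which is what the "95% in 60 seconds" comment claims.]

/*
 * Standalone illustration of the ts_pctcpu decay; FSHIFT/FSCALE values
 * are an assumption of this sketch, mirroring sys/param.h.
 */
#include <stdio.h>

#define FSHIFT	11			/* assumed fixed-point shift */
#define FSCALE	(1 << FSHIFT)

typedef unsigned int fixpt_t;

int
main(void)
{
	fixpt_t ccpu = (fixpt_t)(0.95122942450071400909 * FSCALE); /* exp(-1/20) */
	fixpt_t pctcpu = FSCALE;	/* pretend 100% cpu to start with */
	int sec;

	/* schedcpu() applies this once per second per thread. */
	for (sec = 0; sec < 60; sec++)
		pctcpu = (pctcpu * ccpu) >> FSHIFT;

	/* Prints a value around 0.05: about 95% has decayed away. */
	printf("fraction left after 60s: %.3f\n", (double)pctcpu / FSCALE);
	return (0);
}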
@@ -381,77 +349,70 @@
register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
struct thread *td;
struct proc *p;
- struct kse *ke;
+ struct td_sched *ts;
int awake, realstathz;
realstathz = stathz ? stathz : hz;
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
- /*
- * Prevent state changes and protect run queue.
- */
- mtx_lock_spin(&sched_lock);
- /*
- * Increment time in/out of memory. We ignore overflow; with
- * 16-bit int's (remember them?) overflow takes 45 days.
- */
- p->p_swtime++;
+ PROC_SLOCK(p);
FOREACH_THREAD_IN_PROC(p, td) {
awake = 0;
- ke = td->td_kse;
+ thread_lock(td);
+ ts = td->td_sched;
/*
* Increment sleep time (if sleeping). We
* ignore overflow, as above.
*/
/*
- * The kse slptimes are not touched in wakeup
- * because the thread may not HAVE a KSE.
+ * The td_sched slptimes are not touched in wakeup
+ * because the thread may not HAVE everything in
+ * memory? XXX I think this is out of date.
*/
- if (ke->ke_state == KES_ONRUNQ) {
+ if (TD_ON_RUNQ(td)) {
awake = 1;
- ke->ke_flags &= ~KEF_DIDRUN;
- } else if ((ke->ke_state == KES_THREAD) &&
- (TD_IS_RUNNING(td))) {
+ ts->ts_flags &= ~TSF_DIDRUN;
+ } else if (TD_IS_RUNNING(td)) {
awake = 1;
- /* Do not clear KEF_DIDRUN */
- } else if (ke->ke_flags & KEF_DIDRUN) {
+ /* Do not clear TSF_DIDRUN */
+ } else if (ts->ts_flags & TSF_DIDRUN) {
awake = 1;
- ke->ke_flags &= ~KEF_DIDRUN;
+ ts->ts_flags &= ~TSF_DIDRUN;
}
/*
- * ke_pctcpu is only for ps and ttyinfo().
- * Do it per kse, and add them up at the end?
+ * ts_pctcpu is only for ps and ttyinfo().
+ * Do it per td_sched, and add them up at the end?
* XXXKSE
*/
- ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >>
- FSHIFT;
+ ts->ts_pctcpu = (ts->ts_pctcpu * ccpu) >> FSHIFT;
/*
- * If the kse has been idle the entire second,
+ * If the td_sched has been idle the entire second,
* stop recalculating its priority until
* it wakes up.
*/
- if (ke->ke_cpticks == 0)
- continue;
+ if (ts->ts_cpticks != 0) {
#if (FSHIFT >= CCPU_SHIFT)
- ke->ke_pctcpu += (realstathz == 100)
- ? ((fixpt_t) ke->ke_cpticks) <<
- (FSHIFT - CCPU_SHIFT) :
- 100 * (((fixpt_t) ke->ke_cpticks)
- << (FSHIFT - CCPU_SHIFT)) / realstathz;
+ ts->ts_pctcpu += (realstathz == 100)
+ ? ((fixpt_t) ts->ts_cpticks) <<
+ (FSHIFT - CCPU_SHIFT) :
+ 100 * (((fixpt_t) ts->ts_cpticks)
+ << (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
- ke->ke_pctcpu += ((FSCALE - ccpu) *
- (ke->ke_cpticks *
- FSCALE / realstathz)) >> FSHIFT;
+ ts->ts_pctcpu += ((FSCALE - ccpu) *
+ (ts->ts_cpticks *
+ FSCALE / realstathz)) >> FSHIFT;
#endif
- ke->ke_cpticks = 0;
-
+ ts->ts_cpticks = 0;
+ }
/*
* If there are ANY running threads in this process,
* then don't count it as sleeping.
+ * XXX this is broken
+ *
*/
if (awake) {
- if (td->td_slptime > 1) {
+ if (ts->ts_slptime > 1) {
/*
* In an ideal world, this should not
* happen, because whoever woke us
@@ -463,16 +424,19 @@
*/
updatepri(td);
}
- td->td_slptime = 0;
+ ts->ts_slptime = 0;
} else
- td->td_slptime++;
- if (td->td_slptime > 1)
+ ts->ts_slptime++;
+ if (ts->ts_slptime > 1) {
+ thread_unlock(td);
continue;
+ }
td->td_estcpu = decay_cpu(loadfac, td->td_estcpu);
resetpriority(td);
resetpriority_thread(td);
+ thread_unlock(td);
} /* end of thread loop */
- mtx_unlock_spin(&sched_lock);
+ PROC_SUNLOCK(p);
} /* end of process loop */
sx_sunlock(&allproc_lock);
}
@@ -483,11 +447,10 @@
static void
schedcpu_thread(void)
{
- int nowake;
for (;;) {
schedcpu();
- tsleep(&nowake, 0, "-", hz);
+ pause("-", hz);
}
}
@@ -499,16 +462,18 @@
static void
updatepri(struct thread *td)
{
- register fixpt_t loadfac;
- register unsigned int newcpu;
+ struct td_sched *ts;
+ fixpt_t loadfac;
+ unsigned int newcpu;
+ ts = td->td_sched;
loadfac = loadfactor(averunnable.ldavg[0]);
- if (td->td_slptime > 5 * loadfac)
+ if (ts->ts_slptime > 5 * loadfac)
td->td_estcpu = 0;
else {
newcpu = td->td_estcpu;
- td->td_slptime--; /* was incremented in schedcpu() */
- while (newcpu && --td->td_slptime)
+ ts->ts_slptime--; /* was incremented in schedcpu() */
+ while (newcpu && --ts->ts_slptime)
newcpu = decay_cpu(loadfac, newcpu);
td->td_estcpu = newcpu;
}
@@ -529,12 +494,12 @@
NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN);
newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
PRI_MAX_TIMESHARE);
- td->td_user_pri = newpriority;
+ sched_user_prio(td, newpriority);
}
}
/*
- * Update the thread's priority when the associated ksegroup's user
+ * Update the thread's priority when the associated process's user
* priority changes.
*/
static void
@@ -562,11 +527,6 @@
sched_quantum = SCHED_QUANTUM;
hogticks = 2 * sched_quantum;
- callout_init(&roundrobin_callout, CALLOUT_MPSAFE);
-
- /* Kick off timeout driven events by calling first time. */
- roundrobin(NULL);
-
/* Account for thread0. */
sched_load_add();
}
@@ -585,9 +545,10 @@
* Set up the scheduler specific parts of proc0.
*/
proc0.p_sched = NULL; /* XXX */
- thread0.td_sched = &kse0;
- kse0.ke_thread = &thread0;
- kse0.ke_state = KES_THREAD;
+ thread0.td_sched = &td_sched0;
+ thread0.td_lock = &sched_lock;
+ td_sched0.ts_thread = &thread0;
+ mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
}
int
@@ -625,17 +586,25 @@
void
sched_clock(struct thread *td)
{
- struct kse *ke;
+ struct td_sched *ts;
- mtx_assert(&sched_lock, MA_OWNED);
- ke = td->td_kse;
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ ts = td->td_sched;
- ke->ke_cpticks++;
+ ts->ts_cpticks++;
td->td_estcpu = ESTCPULIM(td->td_estcpu + 1);
if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
resetpriority(td);
resetpriority_thread(td);
}
+
+ /*
+ * Force a context switch if the current thread has used up a full
+ * quantum (default quantum is 100ms).
+ */
+ if (!TD_IS_IDLETHREAD(td) &&
+ ticks - PCPU_GET(switchticks) >= sched_quantum)
+ td->td_flags |= TDF_NEEDRESCHED;
}
/*
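[Aside, not part of the diff: the new check in sched_clock() marks a thread for reschedule once it has held the CPU for a full quantum. A standalone sketch of that arithmetic, assuming hz = 1000 so the default SCHED_QUANTUM of hz / 10 works out to 100 ticks, i.e. 100ms; the switchticks starting value is made up for illustration.]

#include <stdio.h>

int
main(void)
{
	int hz = 1000;			/* assumed tick rate */
	int sched_quantum = hz / 10;	/* default SCHED_QUANTUM */
	int switchticks = 5000;		/* ticks value at the last switch-in (made up) */
	int now;

	/* Same test as the kernel: ticks - switchticks >= sched_quantum. */
	for (now = switchticks; ; now++)
		if (now - switchticks >= sched_quantum)
			break;
	printf("reschedule after %d ticks (%d ms at hz=%d)\n",
	    now - switchticks, (now - switchticks) * 1000 / hz, hz);
	return (0);
}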
@@ -644,20 +613,39 @@
void
sched_exit(struct proc *p, struct thread *td)
{
- struct thread *parent = FIRST_THREAD_IN_PROC(p);
CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d",
- td, td->td_proc->p_comm, td->td_priority);
+ td, td->td_name, td->td_priority);
+ PROC_SLOCK_ASSERT(p, MA_OWNED);
+ sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
+}
+
+void
+sched_exit_thread(struct thread *td, struct thread *child)
+{
- parent->td_estcpu = ESTCPULIM(parent->td_estcpu + td->td_estcpu);
- if ((td->td_proc->p_flag & P_NOLOAD) == 0)
+ CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
+ child, child->td_name, child->td_priority);
+ thread_lock(td);
+ td->td_estcpu = ESTCPULIM(td->td_estcpu + child->td_estcpu);
+ thread_unlock(td);
+ mtx_lock_spin(&sched_lock);
+ if ((child->td_proc->p_flag & P_NOLOAD) == 0)
sched_load_rem();
+ mtx_unlock_spin(&sched_lock);
}
void
sched_fork(struct thread *td, struct thread *childtd)
{
+ sched_fork_thread(td, childtd);
+}
+
+void
+sched_fork_thread(struct thread *td, struct thread *childtd)
+{
childtd->td_estcpu = td->td_estcpu;
+ childtd->td_lock = &sched_lock;
sched_newthread(childtd);
}
@@ -667,18 +655,20 @@
struct thread *td;
PROC_LOCK_ASSERT(p, MA_OWNED);
- mtx_assert(&sched_lock, MA_OWNED);
+ PROC_SLOCK_ASSERT(p, MA_OWNED);
p->p_nice = nice;
FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
resetpriority(td);
resetpriority_thread(td);
+ thread_unlock(td);
}
}
void
sched_class(struct thread *td, int class)
{
- mtx_assert(&sched_lock, MA_OWNED);
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
td->td_pri_class = class;
}
@@ -689,16 +679,17 @@
sched_priority(struct thread *td, u_char prio)
{
CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
- td, td->td_proc->p_comm, td->td_priority, prio, curthread,
- curthread->td_proc->p_comm);
+ td, td->td_name, td->td_priority, prio, curthread,
+ curthread->td_name);
- mtx_assert(&sched_lock, MA_OWNED);
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
if (td->td_priority == prio)
return;
- if (TD_ON_RUNQ(td)) {
- adjustrunqueue(td, prio);
- } else {
- td->td_priority = prio;
+ td->td_priority = prio;
+ if (TD_ON_RUNQ(td) &&
+ td->td_sched->ts_rqindex != (prio / RQ_PPQ)) {
+ sched_rem(td);
+ sched_add(td, SRQ_BORING);
}
}
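[Aside, not part of the diff: the new test in sched_priority() only requeues the thread when the priority change crosses a run-queue boundary, since prio / RQ_PPQ is the queue index. A quick sketch, assuming RQ_PPQ is 4 (256 priorities spread over 64 queues, per sys/runq.h).]

#include <stdio.h>

#define RQ_PPQ	4	/* priorities per queue (assumed, see sys/runq.h) */

static int
needs_requeue(unsigned char rqindex, unsigned char newprio)
{
	/* Mirrors the ts_rqindex != (prio / RQ_PPQ) test above. */
	return (rqindex != newprio / RQ_PPQ);
}

int
main(void)
{
	/* A thread queued at priority 120 sits on queue 120 / 4 = 30. */
	printf("120 -> 122: %d\n", needs_requeue(120 / RQ_PPQ, 122));	/* 0: same queue */
	printf("120 -> 130: %d\n", needs_requeue(120 / RQ_PPQ, 130));	/* 1: crosses a boundary */
	return (0);
}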
@@ -767,26 +758,78 @@
}
void
+sched_user_prio(struct thread *td, u_char prio)
+{
+ u_char oldprio;
+
+ td->td_base_user_pri = prio;
+ if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
+ return;
+ oldprio = td->td_user_pri;
+ td->td_user_pri = prio;
+
+ if (TD_ON_UPILOCK(td) && oldprio != prio)
+ umtx_pi_adjust(td, oldprio);
+}
+
+void
+sched_lend_user_prio(struct thread *td, u_char prio)
+{
+ u_char oldprio;
+
+ td->td_flags |= TDF_UBORROWING;
+
+ oldprio = td->td_user_pri;
+ td->td_user_pri = prio;
+
+ if (TD_ON_UPILOCK(td) && oldprio != prio)
+ umtx_pi_adjust(td, oldprio);
+}
+
+void
+sched_unlend_user_prio(struct thread *td, u_char prio)
+{
+ u_char base_pri;
+
+ base_pri = td->td_base_user_pri;
+ if (prio >= base_pri) {
+ td->td_flags &= ~TDF_UBORROWING;
+ sched_user_prio(td, base_pri);
+ } else
+ sched_lend_user_prio(td, prio);
+}
+
+void
sched_sleep(struct thread *td)
{
- mtx_assert(&sched_lock, MA_OWNED);
- td->td_slptime = 0;
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ td->td_slptick = ticks;
+ td->td_sched->ts_slptime = 0;
}
void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
- struct kse *ke;
+ struct td_sched *ts;
struct proc *p;
- ke = td->td_kse;
+ ts = td->td_sched;
p = td->td_proc;
- mtx_assert(&sched_lock, MA_OWNED);
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ /*
+ * Switch to the sched lock to fix things up and pick
+ * a new thread.
+ */
+ if (td->td_lock != &sched_lock) {
+ mtx_lock_spin(&sched_lock);
+ thread_unlock(td);
+ }
if ((p->p_flag & P_NOLOAD) == 0)
sched_load_rem();
+
if (newtd)
newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED);
@@ -800,12 +843,15 @@
* or stopped or any thing else similar. We never put the idle
* threads on the run queue, however.
*/
- if (td == PCPU_GET(idlethread))
+ if (td->td_flags & TDF_IDLETD) {
TD_SET_CAN_RUN(td);
- else {
+#ifdef SMP
+ idle_cpus_mask &= ~PCPU_GET(cpumask);
+#endif
+ } else {
if (TD_IS_RUNNING(td)) {
- /* Put us back on the run queue (kse and all). */
- setrunqueue(td, (flags & SW_PREEMPT) ?
+ /* Put us back on the run queue. */
+ sched_add(td, (flags & SW_PREEMPT) ?
SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
SRQ_OURSELF|SRQ_YIELDING);
}
@@ -816,45 +862,72 @@
* as if it had been added to the run queue and selected.
* It came from:
* * A preemption
+ * * An upcall
* * A followon
*/
KASSERT((newtd->td_inhibitors == 0),
- ("trying to run inhibitted thread"));
- newtd->td_kse->ke_flags |= KEF_DIDRUN;
+ ("trying to run inhibited thread"));
+ newtd->td_sched->ts_flags |= TSF_DIDRUN;
TD_SET_RUNNING(newtd);
if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
sched_load_add();
} else {
newtd = choosethread();
}
+ MPASS(newtd->td_lock == &sched_lock);
if (td != newtd) {
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
- cpu_switch(td, newtd);
+
+ /* I feel sleepy */
+ cpu_switch(td, newtd, td->td_lock);
+ /*
+ * Where am I? What year is it?
+ * We are in the same thread that went to sleep above,
+ * but any amount of time may have passed. All our context
+ * will still be available as will local variables.
+ * PCPU values however may have changed as we may have
+ * changed CPU so don't trust cached values of them.
+ * New threads will go to fork_exit() instead of here
+ * so if you change things here you may need to change
+ * things there too.
+ * If the thread above was exiting it will never wake
+ * up again here, so either it has saved everything it
+ * needed to, or the thread_wait() or wait() will
+ * need to reap it.
+ */
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
}
+#ifdef SMP
+ if (td->td_flags & TDF_IDLETD)
+ idle_cpus_mask |= PCPU_GET(cpumask);
+#endif
sched_lock.mtx_lock = (uintptr_t)td;
td->td_oncpu = PCPU_GET(cpuid);
+ MPASS(td->td_lock == &sched_lock);
}
void
sched_wakeup(struct thread *td)
{
+ struct td_sched *ts;
- mtx_assert(&sched_lock, MA_OWNED);
- if (td->td_slptime > 1) {
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ ts = td->td_sched;
+ if (ts->ts_slptime > 1) {
updatepri(td);
resetpriority(td);
}
- td->td_slptime = 0;
- setrunqueue(td, SRQ_BORING);
+ td->td_slptick = ticks;
+ ts->ts_slptime = 0;
+ sched_add(td, SRQ_BORING);
}
#ifdef SMP
@@ -984,45 +1057,56 @@
sched_add(struct thread *td, int flags)
#ifdef SMP
{
- struct kse *ke;
+ struct td_sched *ts;
int forwarded = 0;
int cpu;
int single_cpu = 0;
- ke = td->td_kse;
- mtx_assert(&sched_lock, MA_OWNED);
- KASSERT(ke->ke_state != KES_ONRUNQ,
- ("sched_add: kse %p (%s) already in run queue", ke,
- td->td_proc->p_comm));
- KASSERT(td->td_proc->p_sflag & PS_INMEM,
- ("sched_add: process swapped out"));
+ ts = td->td_sched;
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ KASSERT((td->td_inhibitors == 0),
+ ("sched_add: trying to run inhibited thread"));
+ KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
+ ("sched_add: bad thread state"));
+ KASSERT(td->td_flags & TDF_INMEM,
+ ("sched_add: thread swapped out"));
CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
- td, td->td_proc->p_comm, td->td_priority, curthread,
- curthread->td_proc->p_comm);
+ td, td->td_name, td->td_priority, curthread,
+ curthread->td_name);
+ /*
+ * Now that the thread is moving to the run-queue, set the lock
+ * to the scheduler's lock.
+ */
+ if (td->td_lock != &sched_lock) {
+ mtx_lock_spin(&sched_lock);
+ thread_lock_set(td, &sched_lock);
+ }
+ TD_SET_RUNQ(td);
if (td->td_pinned != 0) {
cpu = td->td_lastcpu;
- ke->ke_runq = &runq_pcpu[cpu];
+ ts->ts_runq = &runq_pcpu[cpu];
single_cpu = 1;
CTR3(KTR_RUNQ,
- "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
- } else if ((ke)->ke_flags & KEF_BOUND) {
+ "sched_add: Put td_sched:%p(td:%p) on cpu%d runq", ts, td, cpu);
+ } else if ((ts)->ts_flags & TSF_BOUND) {
/* Find CPU from bound runq */
- KASSERT(SKE_RUNQ_PCPU(ke),("sched_add: bound kse not on cpu runq"));
- cpu = ke->ke_runq - &runq_pcpu[0];
+ KASSERT(SKE_RUNQ_PCPU(ts),("sched_add: bound td_sched not on cpu runq"));
+ cpu = ts->ts_runq - &runq_pcpu[0];
single_cpu = 1;
CTR3(KTR_RUNQ,
- "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
+ "sched_add: Put td_sched:%p(td:%p) on cpu%d runq", ts, td, cpu);
} else {
CTR2(KTR_RUNQ,
- "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
+ "sched_add: adding td_sched:%p (td:%p) to gbl runq", ts, td);
cpu = NOCPU;
- ke->ke_runq = &runq;
+ ts->ts_runq = &runq;
}
if (single_cpu && (cpu != PCPU_GET(cpuid))) {
kick_other_cpu(td->td_priority,cpu);
} else {
+
if (!single_cpu) {
cpumask_t me = PCPU_GET(cpumask);
int idle = idle_cpus_mask & me;
@@ -1031,6 +1115,7 @@
(idle_cpus_mask & ~(hlt_cpus_mask | me)))
forwarded = forward_wakeup(cpu);
}
+
if (!forwarded) {
if ((flags & SRQ_YIELDING) == 0 && maybe_preempt(td))
return;
@@ -1041,24 +1126,33 @@
if ((td->td_proc->p_flag & P_NOLOAD) == 0)
sched_load_add();
- runq_add(ke->ke_runq, ke, flags);
- ke->ke_state = KES_ONRUNQ;
+ runq_add(ts->ts_runq, ts, flags);
}
#else /* SMP */
{
- struct kse *ke;
- ke = td->td_kse;
- mtx_assert(&sched_lock, MA_OWNED);
- KASSERT(ke->ke_state != KES_ONRUNQ,
- ("sched_add: kse %p (%s) already in run queue", ke,
- td->td_proc->p_comm));
- KASSERT(td->td_proc->p_sflag & PS_INMEM,
- ("sched_add: process swapped out"));
+ struct td_sched *ts;
+ ts = td->td_sched;
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ KASSERT((td->td_inhibitors == 0),
+ ("sched_add: trying to run inhibited thread"));
+ KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
+ ("sched_add: bad thread state"));
+ KASSERT(td->td_flags & TDF_INMEM,
+ ("sched_add: thread swapped out"));
CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
- td, td->td_proc->p_comm, td->td_priority, curthread,
- curthread->td_proc->p_comm);
- CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td);
- ke->ke_runq = &runq;
+ td, td->td_name, td->td_priority, curthread,
+ curthread->td_name);
+ /*
+ * Now that the thread is moving to the run-queue, set the lock
+ * to the scheduler's lock.
+ */
+ if (td->td_lock != &sched_lock) {
+ mtx_lock_spin(&sched_lock);
+ thread_lock_set(td, &sched_lock);
+ }
+ TD_SET_RUNQ(td);
+ CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td);
+ ts->ts_runq = &runq;
/*
* If we are yielding (on the way out anyhow)
@@ -1077,70 +1171,30 @@
}
if ((td->td_proc->p_flag & P_NOLOAD) == 0)
sched_load_add();
- runq_add(ke->ke_runq, ke, flags);
- ke->ke_state = KES_ONRUNQ;
+ runq_add(ts->ts_runq, ts, flags);
maybe_resched(td);
}
#endif /* SMP */
void
-sched_run_ithread(struct thread *td)
-{
- struct kse *ke = td->td_kse;
-
- /* Inline of setrunqueue */
- CTR2(KTR_RUNQ, "sched_run_ithread: td:%p pid:%d",
- td, td->td_proc->p_pid);
- CTR5(KTR_SCHED, "sched_run_ithread: %p(%s) prio %d by %p(%s)",
- td, td->td_proc->p_comm, td->td_priority, ctd,
- ctd->td_proc->p_comm);
- mtx_assert(&sched_lock, MA_OWNED);
- KASSERT((td->td_inhibitors == 0),
- ("sched_run_ithread: trying to run inhibitted thread"));
- KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
- ("sched_run_ithread: bad thread state"));
- KASSERT(ke->ke_state != KES_ONRUNQ,
- ("sched_run_ithread: kse %p (%s) already in run queue", ke,
- td->td_proc->p_comm));
- KASSERT(td->td_proc->p_sflag & PS_INMEM,
- ("sched_run_ithread: process swapped out"));
- CTR5(KTR_SCHED, "sched_run_ithread: %p(%s) prio %d by %p(%s)",
- td, td->td_proc->p_comm, td->td_priority, curthread,
- curthread->td_proc->p_comm);
- CTR2(KTR_RUNQ, "sched_run_ithread: adding kse:%p (td:%p) to runq", ke, td);
-
- TD_SET_RUNQ(td);
- ke->ke_runq = &runq;
- /* Preempt if we can. If we did, we're finished */
- if (maybe_preempt(td))
- return;
- /* We didn't preempt. Place on runq */
- if ((td->td_proc->p_flag & P_NOLOAD) == 0)
- sched_load_add();
- runq_add(ke->ke_runq, ke, SRQ_INTR);
- ke->ke_state = KES_ONRUNQ;
- maybe_resched(td);
-}
-
-void
sched_rem(struct thread *td)
{
- struct kse *ke;
+ struct td_sched *ts;
- ke = td->td_kse;
- KASSERT(td->td_proc->p_sflag & PS_INMEM,
- ("sched_rem: process swapped out"));
- KASSERT((ke->ke_state == KES_ONRUNQ),
- ("sched_rem: KSE not on run queue"));
+ ts = td->td_sched;
+ KASSERT(td->td_flags & TDF_INMEM,
+ ("sched_rem: thread swapped out"));
+ KASSERT(TD_ON_RUNQ(td),
+ ("sched_rem: thread not on run queue"));
mtx_assert(&sched_lock, MA_OWNED);
CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
- td, td->td_proc->p_comm, td->td_priority, curthread,
- curthread->td_proc->p_comm);
+ td, td->td_name, td->td_priority, curthread,
+ curthread->td_name);
if ((td->td_proc->p_flag & P_NOLOAD) == 0)
sched_load_rem();
- runq_remove(ke->ke_runq, ke);
- ke->ke_state = KES_THREAD;
+ runq_remove(ts->ts_runq, ts);
+ TD_SET_CAN_RUN(td);
}
/*
@@ -1150,58 +1204,83 @@
struct thread *
sched_choose(void)
{
- struct kse *ke;
+ struct td_sched *ts;
struct runq *rq;
+ mtx_assert(&sched_lock, MA_OWNED);
#ifdef SMP
- struct kse *kecpu;
+ struct td_sched *kecpu;
rq = &runq;
- ke = runq_choose(&runq);
+ ts = runq_choose(&runq);
kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);
- if (ke == NULL ||
+ if (ts == NULL ||
(kecpu != NULL &&
- kecpu->ke_thread->td_priority < ke->ke_thread->td_priority)) {
- CTR2(KTR_RUNQ, "choosing kse %p from pcpu runq %d", kecpu,
+ kecpu->ts_thread->td_priority < ts->ts_thread->td_priority)) {
+ CTR2(KTR_RUNQ, "choosing td_sched %p from pcpu runq %d", kecpu,
PCPU_GET(cpuid));
- ke = kecpu;
+ ts = kecpu;
rq = &runq_pcpu[PCPU_GET(cpuid)];
} else {
- CTR1(KTR_RUNQ, "choosing kse %p from main runq", ke);
+ CTR1(KTR_RUNQ, "choosing td_sched %p from main runq", ts);
}
#else
rq = &runq;
- ke = runq_choose(&runq);
+ ts = runq_choose(&runq);
#endif
- if (ke) {
- runq_remove(rq, ke);
- ke->ke_state = KES_THREAD;
+ if (ts) {
+ runq_remove(rq, ts);
+ ts->ts_flags |= TSF_DIDRUN;
+
+ KASSERT(ts->ts_thread->td_flags & TDF_INMEM,
+ ("sched_choose: thread swapped out"));
+ return (ts->ts_thread);
+ }
+ return (PCPU_GET(idlethread));
+}
- KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM,
- ("sched_choose: process swapped out"));
- return (ke->ke_thread);
+void
+sched_userret(struct thread *td)
+{
+ /*
+ * XXX we cheat slightly on the locking here to avoid locking in
+ * the usual case. Setting td_priority here is essentially an
+ * incomplete workaround for not setting it properly elsewhere.
+ * Now that some interrupt handlers are threads, not setting it
+ * properly elsewhere can clobber it in the window between setting
+ * it here and returning to user mode, so don't waste time setting
+ * it perfectly here.
+ */
+ KASSERT((td->td_flags & TDF_BORROWING) == 0,
+ ("thread with borrowed priority returning to userland"));
+ if (td->td_priority != td->td_user_pri) {
+ thread_lock(td);
+ td->td_priority = td->td_user_pri;
+ td->td_base_pri = td->td_user_pri;
+ thread_unlock(td);
}
- return (NULL);
}
void
sched_bind(struct thread *td, int cpu)
{
- struct kse *ke;
+ struct td_sched *ts;
- mtx_assert(&sched_lock, MA_OWNED);
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(TD_IS_RUNNING(td),
("sched_bind: cannot bind non-running thread"));
- ke = td->td_kse;
- ke->ke_flags |= KEF_BOUND;
+
+ ts = td->td_sched;
+
+ ts->ts_flags |= TSF_BOUND;
#ifdef SMP
>>> TRUNCATED FOR MAIL (1000 lines) <<<