PERFORCE change 99640 for review
Kip Macy
kmacy at FreeBSD.org
Mon Jun 19 21:37:02 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=99640
Change 99640 by kmacy at kmacy_storage:sun4v_work_sleepq on 2006/06/19 21:36:23
add per-cpu runqueues
Affected files ...
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_idle.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#8 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#8 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_sleepqueue.c#7 edit
.. //depot/projects/kmacy_sun4v/src/sys/sys/runq.h#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/sys/sched.h#4 edit
Differences ...
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_idle.c#6 (text+ko) ====
@@ -94,36 +94,29 @@
static void
idle_proc(void *dummy)
{
- struct proc *p;
- struct thread *td;
+ int i;
#ifdef SMP
cpumask_t mycpu;
#endif
- td = curthread;
- p = td->td_proc;
#ifdef SMP
mycpu = PCPU_GET(cpumask);
atomic_set_int(&idle_cpus_mask, mycpu);
#endif
+ i = 0;
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
- while (sched_runnable() == 0)
+ while (sched_runnable() == 0 && (i++%10 != 0))
cpu_idle();
#ifdef SMP
atomic_clear_int(&idle_cpus_mask, mycpu);
#endif
- spinlock_enter(); /* avoid preemption after choosethread */
- if ((td = choosethread()) != curthread) {
- mtx_lock_spin(&sched_lock);
- spinlock_exit();
- sched_switch(curthread, td, SW_VOL);
- mtx_unlock_spin(&sched_lock);
- } else
- spinlock_exit();
+ mtx_lock_spin(&sched_lock);
+ mi_switch(SW_VOL, NULL);
+ mtx_unlock_spin(&sched_lock);
#ifdef SMP
atomic_set_int(&idle_cpus_mask, mycpu);
#endif
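
With this hunk the idle thread no longer calls choosethread()/sched_switch() itself; it simply enters mi_switch() under sched_lock and lets sched_choose() pick the next thread. The i++ % 10 test also forces a reschedule roughly every tenth pass even when sched_runnable() sees nothing locally, presumably so the new stealing path in sched_choose() (see the sched_4bsd.c hunk below) gets a chance to pull work from other CPUs' global queues. Assembled from the diff (SMP idle_cpus_mask updates unchanged and abbreviated), the loop now reads roughly:

	i = 0;
	for (;;) {
		mtx_assert(&Giant, MA_NOTOWNED);
		while (sched_runnable() == 0 && (i++ % 10 != 0))
			cpu_idle();
		/* SMP: clear this CPU's bit in idle_cpus_mask here */
		mtx_lock_spin(&sched_lock);
		mi_switch(SW_VOL, NULL);	/* sched_choose() picks next */
		mtx_unlock_spin(&sched_lock);
		/* SMP: set the bit again before idling */
	}
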
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#8 (text+ko) ====
@@ -47,11 +47,7 @@
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif
-#ifndef SMP
-#error "use SMP!"
-#define runq_lock(a, b)
-#define runq_unlock(a, b)
-#endif
+
/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
@@ -80,6 +76,7 @@
#else
static int kern_sched_preemption = 0;
#endif
+
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
&kern_sched_preemption, 0, "Kernel preemption enabled");
@@ -139,7 +136,10 @@
adjustrunqueue(struct thread *td, int newpri)
{
struct kse *ke;
+ int cpu, single_cpu;
+ struct runq *rq;
+ single_cpu = 0;
mtx_assert(&sched_lock, MA_OWNED);
KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
@@ -147,15 +147,42 @@
CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
/* We only care about the kse in the run queue. */
td->td_priority = newpri;
+
if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
+ if (td->td_pinned != 0) {
+ cpu = td->td_lastcpu;
+ rq = &runq_pcpu[cpu];
+ single_cpu = 1;
+ CTR3(KTR_RUNQ,
+ "setrunqueue: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
+ } else if ((ke)->ke_flags & KEF_BOUND) {
+ /* Find CPU from bound runq */
+ rq = ke->ke_runq;
+ cpu = ke->ke_runq - &runq_pcpu[0];
+ single_cpu = 1;
+ CTR3(KTR_RUNQ,
+ "setrunqueue: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
+ } else {
+ CTR2(KTR_RUNQ,
+ "setrunqueue: adding kse:%p (td:%p) to gbl runq", ke, td);
+ cpu = NOCPU;
+ rq = &runq_global[td->td_lastcpu];
+ }
+ KASSERT(rq == ke->ke_runq, ("runq mismatch"));
sched_rem(td);
- sched_add(td, SRQ_BORING);
+ sched_add(td, SRQ_BORING, cpu, single_cpu);
}
}
void
setrunqueue(struct thread *td, int flags)
{
+ struct runq *rq;
+ struct kse *ke;
+ int cpu, single_cpu;
+
+ ke = td->td_kse;
+ single_cpu = 0;
CTR2(KTR_RUNQ, "setrunqueue: td:%p pid:%d",
td, td->td_proc->p_pid);
@@ -167,7 +194,30 @@
("setrunqueue: trying to run inhibitted thread"));
KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
("setrunqueue: bad thread state"));
- sched_add(td, flags);
+
+ if (td->td_pinned != 0) {
+ cpu = td->td_lastcpu;
+ rq = ke->ke_runq = &runq_pcpu[cpu];
+ single_cpu = 1;
+ CTR3(KTR_RUNQ,
+ "setrunqueue: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
+ } else if ((ke)->ke_flags & KEF_BOUND) {
+ /* Find CPU from bound runq */
+ rq = ke->ke_runq;
+ cpu = ke->ke_runq - &runq_pcpu[0];
+ single_cpu = 1;
+ CTR3(KTR_RUNQ,
+ "setrunqueue: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
+ } else {
+ CTR2(KTR_RUNQ,
+ "setrunqueue: adding kse:%p (td:%p) to gbl runq", ke, td);
+ cpu = NOCPU;
+ rq = ke->ke_runq = &runq_global[td->td_lastcpu];
+ }
+
+ mtx_lock_spin(&rq->rq_lock);
+ sched_add(td, flags, cpu, single_cpu);
+ mtx_unlock_spin(&rq->rq_lock);
}
/*
@@ -316,8 +366,10 @@
int i;
bzero(rq, sizeof *rq);
- for (i = 0; i < RQ_NQS; i++)
+ for (i = 0; i < RQ_NQS; i++) {
TAILQ_INIT(&rq->rq_queues[i]);
+ }
+ mtx_init(&rq->rq_lock, "runq lock", NULL, MTX_SPIN);
}
/*
@@ -334,7 +386,7 @@
rqb->rqb_bits[RQB_WORD(pri)],
rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
RQB_BIT(pri), RQB_WORD(pri));
- atomic_clear_long(&rqb->rqb_bits[RQB_WORD(pri)], RQB_BIT(pri));
+ rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}
static __inline int
@@ -355,30 +407,11 @@
struct rqbits *rqb;
int pri;
int i;
-#ifdef SMP
- u_long lockbits;
-#endif
+
rqb = &rq->rq_status;
for (i = 0; i < RQB_LEN; i++)
if (rqb->rqb_bits[i]) {
pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
-#ifdef SMP
- lockbits = rq->rq_lockbits[i];
- if (!atomic_cmpset_acq_long(&rq->rq_lockbits[i],
- (lockbits & ~RQB_BIT(pri)),
- (lockbits | RQB_BIT(pri))))
- {
- i = 0;
- continue;
- }
- if (!runq_isset(rq, pri)) {
- atomic_clear_rel_long(&rq->rq_lockbits[RQB_WORD(pri)],
- RQB_BIT(pri));
- i = 0;
- continue;
- }
- runq_clrbit(rq, pri);
-#endif
CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
rqb->rqb_bits[i], i, pri);
return (pri);
@@ -401,16 +434,15 @@
rqb->rqb_bits[RQB_WORD(pri)],
rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
RQB_BIT(pri), RQB_WORD(pri));
- /* XXX only works on 64-bit - 32 bit will need a mutex */
- atomic_set_long(&rqb->rqb_bits[RQB_WORD(pri)], RQB_BIT(pri));
+ rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}
/*
* Add the KSE to the queue specified by its priority, and set the
* corresponding status bit.
*/
-void
-runq_add(struct runq *rq, struct kse *ke, int flags)
+static void
+_runq_add(struct runq *rq, struct kse *ke, int flags)
{
struct rqhead *rqh;
int pri;
@@ -420,15 +452,25 @@
rqh = &rq->rq_queues[pri];
CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
- runq_lock(ke->ke_runq, ke);
if (flags & SRQ_PREEMPTED)
TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
else
TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
- runq_unlock(ke->ke_runq, ke);
-#ifndef SMP
runq_setbit(rq, pri);
-#endif
+}
+
+void
+runq_add_unlocked(struct runq *rq, struct kse *ke, int flags)
+{
+ _runq_add(rq, ke, flags);
+}
+
+void
+runq_add(struct runq *rq, struct kse *ke, int flags)
+{
+ mtx_lock_spin(&rq->rq_lock);
+ _runq_add(rq, ke, flags);
+ mtx_unlock_spin(&rq->rq_lock);
}
/*
@@ -441,8 +483,8 @@
{
struct rqbits *rqb;
int i;
+ rqb = &rq->rq_status;
- rqb = &rq->rq_status;
for (i = 0; i < RQB_LEN; i++)
if (rqb->rqb_bits[i]) {
CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
@@ -459,30 +501,6 @@
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif
-static struct kse *
-runq_check_lastcpu(struct rqhead *rqh, int count)
-{
- /*
- * In the first couple of entries, check if
- * there is one for our CPU as a preference.
- */
- int cpu = PCPU_GET(cpuid);
- struct kse *ke, *ke2;
- ke2 = ke = TAILQ_FIRST(rqh);
-
- while (count-- && ke2) {
- if (ke->ke_thread->td_lastcpu == cpu) {
- ke = ke2;
- break;
- }
- ke2 = TAILQ_NEXT(ke2, ke_procq);
- }
- KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
- CTR2(KTR_RUNQ,
- "runq_choose: kse=%p rqh=%p", ke, rqh);
- return (ke);
-}
-
/*
* Find the highest priority process on the run queue.
*/
@@ -492,30 +510,24 @@
struct rqhead *rqh;
struct kse *ke;
int pri;
+
+ ke = NULL;
- while ((pri = runq_findbit(rq)) != -1) {
+ mtx_lock_spin(&rq->rq_lock);
+ if ((pri = runq_findbit(rq)) != -1) {
rqh = &rq->rq_queues[pri];
-#if defined(SMP) && defined(SCHED_4BSD)
- /* fuzz == 1 is normal.. 0 or less are ignored */
- if (runq_fuzz > 1)
- ke = runq_check_lastcpu(rqh, runq_fuzz);
- else
-#endif
- ke = TAILQ_FIRST(rqh);
- if (ke) {
- runq_remove_unlocked(rq, ke);
- runq_unlock(rq, ke);
- } else
- panic("bit set but runq empty for bit %d - lockbits=0x%lx availbits=0x%lx",
- pri, rq->rq_lockbits[0], rq->rq_status.rqb_bits[0]);
+
+ ke = TAILQ_FIRST(rqh);
+ runq_remove_unlocked(rq, ke);
+
KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
CTR3(KTR_RUNQ,
"runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
- return (ke);
+
}
- CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
+ mtx_unlock_spin(&rq->rq_lock);
- return (NULL);
+ return (ke);
}
/*
@@ -537,20 +549,18 @@
ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
TAILQ_REMOVE(rqh, ke, ke_procq);
-#ifndef SMP
if (TAILQ_EMPTY(rqh)) {
CTR0(KTR_RUNQ, "runq_remove: empty");
runq_clrbit(rq, pri);
}
-#endif
}
void
runq_remove(struct runq *rq, struct kse *ke)
{
- runq_lock(rq, ke);
+ mtx_lock_spin(&rq->rq_lock);
_runq_remove(rq, ke);
- runq_unlock(rq, ke);
+ mtx_unlock_spin(&rq->rq_lock);
}
void
@@ -559,34 +569,6 @@
_runq_remove(rq, ke);
}
-#ifdef SMP
-void
-runq_lock(struct runq *rq, struct kse *ke)
-{
- int pri;
- u_long lockbits;
-
- pri = ke->ke_rqindex;
- do {
- lockbits = (rq->rq_lockbits[RQB_WORD(pri)] & ~RQB_BIT(pri));
- } while (!atomic_cmpset_acq_long(&rq->rq_lockbits[RQB_WORD(pri)], lockbits,
- (lockbits | RQB_BIT(pri))));
- runq_clrbit(rq, pri);
-}
-
-void
-runq_unlock(struct runq *rq, struct kse *ke)
-{
- struct rqhead *rqh;
- int pri;
-
- pri = ke->ke_rqindex;
- rqh = &rq->rq_queues[pri];
- if (!TAILQ_EMPTY(rqh))
- runq_setbit(rq, pri);
- atomic_clear_rel_long(&rq->rq_lockbits[RQB_WORD(pri)], RQB_BIT(pri));
-}
-#endif
/****** functions that are temporarily here ***********/
#include <vm/uma.h>
extern struct mtx kse_zombie_lock;
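
Both adjustrunqueue() and setrunqueue() now open-code the same placement policy: pinned threads go to the per-CPU queue of their last CPU, bound threads stay on the per-CPU queue they were bound to, and everything else lands on the global queue indexed by td_lastcpu, a per-CPU "home" queue that other CPUs may later steal from. A hypothetical helper, not part of this change, sketching that shared policy:

	/*
	 * Sketch only: the runqueue selection policy duplicated by
	 * setrunqueue() and adjustrunqueue() above.
	 */
	static struct runq *
	select_runq(struct thread *td, struct kse *ke, int *cpu, int *single_cpu)
	{

		if (td->td_pinned != 0) {
			/* Pinned: per-CPU queue of the thread's last CPU. */
			*cpu = td->td_lastcpu;
			*single_cpu = 1;
			return (&runq_pcpu[*cpu]);
		} else if (ke->ke_flags & KEF_BOUND) {
			/* Bound: keep whatever per-CPU queue it already has. */
			*cpu = ke->ke_runq - &runq_pcpu[0];
			*single_cpu = 1;
			return (ke->ke_runq);
		} else {
			/* Unbound: this CPU's "home" global queue, stealable. */
			*cpu = NOCPU;
			*single_cpu = 0;
			return (&runq_global[td->td_lastcpu]);
		}
	}
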
==== //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#8 (text+ko) ====
@@ -118,6 +118,8 @@
static struct callout roundrobin_callout;
static struct thread *sched_choose(void);
+static unsigned long choose_cpu;
+
static void setup_runqs(void);
static void roundrobin(void *arg);
@@ -144,13 +146,16 @@
/*
* Global run queue.
*/
-static struct runq runq;
+
#ifdef SMP
/*
* Per-CPU run queues
*/
static struct runq runq_pcpu[MAXCPU];
+static struct runq runq_global[MAXCPU];
+#else
+static struct runq runq_global[1];
#endif
static void
@@ -159,11 +164,15 @@
#ifdef SMP
int i;
- for (i = 0; i < MAXCPU; ++i)
+ for (i = 0; i < MAXCPU; i++) {
runq_init(&runq_pcpu[i]);
+ runq_init(&runq_global[i]);
+ }
+#else
+ runq_init(&runq_global[0]);
#endif
- runq_init(&runq);
+
}
static int
@@ -466,6 +475,7 @@
if (td->td_slptime > 1)
continue;
td->td_estcpu = decay_cpu(loadfac, td->td_estcpu);
+
resetpriority(td);
resetpriority_thread(td);
} /* end of thread loop */
@@ -592,9 +602,9 @@
sched_runnable(void)
{
#ifdef SMP
- return runq_check(&runq) + runq_check(&runq_pcpu[curcpu]);
+ return runq_check(&runq_global[curcpu]) + runq_check(&runq_pcpu[curcpu]);
#else
- return runq_check(&runq);
+ return runq_check(&runq_global[0]);
#endif
}
@@ -694,11 +704,20 @@
mtx_assert(&sched_lock, MA_OWNED);
if (td->td_priority == prio)
return;
- if (TD_ON_RUNQ(td)) {
- adjustrunqueue(td, prio);
+
+ if (!TD_ON_RUNQ(td)) {
+ td->td_priority = prio;
} else {
- td->td_priority = prio;
+ struct runq *rq;
+ rq = td->td_kse->ke_runq;
+ mtx_lock_spin(&td->td_kse->ke_runq->rq_lock);
+ if (TD_ON_RUNQ(td))
+ adjustrunqueue(td, prio);
+ if (rq != td->td_kse->ke_runq)
+ panic("runq changed");
+ mtx_unlock_spin(&td->td_kse->ke_runq->rq_lock);
}
+
}
/*
@@ -979,13 +998,11 @@
#endif /* SMP */
void
-sched_add(struct thread *td, int flags)
+sched_add(struct thread *td, int flags, int cpu, int single_cpu)
#ifdef SMP
{
struct kse *ke;
int forwarded = 0;
- int cpu;
- int single_cpu = 0;
ke = td->td_kse;
mtx_assert(&sched_lock, MA_OWNED);
@@ -998,28 +1015,10 @@
td, td->td_proc->p_comm, td->td_priority, curthread,
curthread->td_proc->p_comm);
- if (td->td_pinned != 0) {
- cpu = td->td_lastcpu;
- ke->ke_runq = &runq_pcpu[cpu];
- single_cpu = 1;
- CTR3(KTR_RUNQ,
- "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
- } else if ((ke)->ke_flags & KEF_BOUND) {
- /* Find CPU from bound runq */
- KASSERT(SKE_RUNQ_PCPU(ke),("sched_add: bound kse not on cpu runq"));
- cpu = ke->ke_runq - &runq_pcpu[0];
- single_cpu = 1;
- CTR3(KTR_RUNQ,
- "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
- } else {
- CTR2(KTR_RUNQ,
- "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
- cpu = NOCPU;
- ke->ke_runq = &runq;
- }
+
if (single_cpu && (cpu != curcpu)) {
- kick_other_cpu(td->td_priority,cpu);
+ kick_other_cpu(td->td_priority, cpu);
} else {
if (!single_cpu) {
cpumask_t me = PCPU_GET(cpumask);
@@ -1039,7 +1038,7 @@
if ((td->td_proc->p_flag & P_NOLOAD) == 0)
sched_load_add();
- runq_add(ke->ke_runq, ke, flags);
+ runq_add_unlocked(ke->ke_runq, ke, flags);
TD_SET_RUNQ(td);
}
#else /* SMP */
@@ -1056,7 +1055,7 @@
td, td->td_proc->p_comm, td->td_priority, curthread,
curthread->td_proc->p_comm);
CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td);
- ke->ke_runq = &runq;
+ ke->ke_runq = &runq_global[0];
/*
* If we are yielding (on the way out anyhow)
@@ -1098,7 +1097,7 @@
if ((td->td_proc->p_flag & P_NOLOAD) == 0)
sched_load_rem();
- runq_remove(ke->ke_runq, ke);
+ runq_remove_unlocked(ke->ke_runq, ke);
TD_SET_NOT_ON_RUNQ(td);
}
@@ -1114,10 +1113,12 @@
struct thread *td = NULL;
#ifdef SMP
+ int i;
struct kse *kecpu;
+ unsigned long pick;
- rq = &runq;
- kesel = ke = runq_choose(&runq);
+ rq = &runq_global[curcpu];
+ kesel = ke = runq_choose(rq);
kecpu = runq_choose(&runq_pcpu[curcpu]);
if (ke == NULL ||
@@ -1129,19 +1130,31 @@
rq = &runq_pcpu[curcpu];
if (ke)
runq_add(rq, ke, SRQ_PREEMPTED);
- } else {
- if (kecpu)
- runq_add(rq, kecpu, SRQ_PREEMPTED);
+ } else if (kecpu) {
+ runq_add(rq, kecpu, SRQ_PREEMPTED);
CTR1(KTR_RUNQ, "choosing kse %p from main runq", ke);
}
+ if (kesel == NULL) {
+ /* trivial implementation for stealing */
+ for (i = 0; i < (mp_ncpus - 1); i++, choose_cpu++) {
+ pick = choose_cpu%mp_ncpus;
+ rq = &runq_global[pick];
+ if (rq == NULL)
+ panic("NULL cpu for %ld", pick);
+ if (runq_check(rq) &&
+ ((kesel = runq_choose(rq)) != NULL))
+ break;
+ }
+ }
+
#else
- rq = &runq;
- kesel = ke = runq_choose(&runq);
+ rq = &runq_global[0];
+ kesel = ke = runq_choose(rq);
#endif
if (kesel) {
td = kesel->ke_thread;
TD_SET_NOT_ON_RUNQ(td);
- KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM,
+ KASSERT(td->td_proc->p_sflag & PS_INMEM,
("sched_choose: process swapped out"));
}
==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_sleepqueue.c#7 (text+ko) ====
@@ -488,7 +488,6 @@
*/
else if (callout_stop(&td->td_slpcallout) == 0) {
atomic_set_int(&td->td_flags, TDF_TIMEOUT);
- mtx_lock_spin(&sched_lock);
TD_SET_SLEEPING(td);
mi_switch(SW_INVOL, NULL);
}
@@ -565,6 +564,7 @@
int rval;
MPASS(!(curthread->td_flags & TDF_SINTR));
+ mtx_assert(&sched_lock, MA_NOTOWNED);
mtx_lock_spin(&sched_lock);
sleepq_switch(wchan);
rval = sleepq_check_timeout();
==== //depot/projects/kmacy_sun4v/src/sys/sys/runq.h#4 (text+ko) ====
@@ -59,11 +59,12 @@
*/
struct runq {
struct rqbits rq_status;
- rqb_word_t rq_lockbits[RQB_LEN];
struct rqhead rq_queues[RQ_NQS];
+ struct mtx rq_lock;
};
void runq_add(struct runq *, struct kse *, int flags);
+void runq_add_unlocked(struct runq *, struct kse *, int flags);
int runq_check(struct runq *);
struct kse *runq_choose(struct runq *);
void runq_init(struct runq *);
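
The per-word rq_lockbits scheme is gone; each runq now carries a single rq_lock spin mutex, initialized in runq_init(). runq_add() and runq_remove() take the lock themselves, while the *_unlocked variants are for callers that already hold it, as setrunqueue() now does around sched_add(). A minimal usage sketch of the resulting discipline:

	/* Caller holds no rq_lock: the plain variant locks internally. */
	runq_add(rq, ke, flags);

	/*
	 * Caller takes rq_lock itself, e.g. to cover additional per-queue
	 * work under the same lock, so it must use the unlocked variant.
	 */
	mtx_lock_spin(&rq->rq_lock);
	runq_add_unlocked(rq, ke, flags);
	mtx_unlock_spin(&rq->rq_lock);
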
==== //depot/projects/kmacy_sun4v/src/sys/sys/sched.h#4 (text+ko) ====
@@ -71,7 +71,7 @@
/*
* Threads are moved on and off of run queues
*/
-void sched_add(struct thread *td, int flags);
+void sched_add(struct thread *td, int flags, int cpu, int single_cpu);
void sched_clock(struct thread *td);
void sched_rem(struct thread *td);