PERFORCE change 98846 for review
Kip Macy
kmacy at FreeBSD.org
Fri Jun 9 04:18:13 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=98846
Change 98846 by kmacy at kmacy_storage:sun4v_work on 2006/06/09 04:15:34
eliminate sched_lock acquisition from the common-case code paths in the timer interrupt handler
Affected files ...
.. //depot/projects/kmacy_sun4v/src/sys/kern/init_main.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_clock.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_shutdown.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_sig.c#7 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_synch.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_thr.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_thread.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_time.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_prof.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_sleepqueue.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_smp.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_trap.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_turnstile.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_generic.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_process.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_socket.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/uipc_usrreq.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/vfs_vnops.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/posix4/ksched.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/security/mac_lomac/mac_lomac.c#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/vm/vm_glue.c#5 edit
Differences ...
==== //depot/projects/kmacy_sun4v/src/sys/kern/init_main.c#5 (text+ko) ====
@@ -746,9 +746,7 @@
PROC_UNLOCK(initproc);
crfree(oldcred);
cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
- mtx_lock_spin(&sched_lock);
- initproc->p_sflag |= PS_INMEM;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&initproc->p_sflag, PS_INMEM);
cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
}
SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
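This first hunk is the change's core pattern, repeated across nearly every file below: a spin-locked read-modify-write of a 32-bit flags word collapses to a single atomic OR. A minimal userland sketch of the equivalence, using C11 atomics as a stand-in for the kernel's atomic_set_int(9) (the flag value and names here are illustrative, not the kernel's):

    #include <stdatomic.h>
    #include <stdio.h>

    #define PS_INMEM 0x00000001   /* illustrative bit, not the kernel's value */

    static _Atomic unsigned int p_sflag;

    /* Stand-in for FreeBSD's atomic_set_int(): an atomic OR of bits. */
    static void
    atomic_set_int_sketch(_Atomic unsigned int *w, unsigned int bits)
    {
        atomic_fetch_or(w, bits);
    }

    int
    main(void)
    {
        /* Equivalent to lock; w |= bits; unlock -- provided every other
         * writer of this word also goes through an atomic op. */
        atomic_set_int_sketch(&p_sflag, PS_INMEM);
        printf("p_sflag = %#x\n", atomic_load(&p_sflag));
        return (0);
    }

The usual caveat applies: once any updater of the word is lockless, every plain |= and &= ~ on that word must become atomic, which is why this changelist touches so many files at once.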
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_clock.c#5 (text+ko) ====
@@ -196,29 +196,30 @@
struct pstats *pstats;
struct thread *td = curthread;
struct proc *p = td->td_proc;
+ int sflag = 0;
/*
* Run current process's virtual and profile time, as needed.
*/
- mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
+ mtx_lock_spin_flags(&timer_lock, MTX_QUIET);
pstats = p->p_stats;
if (usermode &&
- timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
- itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
- p->p_sflag |= PS_ALRMPEND;
- td->td_flags |= TDF_ASTPENDING;
- }
+ timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+ sflag = PS_ALRMPEND;
if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
- itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
- p->p_sflag |= PS_PROFPEND;
- td->td_flags |= TDF_ASTPENDING;
- }
- mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
-
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+ sflag = PS_PROFPEND;
+ mtx_unlock_spin_flags(&timer_lock, MTX_QUIET);
#ifdef HWPMC_HOOKS
if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
#endif
+ if (!sflag)
+ return;
+
+ atomic_set_int(&p->p_sflag, sflag);
+ atomic_set_int(&td->td_flags, TDF_ASTPENDING);
}
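Two things happen in the hunk above: the per-process itimer state moves from sched_lock to a narrower timer_lock, and the thread/process flag updates are deferred until after that lock is dropped, so the common case (no timer pending) returns without touching sched_lock at all. A hedged sketch of the shape, with invented names:

    #include <pthread.h>
    #include <stdatomic.h>

    #define PS_ALRMPEND    0x1    /* illustrative values */
    #define PS_PROFPEND    0x2
    #define TDF_ASTPENDING 0x1

    static pthread_mutex_t timer_lock = PTHREAD_MUTEX_INITIALIZER;
    static int virt_expired, prof_expired;   /* protected by timer_lock */
    static _Atomic unsigned int p_sflag, td_flags;

    static void
    hardclock_itimer_sketch(void)
    {
        unsigned int sflag = 0;

        /* Decide what is pending while holding only the timer lock. */
        pthread_mutex_lock(&timer_lock);
        if (virt_expired)
            sflag |= PS_ALRMPEND;
        if (prof_expired)
            sflag |= PS_PROFPEND;
        pthread_mutex_unlock(&timer_lock);

        if (sflag == 0)
            return;               /* common case: no further locked work */

        /* Publish the decision with lock-free atomics. */
        atomic_fetch_or(&p_sflag, sflag);
        atomic_fetch_or(&td_flags, TDF_ASTPENDING);
    }

    int
    main(void)
    {
        hardclock_itimer_sketch();    /* no timer pending: early return */
        return (0);
    }

One review note on the hunk itself: it assigns sflag twice rather than ORing into it, so a tick on which both timers expire records only PS_PROFPEND; the sketch ORs the bits, which preserves the pre-change semantics.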
/*
@@ -404,7 +405,6 @@
td = curthread;
p = td->td_proc;
- mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
if (usermode) {
/*
* Charge the time as appropriate.
@@ -456,7 +456,7 @@
rss = pgtok(vmspace_resident_count(vm));
if (ru->ru_maxrss < rss)
ru->ru_maxrss = rss;
- mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
+
}
void
@@ -536,7 +536,7 @@
/*
* Handle a watchdog timeout by dumping interrupt information and
- * then either dropping to DDB or panicing.
+ * then either dropping to DDB or panicking.
*/
static void
watchdog_fire(void)
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_shutdown.c#4 (text+ko) ====
@@ -557,9 +557,7 @@
}
#endif
#endif
- mtx_lock_spin(&sched_lock);
- td->td_flags |= TDF_INPANIC;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&td->td_flags, TDF_INPANIC);
if (!sync_on_panic)
bootopt |= RB_NOSYNC;
boot(bootopt);
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_sig.c#7 (text+ko) ====
@@ -584,9 +584,7 @@
if (! SIGISEMPTY(set))
sigqueue_move_set(&p->p_sigqueue, &td->td_sigqueue, &set);
if (SIGPENDING(td)) {
- mtx_lock_spin(&sched_lock);
- td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&td->td_flags, (TDF_NEEDSIGCHK|TDF_ASTPENDING));
}
}
@@ -2361,7 +2359,7 @@
thread_suspend_one(td2);
} else {
if (sending || td != td2)
- td2->td_flags |= TDF_ASTPENDING;
+ atomic_set_int(&td2->td_flags, TDF_ASTPENDING);
#ifdef SMP
if (TD_IS_RUNNING(td2) && td2 != td)
forward_signal(td2);
@@ -2379,15 +2377,11 @@
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
&p->p_mtx.mtx_object, "Stopping for traced signal");
- mtx_lock_spin(&sched_lock);
- td->td_flags |= TDF_XSIG;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&td->td_flags, TDF_XSIG);
td->td_xsig = sig;
while ((p->p_flag & P_TRACED) && (td->td_flags & TDF_XSIG)) {
if (p->p_flag & P_SINGLE_EXIT) {
- mtx_lock_spin(&sched_lock);
- td->td_flags &= ~TDF_XSIG;
- mtx_unlock_spin(&sched_lock);
+ atomic_clear_int(&td->td_flags, TDF_XSIG);
return (sig);
}
/*
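With sched_lock gone from the traced-signal path above, the handshake reduces to the stopping thread setting TDF_XSIG atomically and the debugger path clearing it, with the while loop simply re-reading the flags word each iteration. A stripped-down model of that protocol (names invented; the real loop sleeps in msleep() between checks rather than spinning):

    #include <stdatomic.h>

    #define TDF_XSIG 0x100        /* illustrative bit */

    static _Atomic unsigned int td_flags;

    /* Stopping thread: announce that it holds a signal for the debugger. */
    static void
    post_xsig(void)
    {
        atomic_fetch_or(&td_flags, TDF_XSIG);
    }

    /* Debugger side: acknowledge by clearing the bit. */
    static void
    ack_xsig(void)
    {
        atomic_fetch_and(&td_flags, ~(unsigned int)TDF_XSIG);
    }

    int
    main(void)
    {
        post_xsig();
        ack_xsig();
        /* Re-reading the atomic word replaces re-reading it under
         * sched_lock; the bit is already clear here. */
        while (atomic_load(&td_flags) & TDF_XSIG)
            ;
        return (0);
    }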
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#6 (text+ko) ====
@@ -99,7 +99,7 @@
/* Shutting down, run idlethread on AP's */
td = PCPU_GET(idlethread);
CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
- td->td_kse->ke_flags |= KEF_DIDRUN;
+ atomic_set_int(&td->td_kse->ke_flags, KEF_DIDRUN);
TD_SET_RUNNING(td);
return (td);
}
@@ -115,7 +115,7 @@
td = PCPU_GET(idlethread);
CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
}
- td->td_kse->ke_flags |= KEF_DIDRUN;
+ atomic_set_int(&td->td_kse->ke_flags, KEF_DIDRUN);
/*
* If we are in panic, only allow system threads,
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_synch.c#6 (text+ko) ====
@@ -393,8 +393,8 @@
*/
if (p->p_cpulimit != RLIM_INFINITY &&
p->p_rux.rux_runtime >= p->p_cpulimit * cpu_tickrate()) {
- p->p_sflag |= PS_XCPU;
- td->td_flags |= TDF_ASTPENDING;
+ atomic_set_int(&p->p_sflag, PS_XCPU);
+ atomic_set_int(&td->td_flags, TDF_ASTPENDING);
}
/*
@@ -474,7 +474,7 @@
}
if ((p->p_sflag & PS_INMEM) == 0) {
if ((p->p_sflag & PS_SWAPPINGIN) == 0) {
- p->p_sflag |= PS_SWAPINREQ;
+ atomic_set_int(&p->p_sflag, PS_SWAPINREQ);
/*
* due to a LOR between sched_lock and
* the sleepqueue chain locks, use
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_thr.c#5 (text+ko) ====
@@ -46,6 +46,7 @@
#include <sys/limits.h>
#include <machine/frame.h>
+#include "opt_global.h"
extern int max_threads_per_proc;
extern int max_groups_per_proc;
@@ -88,6 +89,19 @@
return (EINVAL);
if ((error = copyin(uap->param, &param, sizeof(param))))
return (error);
+#ifndef __NO_STRICT_ALIGNMENT
+#ifdef SUN4V
+ if ((param.stack_size & (64-1)) != 0)
+ return (EINVAL);
+ if (((u_long)param.stack_base & (64-1)) != 0)
+ return (EINVAL);
+#else
+ if ((param.stack_size & (sizeof(void *)-1)) != 0)
+ return (EINVAL);
+ if (((u_long)param.stack_base & (sizeof(void *)-1)) != 0)
+ return (EINVAL);
+#endif
+#endif
error = create_thread(td, NULL, param.start_func, param.arg,
param.stack_base, param.stack_size, param.tls_base,
param.child_tid, param.parent_tid, param.flags);
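The new checks reject user-supplied thread stacks that are not suitably aligned: 64 bytes on sun4v (presumably for its register save area requirements; the diff does not state the motivation), pointer size elsewhere. The x & (align - 1) test works for any power-of-two alignment. A small self-contained sketch:

    #include <assert.h>
    #include <stdint.h>

    /* True if x is aligned to align, where align is a power of two. */
    static int
    is_aligned(uintptr_t x, uintptr_t align)
    {
        return ((x & (align - 1)) == 0);
    }

    int
    main(void)
    {
        assert(is_aligned(0x10000, 64));
        assert(!is_aligned(0x10028, 64));
        assert(is_aligned(0x8008, sizeof(void *)));
        return (0);
    }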
@@ -301,9 +315,7 @@
error = msleep((void *)td, &td->td_proc->p_mtx, PCATCH, "lthr",
hz);
if (td->td_flags & TDF_THRWAKEUP) {
- mtx_lock_spin(&sched_lock);
- td->td_flags &= ~TDF_THRWAKEUP;
- mtx_unlock_spin(&sched_lock);
+ atomic_clear_int(&td->td_flags, TDF_THRWAKEUP);
PROC_UNLOCK(td->td_proc);
return (0);
}
@@ -331,9 +343,7 @@
PROC_UNLOCK(p);
return (ESRCH);
}
- mtx_lock_spin(&sched_lock);
- ttd->td_flags |= TDF_THRWAKEUP;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&ttd->td_flags, TDF_THRWAKEUP);
wakeup((void *)ttd);
PROC_UNLOCK(p);
return (0);
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_thread.c#5 (text+ko) ====
@@ -544,12 +544,12 @@
FOREACH_THREAD_IN_PROC(p, td2) {
if (td2 == td)
continue;
- td2->td_flags |= TDF_ASTPENDING;
+ atomic_set_int(&td2->td_flags, TDF_ASTPENDING);
if (TD_IS_INHIBITED(td2)) {
switch (mode) {
case SINGLE_EXIT:
if (td->td_flags & TDF_DBSUSPEND)
- td->td_flags &= ~TDF_DBSUSPEND;
+ atomic_clear_int(&td->td_flags, TDF_DBSUSPEND);
if (TD_IS_SUSPENDED(td2))
thread_unsuspend_one(td2);
if (TD_ON_SLEEPQ(td2) &&
@@ -717,7 +717,7 @@
thread_suspend_one(td);
if (return_instead == 0) {
p->p_boundary_count++;
- td->td_flags |= TDF_BOUNDARY;
+ atomic_set_int(&td->td_flags, TDF_BOUNDARY);
}
if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
if (p->p_numthreads == p->p_suspcount)
@@ -727,7 +727,7 @@
mi_switch(SW_INVOL, NULL);
if (return_instead == 0) {
p->p_boundary_count--;
- td->td_flags &= ~TDF_BOUNDARY;
+ atomic_clear_int(&td->td_flags, TDF_BOUNDARY);
}
mtx_unlock_spin(&sched_lock);
PROC_LOCK(p);
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_time.c#4 (text+ko) ====
@@ -588,9 +588,9 @@
timevalsub(&aitv->it_value, &ctv);
}
} else {
- mtx_lock_spin(&sched_lock);
+ mtx_lock_spin(&timer_lock);
*aitv = p->p_stats->p_timer[which];
- mtx_unlock_spin(&sched_lock);
+ mtx_unlock_spin(&timer_lock);
}
return (0);
}
@@ -663,10 +663,10 @@
timevalsub(&oitv->it_value, &ctv);
}
} else {
- mtx_lock_spin(&sched_lock);
+ mtx_lock_spin(&timer_lock);
*oitv = p->p_stats->p_timer[which];
p->p_stats->p_timer[which] = *aitv;
- mtx_unlock_spin(&sched_lock);
+ mtx_unlock_spin(&timer_lock);
}
return (0);
}
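getitimer()/setitimer() now serialize p_timer[] against hardclock() with the same timer_lock, rather than overloading sched_lock. The lock's definition is not visible in this (truncated) diff; a plausible declaration, consistent with it being taken from the clock interrupt via mtx_lock_spin_flags(), would be:

    #include <sys/param.h>
    #include <sys/kernel.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>

    /* Hypothetical declaration -- the changelist's actual definition of
     * timer_lock is not shown in the truncated diff. */
    static struct mtx timer_lock;
    MTX_SYSINIT(timer_lock, &timer_lock, "itimer lock", MTX_SPIN);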
==== //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#6 (text+ko) ====
@@ -259,7 +259,7 @@
mtx_assert(&sched_lock, MA_OWNED);
if (td->td_priority < curthread->td_priority)
- curthread->td_flags |= TDF_NEEDRESCHED;
+ atomic_set_int(&curthread->td_flags, TDF_NEEDRESCHED);
}
/*
@@ -408,14 +408,14 @@
*/
if (ke->ke_state == KES_ONRUNQ) {
awake = 1;
- ke->ke_flags &= ~KEF_DIDRUN;
+ atomic_clear_int(&ke->ke_flags, KEF_DIDRUN);
} else if ((ke->ke_state == KES_THREAD) &&
(TD_IS_RUNNING(td))) {
awake = 1;
/* Do not clear KEF_DIDRUN */
} else if (ke->ke_flags & KEF_DIDRUN) {
awake = 1;
- ke->ke_flags &= ~KEF_DIDRUN;
+ atomic_clear_int(&ke->ke_flags, KEF_DIDRUN);
}
/*
@@ -626,14 +626,15 @@
{
struct kse *ke;
- mtx_assert(&sched_lock, MA_OWNED);
ke = td->td_kse;
- ke->ke_cpticks++;
+ atomic_add_int(&ke->ke_cpticks, 1);
td->td_estcpu = ESTCPULIM(td->td_estcpu + 1);
if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
+ mtx_lock_spin(&sched_lock);
resetpriority(td);
resetpriority_thread(td);
+ mtx_unlock_spin(&sched_lock);
}
}
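sched_clock() now bumps the per-KSE tick count with atomic_add_int() and takes sched_lock only on the infrequent path that recomputes priority, instead of asserting the lock held for the whole function. The general shape, an atomic fast path with the lock confined to the rare composite update, sketched with invented names:

    #include <pthread.h>
    #include <stdatomic.h>

    #define INVERSE_ESTCPU_WEIGHT 8       /* illustrative constant */

    static _Atomic int ke_cpticks;        /* now updated lock-free */
    static int td_estcpu;                 /* only touched by its own thread */
    static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    sched_clock_sketch(void)
    {
        atomic_fetch_add(&ke_cpticks, 1);           /* lock-free fast path */
        if (++td_estcpu % INVERSE_ESTCPU_WEIGHT == 0) {
            pthread_mutex_lock(&sched_lock);        /* rare slow path */
            /* resetpriority()/resetpriority_thread() would run here */
            pthread_mutex_unlock(&sched_lock);
        }
    }

    int
    main(void)
    {
        for (int i = 0; i < INVERSE_ESTCPU_WEIGHT; i++)
            sched_clock_sketch();
        return (0);
    }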
@@ -709,7 +710,7 @@
sched_lend_prio(struct thread *td, u_char prio)
{
- td->td_flags |= TDF_BORROWING;
+ atomic_set_int(&td->td_flags, TDF_BORROWING);
sched_priority(td, prio);
}
@@ -732,7 +733,7 @@
else
base_pri = td->td_base_pri;
if (prio >= base_pri) {
- td->td_flags &= ~TDF_BORROWING;
+ atomic_clear_int(&td->td_flags, TDF_BORROWING);
sched_prio(td, base_pri);
} else
sched_lend_prio(td, prio);
@@ -778,19 +779,22 @@
{
struct kse *ke;
struct proc *p;
+ struct thread *choosetd;
ke = td->td_kse;
p = td->td_proc;
+ choosetd = NULL;
- mtx_assert(&sched_lock, MA_OWNED);
+ if (newtd == NULL)
+ choosetd = choosethread();
if ((p->p_flag & P_NOLOAD) == 0)
sched_load_rem();
if (newtd)
- newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED);
+ atomic_set_int(&newtd->td_flags, (td->td_flags & TDF_NEEDRESCHED));
td->td_lastcpu = td->td_oncpu;
- td->td_flags &= ~TDF_NEEDRESCHED;
+ atomic_clear_int(&td->td_flags, TDF_NEEDRESCHED);
td->td_owepreempt = 0;
td->td_oncpu = NOCPU;
/*
@@ -819,21 +823,12 @@
*/
KASSERT((newtd->td_inhibitors == 0),
("trying to run inhibitted thread"));
- newtd->td_kse->ke_flags |= KEF_DIDRUN;
+ atomic_set_int(&newtd->td_kse->ke_flags, KEF_DIDRUN);
TD_SET_RUNNING(newtd);
if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
sched_load_add();
} else {
-#if 0
- spinlock_enter();
- mtx_unlock_spin(&sched_lock);
-#endif
- newtd = choosethread();
-#if 0
- mtx_lock_spin(&sched_lock);
- spinlock_exit();
-#endif
-
+ newtd = choosetd;
}
if (td != newtd) {
@@ -948,8 +943,6 @@
ipi_selected(map, IPI_AST);
return (1);
}
- if (cpunum == NOCPU)
- printf("forward_wakeup: Idle processor not found\n");
return (0);
}
#endif
@@ -982,7 +975,7 @@
}
#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */
- pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
+ atomic_set_int(&pcpu->pc_curthread->td_flags, TDF_NEEDRESCHED);
ipi_selected( pcpu->pc_cpumask , IPI_AST);
return;
}
@@ -1175,7 +1168,7 @@
KASSERT(TD_IS_RUNNING(td),
("sched_bind: cannot bind non-running thread"));
ke = td->td_kse;
- ke->ke_flags |= KEF_BOUND;
+ atomic_set_int(&ke->ke_flags, KEF_BOUND);
#ifdef SMP
ke->ke_runq = &runq_pcpu[cpu];
if (PCPU_GET(cpuid) == cpu)
@@ -1189,15 +1182,13 @@
sched_unbind(struct thread* td)
{
- mtx_assert(&sched_lock, MA_OWNED);
- td->td_kse->ke_flags &= ~KEF_BOUND;
+ atomic_clear_int(&td->td_kse->ke_flags, KEF_BOUND);
}
int
sched_is_bound(struct thread *td)
{
- mtx_assert(&sched_lock, MA_OWNED);
return (td->td_kse->ke_flags & KEF_BOUND);
}
==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_prof.c#3 (text+ko) ====
@@ -484,9 +484,7 @@
td->td_profil_addr = pc;
td->td_profil_ticks = ticks;
td->td_pflags |= TDP_OWEUPC;
- mtx_lock_spin(&sched_lock);
- td->td_flags |= TDF_ASTPENDING;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&td->td_flags, TDF_ASTPENDING);
}
}
==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_sleepqueue.c#5 (text+ko) ====
@@ -315,14 +315,13 @@
}
TAILQ_INSERT_TAIL(&sq->sq_blocked, td, td_slpq);
td->td_sleepqueue = NULL;
- mtx_lock_spin(&sched_lock);
td->td_wchan = wchan;
td->td_wmesg = wmesg;
if (flags & SLEEPQ_INTERRUPTIBLE) {
- td->td_flags |= TDF_SINTR;
- td->td_flags &= ~TDF_SLEEPABORT;
+ atomic_set_int(&td->td_flags, TDF_SINTR);
+ atomic_clear_int(&td->td_flags, TDF_SLEEPABORT);
}
- mtx_unlock_spin(&sched_lock);
+
}
/*
@@ -468,7 +467,7 @@
* If TDF_TIMEOUT is set, we timed out.
*/
if (td->td_flags & TDF_TIMEOUT) {
- td->td_flags &= ~TDF_TIMEOUT;
+ atomic_clear_int(&td->td_flags, TDF_TIMEOUT);
return (EWOULDBLOCK);
}
@@ -477,15 +476,16 @@
* already been woken up.
*/
if (td->td_flags & TDF_TIMOFAIL)
- td->td_flags &= ~TDF_TIMOFAIL;
-
+ atomic_clear_int(&td->td_flags, TDF_TIMOFAIL);
+
/*
* If callout_stop() fails, then the timeout is running on
* another CPU, so synchronize with it to avoid having it
* accidentally wake up a subsequent sleep.
*/
else if (callout_stop(&td->td_slpcallout) == 0) {
- td->td_flags |= TDF_TIMEOUT;
+ atomic_set_int(&td->td_flags, TDF_TIMEOUT);
+ mtx_lock_spin(&sched_lock);
TD_SET_SLEEPING(td);
mi_switch(SW_INVOL, NULL);
}
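The sleepqueue timeout path distinguishes "callout cancelled in time" from "handler already running on another CPU": callout_stop() returning 0 means the latter, so the thread marks itself TDF_TIMEOUT and switches out once more, letting the running handler complete the wakeup. A toy model of why a CAS-style cancel can fail (all names invented):

    #include <stdatomic.h>
    #include <stdio.h>

    enum co_state { CO_ARMED, CO_RUNNING, CO_CANCELLED };

    static _Atomic int co_state = CO_ARMED;

    /* Model of callout_stop(): succeeds (returns 1) only if the handler
     * has not already begun executing. */
    static int
    callout_stop_sketch(void)
    {
        int expected = CO_ARMED;

        return (atomic_compare_exchange_strong(&co_state, &expected,
            CO_CANCELLED));
    }

    int
    main(void)
    {
        atomic_store(&co_state, CO_RUNNING);    /* handler won the race */
        if (callout_stop_sketch() == 0)
            printf("cancel failed: defer to the running handler\n");
        return (0);
    }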
@@ -500,15 +500,14 @@
{
struct thread *td;
- mtx_assert(&sched_lock, MA_OWNED);
td = curthread;
/* We are no longer in an interruptible sleep. */
if (td->td_flags & TDF_SINTR)
- td->td_flags &= ~TDF_SINTR;
+ atomic_clear_int(&td->td_flags, TDF_SINTR);
if (td->td_flags & TDF_SLEEPABORT) {
- td->td_flags &= ~TDF_SLEEPABORT;
+ atomic_clear_int(&td->td_flags, TDF_SLEEPABORT);
return (td->td_intrval);
}
@@ -547,7 +546,7 @@
else
sleepq_release(wchan);
rval = sleepq_check_signals();
- mtx_unlock_spin(&sched_lock);
+ mtx_unlock_spin(&sched_lock);
if (rcatch)
return (rcatch);
return (rval);
@@ -632,7 +631,7 @@
td->td_wmesg = NULL;
td->td_wchan = NULL;
- td->td_flags &= ~TDF_SINTR;
+ atomic_clear_int(&td->td_flags, TDF_SINTR);
/*
* Note that thread td might not be sleeping if it is running
@@ -759,7 +758,7 @@
if (TD_ON_SLEEPQ(td)) {
MPASS(td->td_wchan == wchan);
MPASS(sq != NULL);
- td->td_flags |= TDF_TIMEOUT;
+ atomic_set_int(&td->td_flags, TDF_TIMEOUT);
sleepq_resume_thread(sq, td, -1);
mtx_unlock_spin(&sched_lock);
sleepq_release(wchan);
@@ -778,11 +777,11 @@
*/
if (td->td_flags & TDF_TIMEOUT) {
MPASS(TD_IS_SLEEPING(td));
- td->td_flags &= ~TDF_TIMEOUT;
+ atomic_clear_int(&td->td_flags, TDF_TIMEOUT);
TD_CLR_SLEEPING(td);
setrunnable(td);
} else
- td->td_flags |= TDF_TIMOFAIL;
+ atomic_set_int(&td->td_flags, TDF_TIMOFAIL);
mtx_unlock_spin(&sched_lock);
}
@@ -846,7 +845,7 @@
wchan = td->td_wchan;
if (wchan != NULL) {
td->td_intrval = intrval;
- td->td_flags |= TDF_SLEEPABORT;
+ atomic_set_int(&td->td_flags, TDF_SLEEPABORT);
}
mtx_unlock_spin(&sched_lock);
sleepq_remove(td, wchan);
==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_smp.c#3 (text+ko) ====
@@ -202,7 +202,7 @@
id = pc->pc_cpumask;
if (id != me && (id & stopped_cpus) == 0 &&
td != pc->pc_idlethread) {
- td->td_flags |= TDF_NEEDRESCHED;
+ atomic_set_int(&td->td_flags, TDF_NEEDRESCHED);
map |= id;
}
}
==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_trap.c#4 (text+ko) ====
@@ -77,7 +77,7 @@
userret(struct thread *td, struct trapframe *frame)
{
struct proc *p = td->td_proc;
-
+
CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
#ifdef DIAGNOSTIC
@@ -150,6 +150,7 @@
td = curthread;
p = td->td_proc;
+
CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
@@ -166,19 +167,18 @@
* AST's saved in sflag, the astpending flag will be set and
* ast() will be called again.
*/
- mtx_lock_spin(&sched_lock);
+
flags = td->td_flags;
sflag = p->p_sflag;
if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND | PS_XCPU))
- p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND | PS_XCPU);
+ atomic_clear_int(&p->p_sflag, (PS_ALRMPEND | PS_PROFPEND | PS_XCPU));
#ifdef MAC
if (p->p_sflag & PS_MACPEND)
- p->p_sflag &= ~PS_MACPEND;
+ atomic_clear_int(&p->p_sflag, PS_MACPEND);
#endif
- td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK |
- TDF_NEEDRESCHED | TDF_INTERRUPT);
cnt.v_soft++;
- mtx_unlock_spin(&sched_lock);
+ atomic_clear_int(&td->td_flags, (TDF_ASTPENDING | TDF_NEEDSIGCHK |
+ TDF_NEEDRESCHED | TDF_INTERRUPT));
/*
* XXXKSE While the fact that we owe a user profiling
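ast() now snapshots td_flags and p_sflag and then clears the relevant bits with atomic AND-NOT operations, instead of holding sched_lock across the whole sequence. As the comment above the hunk notes, the snapshot/clear pair is not atomic as a whole; the design relies on any flag set in between also re-setting TDF_ASTPENDING, so ast() runs again. A sketch of the read-then-clear step (bit values illustrative):

    #include <stdatomic.h>

    #define TDF_ASTPENDING  0x1   /* illustrative bits */
    #define TDF_NEEDRESCHED 0x2

    static _Atomic unsigned int td_flags;

    static unsigned int
    ast_take_flags(void)
    {
        unsigned int flags;
        unsigned int mask = TDF_ASTPENDING | TDF_NEEDRESCHED;

        flags = atomic_load(&td_flags);     /* snapshot */
        atomic_fetch_and(&td_flags, ~mask); /* clear the handled bits */
        /* A bit set between the two operations is missing from `flags',
         * but its setter also re-sets TDF_ASTPENDING, so ast() is
         * re-entered and observes it on the next pass. */
        return (flags);
    }

    int
    main(void)
    {
        atomic_fetch_or(&td_flags, TDF_ASTPENDING | TDF_NEEDRESCHED);
        (void)ast_take_flags();
        return (0);
    }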
==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_turnstile.c#5 (text+ko) ====
@@ -640,15 +640,13 @@
td->td_turnstile = NULL;
mtx_unlock_spin(&tc->tc_lock);
- mtx_lock_spin(&sched_lock);
/*
* Handle race condition where a thread on another CPU that owns
* lock 'lock' could have woken us in between us dropping the
* turnstile chain lock and acquiring the sched_lock.
*/
if (td->td_flags & TDF_TSNOBLOCK) {
- td->td_flags &= ~TDF_TSNOBLOCK;
- mtx_unlock_spin(&sched_lock);
+ atomic_clear_int(&td->td_flags, TDF_TSNOBLOCK);
return;
}
@@ -668,7 +666,7 @@
}
}
#endif
-
+ mtx_lock_spin(&sched_lock);
/* Save who we are blocked on and switch. */
td->td_tsqueue = queue;
td->td_blocked = ts;
@@ -871,7 +869,7 @@
MPASS(TD_CAN_RUN(td));
setrunqueue(td, SRQ_BORING);
} else {
- td->td_flags |= TDF_TSNOBLOCK;
+ atomic_set_int(&td->td_flags, TDF_TSNOBLOCK);
MPASS(TD_IS_RUNNING(td) || TD_ON_RUNQ(td));
}
}
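The turnstile hunks implement a "don't block" handshake without sched_lock: a waker that finds its target not yet blocked sets TDF_TSNOBLOCK, and the would-be blocker checks the flag after queuing itself and bails out instead of switching. A stripped-down model of the two sides (invented names):

    #include <stdatomic.h>

    #define TDF_TSNOBLOCK 0x1     /* illustrative bit */

    static _Atomic unsigned int td_flags;

    /* Waker side: target is still running, so tell it not to block. */
    static void
    wakeup_running_waiter(void)
    {
        atomic_fetch_or(&td_flags, TDF_TSNOBLOCK);
    }

    /* Blocker side: after enqueuing on the turnstile, re-check. */
    static int
    should_block(void)
    {
        if (atomic_load(&td_flags) & TDF_TSNOBLOCK) {
            atomic_fetch_and(&td_flags, ~(unsigned int)TDF_TSNOBLOCK);
            return (0);           /* wakeup already arrived; don't switch */
        }
        return (1);
    }

    int
    main(void)
    {
        wakeup_running_waiter();
        return (should_block());  /* 0: the wakeup already arrived */
    }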
==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_generic.c#3 (text+ko) ====
@@ -755,9 +755,7 @@
mtx_lock(&sellock);
retry:
ncoll = nselcoll;
- mtx_lock_spin(&sched_lock);
- td->td_flags |= TDF_SELECT;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&td->td_flags, TDF_SELECT);
mtx_unlock(&sellock);
error = selscan(td, ibits, obits, nd);
@@ -797,9 +795,7 @@
done:
clear_selinfo_list(td);
- mtx_lock_spin(&sched_lock);
- td->td_flags &= ~TDF_SELECT;
- mtx_unlock_spin(&sched_lock);
+ atomic_clear_int(&td->td_flags, TDF_SELECT);
mtx_unlock(&sellock);
done_nosellock:
@@ -935,9 +931,7 @@
mtx_lock(&sellock);
retry:
ncoll = nselcoll;
- mtx_lock_spin(&sched_lock);
- td->td_flags |= TDF_SELECT;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&td->td_flags, TDF_SELECT);
mtx_unlock(&sellock);
error = pollscan(td, bits, nfds);
@@ -958,12 +952,9 @@
* sellock, so check TDF_SELECT and the number of collisions
* and rescan the file descriptors if necessary.
*/
- mtx_lock_spin(&sched_lock);
if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- mtx_unlock_spin(&sched_lock);
goto retry;
}
- mtx_unlock_spin(&sched_lock);
if (timo > 0)
error = cv_timedwait_sig(&selwait, &sellock, timo);
@@ -975,9 +966,7 @@
done:
clear_selinfo_list(td);
- mtx_lock_spin(&sched_lock);
- td->td_flags &= ~TDF_SELECT;
- mtx_unlock_spin(&sched_lock);
+ atomic_clear_int(&td->td_flags, TDF_SELECT);
mtx_unlock(&sellock);
done_nosellock:
@@ -1150,9 +1139,7 @@
}
TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
sip->si_thread = NULL;
- mtx_lock_spin(&sched_lock);
- td->td_flags &= ~TDF_SELECT;
- mtx_unlock_spin(&sched_lock);
+ atomic_clear_int(&td->td_flags, TDF_SELECT);
sleepq_remove(td, &selwait);
mtx_unlock(&sellock);
}
==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_process.c#4 (text+ko) ====
@@ -708,15 +708,11 @@
break;
case PT_SUSPEND:
- mtx_lock_spin(&sched_lock);
- td2->td_flags |= TDF_DBSUSPEND;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&td2->td_flags, TDF_DBSUSPEND);
break;
case PT_RESUME:
- mtx_lock_spin(&sched_lock);
- td2->td_flags &= ~TDF_DBSUSPEND;
- mtx_unlock_spin(&sched_lock);
+ atomic_clear_int(&td2->td_flags, TDF_DBSUSPEND);
break;
case PT_STEP:
@@ -787,9 +783,7 @@
proctree_locked = 0;
}
/* deliver or queue signal */
- mtx_lock_spin(&sched_lock);
- td2->td_flags &= ~TDF_XSIG;
- mtx_unlock_spin(&sched_lock);
+ atomic_clear_int(&td2->td_flags, TDF_XSIG);
td2->td_xsig = data;
p->p_xstat = data;
p->p_xthread = NULL;
@@ -798,7 +792,8 @@
if (req == PT_DETACH) {
struct thread *td3;
FOREACH_THREAD_IN_PROC(p, td3)
- td3->td_flags &= ~TDF_DBSUSPEND;
+ atomic_clear_int(&td3->td_flags, TDF_DBSUSPEND);
+
}
/*
* unsuspend all threads, to not let a thread run,
==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_socket.c#3 (text+ko) ====
@@ -63,7 +63,7 @@
.fo_kqfilter = soo_kqfilter,
.fo_stat = soo_stat,
.fo_close = soo_close,
- .fo_flags = DFLAG_PASSABLE
+ .fo_flags = DFLAG_PASSABLE | DFLAG_MPSAFE
};
/* ARGSUSED */
==== //depot/projects/kmacy_sun4v/src/sys/kern/uipc_usrreq.c#5 (text+ko) ====
@@ -88,32 +88,99 @@
struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
/*
- * Currently, UNIX domain sockets are protected by a single subsystem lock,
- * which covers global data structures and variables, the contents of each
- * per-socket unpcb structure, and the so_pcb field in sockets attached to
- * the UNIX domain. This provides for a moderate degree of paralellism, as
- * receive operations on UNIX domain sockets do not need to acquire the
- * subsystem lock. Finer grained locking to permit send() without acquiring
- * a global lock would be a logical next step.
+ * Both send and receive buffers are allocated PIPSIZ bytes of buffering
+ * for stream sockets, although the total for sender and receiver is
+ * actually only PIPSIZ.
+ * Datagram sockets really use the sendspace as the maximum datagram size,
+ * and don't really want to reserve the sendspace. Their recvspace should
+ * be large enough for at least one max-size datagram plus address.
+ */
+#ifndef PIPSIZ
+#define PIPSIZ 8192
+#endif
+static u_long unpst_sendspace = PIPSIZ;
+static u_long unpst_recvspace = PIPSIZ;
+static u_long unpdg_sendspace = 2*1024; /* really max datagram size */
+static u_long unpdg_recvspace = 4*1024;
+
+static int unp_rights; /* file descriptors in flight */
+
+SYSCTL_DECL(_net_local_stream);
+SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
+ &unpst_sendspace, 0, "");
+SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
+ &unpst_recvspace, 0, "");
+SYSCTL_DECL(_net_local_dgram);
+SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
+ &unpdg_sendspace, 0, "");
+SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
+ &unpdg_recvspace, 0, "");
+SYSCTL_DECL(_net_local);
+SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
+
+/*
+ * Locking and synchronization:
+ *
+ * A global UNIX domain socket mutex protects all global variables in the
+ * implementation, as well as the linked lists tracking the set of allocated
+ * UNIX domain sockets. These variables/fields may be read lockless using
+ * atomic operations if stale values are permissible; otherwise the global
+ * mutex is required to read or read-modify-write. The global mutex also
+ * serves to prevent deadlock when multiple PCB locks may be acquired at once
+ * (see below). Finally, the global mutex protects uncounted references from
+ * vnodes to sockets bound to those vnodes: to safely dereference the
+ * v_socket pointer, the global mutex must be held while a full reference is
+ * acquired.
+ *
+ * UNIX domain sockets each have one unpcb PCB associated with them from
+ * pru_attach() to pru_detach() via the so_pcb pointer. The validity of that
+ * reference is an invariant for the lifetime of the socket, so no lock is
+ * required to dereference the so_pcb pointer if a valid socket reference is
+ * held.
+ *
+ * Each PCB has a back-pointer to its socket, unp_socket. This pointer may
+ * only be safely dereferenced as long as a valid reference to the PCB is
+ * held. Typically, this reference will be from the socket, or from another
+ * PCB when the referring PCB's lock is held (in order that the reference not
+ * be invalidated during use). In particular, to follow
+ * unp->unp_conn->unp_socket, you need the lock on unp, not unp_conn.
+ *
+ * Fields of PCBs are locked using a per-unpcb lock, unp_mtx. Individual
+ * atomic reads without the lock may be performed "lockless", but more
+ * complex reads and read-modify-writes require the mutex to be held. No
+ * lock order is defined between PCB locks -- multiple PCB locks may be
+ * acquired at the same time only when holding the global UNIX domain socket
+ * mutex, which prevents deadlocks. To prevent inter-PCB references from
+ * becoming invalid, the lock protecting the reference must be held for the
+ * lifetime of use of the reference.
*
- * The UNIX domain socket lock preceds all socket layer locks, including the
- * socket lock and socket buffer lock, permitting UNIX domain socket code to
- * call into socket support routines without releasing its locks.
+ * Blocking with UNIX domain sockets is a tricky issue: unlike most network
+ * protocols, bind() is a non-atomic operation, and connect() requires
+ * potential sleeping in the protocol, due to potentially waiting on local or
+ * distributed file systems. We try to separate "lookup" operations, which
+ * may sleep, and the IPC operations themselves, which typically can occur
+ * with relative atomicity as locks can be held over the entire operation.
*
- * Some caution is required in areas where the UNIX domain socket code enters
- * VFS in order to create or find rendezvous points. This results in
- * dropping of the UNIX domain socket subsystem lock, acquisition of the
- * Giant lock, and potential sleeping. This increases the chances of races,
- * and exposes weaknesses in the socket->protocol API by offering poor
- * failure modes.
+ * Another tricky issue is simultaneous multi-threaded or multi-process
+ * access to a single UNIX domain socket. These are handled by the flags
+ * UNP_CONNECTING and UNP_BINDING.
*/
-static struct mtx unp_mtx;
-#define UNP_LOCK_INIT() \
- mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
-#define UNP_LOCK() mtx_lock(&unp_mtx)
-#define UNP_UNLOCK() mtx_unlock(&unp_mtx)
-#define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED)
-#define UNP_UNLOCK_ASSERT() mtx_assert(&unp_mtx, MA_NOTOWNED)
+static struct mtx unp_global_mtx;
+
+#define UNP_GLOBAL_LOCK_INIT() mtx_init(&unp_global_mtx, \
+ "unp_global_mtx", NULL, MTX_DEF)
+#define UNP_GLOBAL_LOCK() mtx_lock(&unp_global_mtx)
+#define UNP_GLOBAL_UNLOCK() mtx_unlock(&unp_global_mtx)
+#define UNP_GLOBAL_UNLOCK_ASSERT() mtx_assert(&unp_global_mtx, MA_NOTOWNED)
+#define UNP_GLOBAL_LOCK_ASSERT() mtx_assert(&unp_global_mtx, MA_OWNED)
+
+#define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \
+ "unp_mtx", "unp_mtx", \
+ MTX_DUPOK|MTX_DEF|MTX_RECURSE)
+#define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx)
+#define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx)
+#define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx)
+#define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED)
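The macro block above replaces the single subsystem mutex with a two-level scheme: a global mutex for the global lists, vnode back-references, and any multi-PCB lock acquisition, plus a per-unpcb mutex (MTX_DUPOK, since two locks of the same class may be held at once). The deadlock-avoidance rule from the comment is that two PCB locks are only ever taken while holding the global lock. A userland sketch of that rule (names invented):

    #include <pthread.h>

    struct unpcb_sketch {
        pthread_mutex_t unp_mtx;  /* guards this PCB's fields */
    };

    static pthread_mutex_t unp_global_mtx = PTHREAD_MUTEX_INITIALIZER;

    /* Take two PCB locks without defining a per-PCB lock order: every
     * multi-PCB acquisition is serialized by the global mutex, so no
     * two threads can interleave pair acquisitions and deadlock. */
    static void
    unp_lock_pair(struct unpcb_sketch *a, struct unpcb_sketch *b)
    {
        pthread_mutex_lock(&unp_global_mtx);
        pthread_mutex_lock(&a->unp_mtx);
        if (b != a)
            pthread_mutex_lock(&b->unp_mtx);
        pthread_mutex_unlock(&unp_global_mtx);
    }

    int
    main(void)
    {
        struct unpcb_sketch a = { PTHREAD_MUTEX_INITIALIZER };
        struct unpcb_sketch b = { PTHREAD_MUTEX_INITIALIZER };

        unp_lock_pair(&a, &b);
        pthread_mutex_unlock(&b.unp_mtx);
        pthread_mutex_unlock(&a.unp_mtx);
        return (0);
    }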
/*
* Garbage collection of cyclic file descriptor/socket references occurs
@@ -123,12 +190,10 @@
*/
static struct task unp_gc_task;
-static int unp_attach(struct socket *);
static void unp_detach(struct unpcb *);
-static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
static int unp_connect(struct socket *,struct sockaddr *, struct thread *);
static int unp_connect2(struct socket *so, struct socket *so2, int);
-static void unp_disconnect(struct unpcb *);
+static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
static void unp_shutdown(struct unpcb *);
static void unp_drop(struct unpcb *, int);
static void unp_gc(__unused void *, int);
@@ -137,8 +202,6 @@
static void unp_discard(struct file *);
static void unp_freerights(struct file **, int);
static int unp_internalize(struct mbuf **, struct thread *);
-static int unp_listen(struct socket *, struct unpcb *, int,
- struct thread *);
static void
uipc_abort(struct socket *so)
@@ -147,83 +210,238 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
- UNP_LOCK();
+
+ UNP_GLOBAL_LOCK();
>>> TRUNCATED FOR MAIL (1000 lines) <<<