svn commit: r221677 - in user/avg/xcpu/sys: amd64/amd64 kern sys
Andriy Gapon
avg at FreeBSD.org
Mon May 9 07:05:06 UTC 2011
Author: avg
Date: Mon May 9 07:05:06 2011
New Revision: 221677
URL: http://svn.freebsd.org/changeset/base/221677
Log:
re-implement hard stopping of CPUs and use it to enforce panic(9) context
Hard stopping changes:
- stop_cpus_hard interface is intentionally narrowed to always act on all
other cpus
- stop_cpus_hard keeps its own accounting of stopped cpus completely
disregarding soft stopping of cpus (pausing, suspending)
- no recursion whatsoever is allowed for stop_cpus_hard; all callers
must check if they are already in "super" mode
- protect the stop function with spinlock context
- avoid a potential deadlock between two cpus that are already in nmi context
(e.g. because of hardware) and trying to stop each other: while spinning on
the stopper lock, check whether this cpu has itself been asked to stop;
this is possibly an arch-specific thing [x86]
The last item is what I call a cross-cpu deadlock avoidance approach.
General idea: if a CPU can't grab a lock protecting cross-/intra-CPU
interactions, then while spinning on the lock the CPU should check
for incoming cross-CPU events possibly posted by a CPU that currently
holds the lock. In this scheme an IPI is used only to interrupt a CPU
so that it can notice a new event, but the actual event should be passed
via memory (a bit in a cpu set, like the newly introduced
hard_stopping_cpus, or some other variable).
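To illustrate the idea outside the kernel, below is a minimal userspace
sketch of the spin-and-check pattern, using C11 atomics and two pthreads
standing in for CPUs.  All names here (stopper, stopping, stopped, started,
handle_stop, stop_all) are illustrative only and the memory ordering is the
default seq_cst; the real implementation is in the subr_smp.c and
mp_machdep.c hunks further down.

/* cc -std=c11 -pthread stop_sketch.c -o stop_sketch */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define	NCPU	2
#define	ALL	((1u << NCPU) - 1)

static atomic_int  stopper = -1;	/* "CPU" holding the stopper role, or -1 */
static atomic_uint stopping;		/* mask: stop requested */
static atomic_uint stopped;		/* mask: stop acknowledged */
static atomic_uint started;		/* mask: released by the stopper */
static atomic_int  finished;		/* threads done with stop_all() */

/* Stand-in for the NMI stop handler: acknowledge and park until released. */
static void
handle_stop(int cpu)
{
	unsigned m = 1u << cpu;

	if ((atomic_load(&stopping) & m) == 0)
		return;				/* belated/absent request */
	atomic_fetch_or(&stopped, m);		/* "I am stopped" */
	atomic_fetch_and(&stopping, ~m);
	while ((atomic_load(&started) & m) == 0)
		;				/* parked until released */
	atomic_fetch_and(&started, ~m);
	atomic_fetch_and(&stopped, ~m);
}

/* Stand-in for stop_cpus_hard(): become the stopper, stop everyone else. */
static void
stop_all(int cpu)
{
	unsigned others = ALL & ~(1u << cpu);
	int exp;

	/*
	 * Contend for the stopper role.  The key step: while spinning,
	 * service an incoming stop request instead of ignoring it, so two
	 * threads trying to stop each other cannot deadlock.
	 */
	for (;;) {
		exp = -1;
		if (atomic_compare_exchange_weak(&stopper, &exp, cpu))
			break;
		handle_stop(cpu);
	}
	atomic_fetch_or(&stopping, others);	/* the "IPI" */
	while ((atomic_load(&stopped) & others) != others)
		;				/* wait for acknowledgement */
	printf("cpu%d stopped everyone else\n", cpu);
	atomic_fetch_or(&started, others);	/* release them */
	while ((atomic_load(&stopped) & others) != 0)
		;
	atomic_store(&stopper, -1);
}

static void *
cpu_main(void *arg)
{
	int cpu = (int)(intptr_t)arg;

	stop_all(cpu);
	atomic_fetch_add(&finished, 1);
	/* Keep servicing stop requests until every thread is done. */
	while (atomic_load(&finished) < NCPU)
		handle_stop(cpu);
	return (NULL);
}

int
main(void)
{
	pthread_t t[NCPU];

	for (int i = 0; i < NCPU; i++)
		pthread_create(&t[i], NULL, cpu_main, (void *)(intptr_t)i);
	for (int i = 0; i < NCPU; i++)
		pthread_join(t[i], NULL);
	return (0);
}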
Panic context changes:
- ensure that only one (panic-ing) CPU runs after panic(9) call by
stopping other CPUs using stop_cpus_hard()
- disable interrupts/preemption early in panic(9) and do not re-enable them
(this most probably breaks sync-on-panic behavior)
- allow a panic-ing thread to penetrate all locks and never sleep on
conditions (lockmgr locks are actually exempt; the per-thread check involved
is sketched after this list):
o mutex, spin mutex, rwlock, rmlock, sx: allow lock/unlock operations to
unconditionally succeed for a thread in panic [jhb]
o tsleep, msleep (msleep_spin, _sleep): return success without waiting
for a thread in panic [jhb]
o cv code already does that when panicstr != NULL
- boot(): do not rebind to bsp if panic-ing [mdf]
- in all places where we want to stop other cpus, first check whether they
are already stopped
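The lock and sleep changes above key off a per-thread check that appears as
IS_PANIC_THREAD() in the diff below; its definition is not part of this diff.
Since panic(9) here marks the panic-ing thread with TDF_INPANIC, a plausible
shape for it would be the following (assumed, not taken from the branch;
panicstr, curthread, td_flags and TDF_INPANIC are existing kernel symbols):

/*
 * Assumed shape only -- the real IS_PANIC_THREAD() lives elsewhere in the
 * xcpu branch and may be as simple as checking panicstr.
 */
#define	IS_PANIC_THREAD()						\
	(panicstr != NULL && (curthread->td_flags & TDF_INPANIC) != 0)

With such a check in place, the lock/unlock slow paths and the sleep
primitives in the hunks below simply return for the panic-ing thread instead
of blocking.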
cpu_reset() and cpustop_hard_handler() are only (re-)implemented for
amd64 at the moment.
Modified:
user/avg/xcpu/sys/amd64/amd64/mp_machdep.c
user/avg/xcpu/sys/amd64/amd64/vm_machdep.c
user/avg/xcpu/sys/kern/kern_mutex.c
user/avg/xcpu/sys/kern/kern_rmlock.c
user/avg/xcpu/sys/kern/kern_rwlock.c
user/avg/xcpu/sys/kern/kern_shutdown.c
user/avg/xcpu/sys/kern/kern_sx.c
user/avg/xcpu/sys/kern/kern_synch.c
user/avg/xcpu/sys/kern/subr_kdb.c
user/avg/xcpu/sys/kern/subr_smp.c
user/avg/xcpu/sys/sys/smp.h
Modified: user/avg/xcpu/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- user/avg/xcpu/sys/amd64/amd64/mp_machdep.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/amd64/amd64/mp_machdep.c Mon May 9 07:05:06 2011 (r221677)
@@ -1360,8 +1360,40 @@ ipi_all_but_self(u_int ipi)
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
+void
+cpuhardstop_handler(void)
+{
+ cpumask_t cpumask;
+ u_int cpu;
+
+ cpumask = PCPU_GET(cpumask);
+
+ /* Just return if this is a belated NMI */
+ if ((hard_stopping_cpus & cpumask) == 0)
+ return;
+
+ cpu = PCPU_GET(cpuid);
+ savectx(&stoppcbs[cpu]);
+
+ /* Indicate that we are stopped */
+ atomic_set_int(&hard_stopped_cpus, cpumask);
+ atomic_clear_int(&hard_stopping_cpus, cpumask);
+
+ /* Wait for restart */
+ while ((hard_started_cpus & cpumask) == 0) {
+ /* BSP can be asked to reset system while spinning here. */
+ if (cpu == 0 && cpustop_hook != NULL) {
+ cpustop_hook();
+ cpustop_hook = NULL;
+ }
+ ia32_pause();
+ }
+ atomic_clear_int(&hard_started_cpus, cpumask);
+ atomic_clear_int(&hard_stopped_cpus, cpumask);
+}
+
int
-ipi_nmi_handler()
+ipi_nmi_handler(void)
{
cpumask_t cpumask;
@@ -1376,7 +1408,7 @@ ipi_nmi_handler()
return (1);
atomic_clear_int(&ipi_nmi_pending, cpumask);
- cpustop_handler();
+ cpuhardstop_handler();
return (0);
}
Modified: user/avg/xcpu/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- user/avg/xcpu/sys/amd64/amd64/vm_machdep.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/amd64/amd64/vm_machdep.c Mon May 9 07:05:06 2011 (r221677)
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
+#include <sys/kdb.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -531,12 +532,13 @@ cpu_reset()
u_int cnt;
if (smp_active) {
- map = PCPU_GET(other_cpus) & ~stopped_cpus;
- if (map != 0) {
- printf("cpu_reset: Stopping other CPUs\n");
- stop_cpus(map);
+ if (panicstr == NULL && !kdb_active) {
+ map = PCPU_GET(other_cpus) & ~stopped_cpus;
+ if (map != 0) {
+ printf("cpu_reset: Stopping other CPUs\n");
+ stop_cpus(map);
+ }
}
-
if (PCPU_GET(cpuid) != 0) {
cpu_reset_proxyid = PCPU_GET(cpuid);
cpustop_restartfunc = cpu_reset_proxy;
Modified: user/avg/xcpu/sys/kern/kern_mutex.c
==============================================================================
--- user/avg/xcpu/sys/kern/kern_mutex.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/kern/kern_mutex.c Mon May 9 07:05:06 2011 (r221677)
@@ -348,6 +348,9 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t
return;
}
+ if (IS_PANIC_THREAD())
+ return;
+
lock_profile_obtain_lock_failed(&m->lock_object,
&contested, &waittime);
if (LOCK_LOG_TEST(&m->lock_object, opts))
@@ -507,6 +510,9 @@ _mtx_lock_spin(struct mtx *m, uintptr_t
uint64_t waittime = 0;
#endif
+ if (IS_PANIC_THREAD())
+ return;
+
if (LOCK_LOG_TEST(&m->lock_object, opts))
CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
@@ -577,6 +583,10 @@ retry:
m->mtx_recurse++;
break;
}
+
+ if (IS_PANIC_THREAD())
+ return;
+
lock_profile_obtain_lock_failed(&m->lock_object,
&contested, &waittime);
/* Give interrupts a chance while we spin. */
@@ -663,6 +673,9 @@ _mtx_unlock_sleep(struct mtx *m, int opt
return;
}
+ if (IS_PANIC_THREAD())
+ return;
+
/*
* We have to lock the chain before the turnstile so this turnstile
* can be removed from the hash list if it is empty.
Modified: user/avg/xcpu/sys/kern/kern_rmlock.c
==============================================================================
--- user/avg/xcpu/sys/kern/kern_rmlock.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/kern/kern_rmlock.c Mon May 9 07:05:06 2011 (r221677)
@@ -279,6 +279,9 @@ _rm_rlock_hard(struct rmlock *rm, struct
return (1);
}
+ if (IS_PANIC_THREAD())
+ return (1);
+
/*
* We allow readers to aquire a lock even if a writer is blocked if
* the lock is recursive and the reader already holds the lock.
@@ -385,6 +388,9 @@ _rm_unlock_hard(struct thread *td,struct
if (!tracker->rmp_flags)
return;
+ if (IS_PANIC_THREAD())
+ return;
+
mtx_lock_spin(&rm_spinlock);
LIST_REMOVE(tracker, rmp_qentry);
@@ -436,6 +442,9 @@ _rm_wlock(struct rmlock *rm)
else
mtx_lock(&rm->rm_lock_mtx);
+ if (IS_PANIC_THREAD())
+ return;
+
if (rm->rm_writecpus != all_cpus) {
/* Get all read tokens back */
Modified: user/avg/xcpu/sys/kern/kern_rwlock.c
==============================================================================
--- user/avg/xcpu/sys/kern/kern_rwlock.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/kern/kern_rwlock.c Mon May 9 07:05:06 2011 (r221677)
@@ -323,6 +323,9 @@ _rw_rlock(struct rwlock *rw, const char
rw->lock_object.lo_name, file, line));
WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
+ if (IS_PANIC_THREAD())
+ return;
+
for (;;) {
#ifdef KDTRACE_HOOKS
spin_cnt++;
@@ -532,6 +535,9 @@ _rw_runlock(struct rwlock *rw, const cha
WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
+ if (IS_PANIC_THREAD())
+ return;
+
/* TODO: drop "owner of record" here. */
for (;;) {
@@ -659,6 +665,9 @@ _rw_wlock_hard(struct rwlock *rw, uintpt
return;
}
+ if (IS_PANIC_THREAD())
+ return;
+
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
@@ -820,6 +829,9 @@ _rw_wunlock_hard(struct rwlock *rw, uint
return;
}
+ if (IS_PANIC_THREAD())
+ return;
+
KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
("%s: neither of the waiter flags are set", __func__));
Modified: user/avg/xcpu/sys/kern/kern_shutdown.c
==============================================================================
--- user/avg/xcpu/sys/kern/kern_shutdown.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/kern/kern_shutdown.c Mon May 9 07:05:06 2011 (r221677)
@@ -121,6 +121,13 @@ SYSCTL_INT(_kern, OID_AUTO, sync_on_pani
&sync_on_panic, 0, "Do a sync before rebooting from a panic");
TUNABLE_INT("kern.sync_on_panic", &sync_on_panic);
+#ifdef SMP
+static int stop_cpus_on_panic = 1;
+SYSCTL_INT(_kern, OID_AUTO, stop_cpus_on_panic, CTLFLAG_RW | CTLFLAG_TUN,
+ &stop_cpus_on_panic, 0, "stop other CPUs when entering the debugger");
+TUNABLE_INT("kern.stop_cpus_on_panic", &stop_cpus_on_panic);
+#endif
+
SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
/*
@@ -283,10 +290,12 @@ kern_reboot(int howto)
* systems don't shutdown properly (i.e., ACPI power off) if we
* run on another processor.
*/
- thread_lock(curthread);
- sched_bind(curthread, 0);
- thread_unlock(curthread);
- KASSERT(PCPU_GET(cpuid) == 0, ("%s: not running on cpu 0", __func__));
+ if (panicstr == NULL) {
+ thread_lock(curthread);
+ sched_bind(curthread, 0);
+ thread_unlock(curthread);
+ KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0"));
+ }
#endif
/* We're in the process of rebooting. */
rebooting = 1;
@@ -530,27 +539,19 @@ shutdown_reset(void *junk, int howto)
void
panic(const char *fmt, ...)
{
-#ifdef SMP
- static volatile u_int panic_cpu = NOCPU;
-#endif
struct thread *td = curthread;
int bootopt, newpanic;
+ int did_stop_cpus;
va_list ap;
static char buf[256];
- critical_enter();
+ spinlock_enter();
#ifdef SMP
- /*
- * We don't want multiple CPU's to panic at the same time, so we
- * use panic_cpu as a simple spinlock. We have to keep checking
- * panic_cpu if we are spinning in case the panic on the first
- * CPU is canceled.
- */
- if (panic_cpu != PCPU_GET(cpuid))
- while (atomic_cmpset_int(&panic_cpu, NOCPU,
- PCPU_GET(cpuid)) == 0)
- while (panic_cpu != NOCPU)
- ; /* nothing */
+ if (stop_cpus_on_panic && panicstr == NULL && !kdb_active) {
+ stop_cpus_hard();
+ did_stop_cpus = 1;
+ } else
+ did_stop_cpus = 0;
#endif
bootopt = RB_AUTOBOOT | RB_DUMP;
@@ -586,8 +587,13 @@ panic(const char *fmt, ...)
/* See if the user aborted the panic, in which case we continue. */
if (panicstr == NULL) {
#ifdef SMP
- atomic_store_rel_int(&panic_cpu, NOCPU);
+ if (did_stop_cpus)
+ unstop_cpus_hard();
+ else
+ atomic_store_rel_int(&panic_cpu, NOCPU);
#endif
+
+ spinlock_exit();
return;
}
#endif
@@ -595,9 +601,10 @@ panic(const char *fmt, ...)
/*thread_lock(td); */
td->td_flags |= TDF_INPANIC;
/* thread_unlock(td); */
+
if (!sync_on_panic)
bootopt |= RB_NOSYNC;
- critical_exit();
+
kern_reboot(bootopt);
}
Modified: user/avg/xcpu/sys/kern/kern_sx.c
==============================================================================
--- user/avg/xcpu/sys/kern/kern_sx.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/kern/kern_sx.c Mon May 9 07:05:06 2011 (r221677)
@@ -493,6 +493,9 @@ _sx_xlock_hard(struct sx *sx, uintptr_t
return (0);
}
+ if (IS_PANIC_THREAD())
+ return (0);
+
if (LOCK_LOG_TEST(&sx->lock_object, 0))
CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
@@ -691,6 +694,10 @@ _sx_xunlock_hard(struct sx *sx, uintptr_
CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
return;
}
+
+ if (IS_PANIC_THREAD())
+ return;
+
MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
SX_LOCK_EXCLUSIVE_WAITERS));
if (LOCK_LOG_TEST(&sx->lock_object, 0))
@@ -753,6 +760,9 @@ _sx_slock_hard(struct sx *sx, int opts,
int64_t sleep_time = 0;
#endif
+ if (IS_PANIC_THREAD())
+ return (0);
+
/*
* As with rwlocks, we don't make any attempt to try to block
* shared locks once there is an exclusive waiter.
@@ -919,6 +929,9 @@ _sx_sunlock_hard(struct sx *sx, const ch
uintptr_t x;
int wakeup_swapper;
+ if (IS_PANIC_THREAD())
+ return;
+
for (;;) {
x = sx->sx_lock;
Modified: user/avg/xcpu/sys/kern/kern_synch.c
==============================================================================
--- user/avg/xcpu/sys/kern/kern_synch.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/kern/kern_synch.c Mon May 9 07:05:06 2011 (r221677)
@@ -158,7 +158,7 @@ _sleep(void *ident, struct lock_object *
else
class = NULL;
- if (cold) {
+ if (cold || IS_PANIC_THREAD()) {
/*
* During autoconfiguration, just return;
* don't run any other threads or panic below,
@@ -260,7 +260,7 @@ msleep_spin(void *ident, struct mtx *mtx
KASSERT(p != NULL, ("msleep1"));
KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));
- if (cold) {
+ if (cold || IS_PANIC_THREAD()) {
/*
* During autoconfiguration, just return;
* don't run any other threads or panic below,
Modified: user/avg/xcpu/sys/kern/subr_kdb.c
==============================================================================
--- user/avg/xcpu/sys/kern/subr_kdb.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/kern/subr_kdb.c Mon May 9 07:05:06 2011 (r221677)
@@ -211,10 +211,7 @@ kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS
void
kdb_panic(const char *msg)
{
-
-#ifdef SMP
- stop_cpus_hard(PCPU_GET(other_cpus));
-#endif
+
printf("KDB: panic\n");
panic("%s", msg);
}
@@ -515,8 +512,11 @@ kdb_trap(int type, int code, struct trap
intr = intr_disable();
#ifdef SMP
- if ((did_stop_cpus = kdb_stop_cpus) != 0)
- stop_cpus_hard(PCPU_GET(other_cpus));
+ if (kdb_stop_cpus && panicstr == NULL && !kdb_active) {
+ stop_cpus_hard();
+ did_stop_cpus = 1;
+ } else
+ did_stop_cpus = 0;
#endif
kdb_active++;
@@ -543,7 +543,7 @@ kdb_trap(int type, int code, struct trap
#ifdef SMP
if (did_stop_cpus)
- restart_cpus(stopped_cpus);
+ unstop_cpus_hard();
#endif
intr_restore(intr);
Modified: user/avg/xcpu/sys/kern/subr_smp.c
==============================================================================
--- user/avg/xcpu/sys/kern/subr_smp.c Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/kern/subr_smp.c Mon May 9 07:05:06 2011 (r221677)
@@ -55,6 +55,9 @@ __FBSDID("$FreeBSD$");
#ifdef SMP
volatile cpumask_t stopped_cpus;
volatile cpumask_t started_cpus;
+volatile cpumask_t hard_stopped_cpus;
+volatile cpumask_t hard_started_cpus;
+volatile cpumask_t hard_stopping_cpus;
cpumask_t idle_cpus_mask;
cpumask_t hlt_cpus_mask;
cpumask_t logical_cpus_mask;
@@ -207,9 +210,9 @@ generic_stop_cpus(cpumask_t map, u_int t
KASSERT(
#if defined(__amd64__)
- type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
+ type == IPI_STOP || type == IPI_SUSPEND,
#else
- type == IPI_STOP || type == IPI_STOP_HARD,
+ type == IPI_STOP
#endif
("%s: invalid stop type", __func__));
@@ -249,13 +252,6 @@ stop_cpus(cpumask_t map)
return (generic_stop_cpus(map, IPI_STOP));
}
-int
-stop_cpus_hard(cpumask_t map)
-{
-
- return (generic_stop_cpus(map, IPI_STOP_HARD));
-}
-
#if defined(__amd64__)
int
suspend_cpus(cpumask_t map)
@@ -297,6 +293,82 @@ restart_cpus(cpumask_t map)
return 1;
}
+void
+stop_cpus_hard(void)
+{
+ static volatile u_int hard_stopper_cpu = NOCPU;
+ cpumask_t map;
+ cpumask_t mask;
+ u_int cpu;
+ int i;
+
+ if (!smp_started)
+ return;
+
+ /* Ensure non-preemtable context, just in case. */
+ spinlock_enter();
+
+ map = PCPU_GET(other_cpus);
+ mask = PCPU_GET(cpumask);
+ cpu = PCPU_GET(cpuid);
+
+ CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, IPI_STOP_HARD);
+
+ if (cpu != hard_stopper_cpu) {
+ while (atomic_cmpset_int(&hard_stopper_cpu, NOCPU, cpu) == 0)
+ while (hard_stopper_cpu != NOCPU) {
+ if ((mask & hard_stopping_cpus) != 0)
+ cpuhardstop_handler();
+ else
+ cpu_spinwait();
+ }
+ } else {
+ /*
+ * Recursion here is not expected.
+ */
+ atomic_store_rel_int(&hard_stopper_cpu, NOCPU);
+ panic("hard stop recursion\n");
+ }
+
+ atomic_set_int(&hard_stopping_cpus, map);
+ ipi_all_but_self(IPI_STOP_HARD);
+
+ i = 0;
+ while ((hard_stopped_cpus & map) != map) {
+ cpu_spinwait();
+ i++;
+ if (i == 10000000) {
+ /* Should not happen; other CPU stuck in NMI handler? */
+ printf("timeout stopping cpus\n");
+ break;
+ }
+ }
+
+ atomic_store_rel_int(&hard_stopper_cpu, NOCPU);
+
+ spinlock_exit();
+ return;
+}
+
+void
+unstop_cpus_hard(void)
+{
+ cpumask_t map;
+
+ if (!smp_started)
+ return;
+
+ map = PCPU_GET(other_cpus);
+ CTR1(KTR_SMP, "restart_cpus(%x)", map);
+
+ /* signal other cpus to restart */
+ atomic_store_rel_int(&hard_started_cpus, map);
+
+ /* wait for each to clear its bit */
+ while ((hard_stopped_cpus & map) != 0)
+ cpu_spinwait();
+}
+
/*
* All-CPU rendezvous. CPUs are signalled, all execute the setup function
* (if specified), rendezvous, execute the action function (if specified),
Modified: user/avg/xcpu/sys/sys/smp.h
==============================================================================
--- user/avg/xcpu/sys/sys/smp.h Mon May 9 07:04:14 2011 (r221676)
+++ user/avg/xcpu/sys/sys/smp.h Mon May 9 07:05:06 2011 (r221677)
@@ -73,6 +73,9 @@ extern int smp_active;
extern int smp_cpus;
extern volatile cpumask_t started_cpus;
extern volatile cpumask_t stopped_cpus;
+extern volatile cpumask_t hard_started_cpus;
+extern volatile cpumask_t hard_stopped_cpus;
+extern volatile cpumask_t hard_stopping_cpus;
extern cpumask_t idle_cpus_mask;
extern cpumask_t hlt_cpus_mask;
extern cpumask_t logical_cpus_mask;
@@ -161,10 +164,12 @@ void cpu_mp_start(void);
void forward_signal(struct thread *);
int restart_cpus(cpumask_t);
int stop_cpus(cpumask_t);
-int stop_cpus_hard(cpumask_t);
#if defined(__amd64__)
int suspend_cpus(cpumask_t);
#endif
+void stop_cpus_hard(void);
+void unstop_cpus_hard(void);
+void cpuhardstop_handler(void);
void smp_rendezvous_action(void);
extern struct mtx smp_ipi_mtx;