Panic with mca trap
John Baldwin
jhb at freebsd.org
Thu Feb 3 15:09:54 UTC 2011
On Thursday, February 03, 2011 8:05:31 am John Baldwin wrote:
> On Tuesday, February 01, 2011 11:58:12 am mdf at freebsd.org wrote:
> > On a piece of hardware trying to verify basic build tests, we got an
> > MCA exception that then panic'd the kernel due to WITNESS/INVARIANTS
> > interaction.
> >
> > panic @ time 1296563157.510, thread 0xffffff0005540000: blockable
> > sleep lock (sleep mutex) 128 @ /build/mnt/src/sys/vm/uma_core.c:1872
> >
> > Stack: --------------------------------------------------
> > kernel:witness_checkorder+0x7a2
> > kernel:_mtx_lock_flags+0x81
> > kernel:uma_zalloc_arg+0x256
> > kernel:malloc+0xc5
> > kernel:mca_record_entry+0x30
> > kernel:mca_scan+0xc9
> > kernel:mca_intr+0x79
> > kernel:trap+0x30b
> > kernel:witness_checkorder+0x66
> > kernel:_mtx_lock_spin_flags+0xa4
> > kernel:witness_checkorder+0x2a8
> > kernel:_mtx_lock_spin_flags+0xa4
> > kernel:tdq_idled+0xe8
> > kernel:sched_idletd+0x5b
> > kernel:fork_exit+0x9b
> >
> > That's this bit of code in uma_zalloc_arg():
> >
> > #ifdef INVARIANTS
> > ZONE_LOCK(zone);
> > uma_dbg_alloc(zone, NULL, item);
> > ZONE_UNLOCK(zone);
> > #endif
> >
> >
> > I don't know uma(9) well enough to know the best workaround. Clearly
> > there are times we can be in uma_zalloc_arg() and taking a regular
> > mutex is not acceptable. But what to do for the uma_dbg_free() call
> > so it's happy, and whether to guard taking the ZONE lock with M_NOWAIT
> > or td_critnest > 0 or both is outside my current knowledge.
> >
> > I don't expect we'll see this panic again any time soon, but it would
> > be nice to fix the story for WITNESS of when an M_NOWAIT allocation
> > can be done.
>
> Actually, this is more my fault. The machine check happened while the
> interrupted thread was already in a critical section (hence the WITNESS
> complaint). However, it really isn't correct to be calling malloc() from an
> arbitrary exception handler, especially one like MC# which can fire pretty
> much anywhere. I think instead that we should use malloc() when polling the
> machine check banks, but keep a pre-allocated pool of structures for use with
> MC# exceptions and CMC interrupts and replenish the pool via an asynchronous
> task.
This is what I'm thinking:
Index: mca.c
===================================================================
--- mca.c (revision 218221)
+++ mca.c (working copy)
@@ -85,6 +85,7 @@
static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");
static int mca_count; /* Number of records stored. */
+static int mca_banks; /* Number of per-CPU register banks. */
SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture");
@@ -102,6 +103,8 @@
SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
"Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+static STAILQ_HEAD(, mca_internal) mca_freelist;
+static int mca_freecount;
static STAILQ_HEAD(, mca_internal) mca_records;
static struct callout mca_timer;
static int mca_ticks = 3600; /* Check hourly by default. */
@@ -111,7 +114,6 @@
#ifdef DEV_APIC
static struct cmc_state **cmc_state; /* Indexed by cpuid, bank */
-static int cmc_banks;
static int cmc_throttle = 60; /* Time in seconds to throttle CMCI. */
#endif
@@ -415,21 +417,51 @@
return (1);
}
-static void __nonnull(1)
-mca_record_entry(const struct mca_record *record)
+static void
+mca_fill_freelist(void)
{
struct mca_internal *rec;
+ int desired;
- rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT);
- if (rec == NULL) {
- printf("MCA: Unable to allocate space for an event.\n");
- mca_log(record);
- return;
+ /*
+ * Ensure we have at least one record for each bank and one
+ * record per CPU.
+ */
+ desired = imax(mp_ncpus, mca_banks);
+ mtx_lock_spin(&mca_lock);
+ while (desired < mca_freecount) {
+ mtx_unlock_spin(&mca_lock);
+ rec = malloc(sizeof(*rec), M_MCA, M_WAITOK);
+ mtx_lock_spin(&mca_lock);
+ STAILQ_INSERT_TAIL(&mca_freelist, rec, link);
+ mca_freecount++;
}
+ mtx_unlock_spin(&mca_lock);
+}
+static void __nonnull(2)
+mca_record_entry(enum scan_mode mode, const struct mca_record *record)
+{
+ struct mca_internal *rec;
+
+ if (mode == POLLED) {
+ rec = malloc(sizeof(*rec), M_MCA, M_WAITOK);
+ mtx_lock_spin(&mca_lock);
+ } else {
+ mtx_lock_spin(&mca_lock);
+ rec = STAILQ_FIRST(&mca_freelist);
+ if (rec == NULL) {
+ mtx_unlock_spin(&mca_lock);
+ printf("MCA: Unable to allocate space for an event.\n");
+ mca_log(record);
+ return;
+ }
+ STAILQ_REMOVE_HEAD(&mca_freelist, link);
+ mca_freecount--;
+ }
+
rec->rec = *record;
rec->logged = 0;
- mtx_lock_spin(&mca_lock);
STAILQ_INSERT_TAIL(&mca_records, rec, link);
mca_count++;
mtx_unlock_spin(&mca_lock);
@@ -551,7 +583,7 @@
recoverable = 0;
mca_log(&rec);
}
- mca_record_entry(&rec);
+ mca_record_entry(mode, &rec);
}
#ifdef DEV_APIC
@@ -563,6 +595,8 @@
cmci_update(mode, i, valid, &rec);
#endif
}
+ if (mode == POLLED)
+ mca_fill_freelist();
return (mode == MCE ? recoverable : count);
}
@@ -642,15 +676,14 @@
#ifdef DEV_APIC
static void
-cmci_setup(uint64_t mcg_cap)
+cmci_setup(void)
{
int i;
cmc_state = malloc((mp_maxid + 1) * sizeof(struct cmc_state **),
M_MCA, M_WAITOK);
- cmc_banks = mcg_cap & MCG_CAP_COUNT;
for (i = 0; i <= mp_maxid; i++)
- cmc_state[i] = malloc(sizeof(struct cmc_state) * cmc_banks,
+ cmc_state[i] = malloc(sizeof(struct cmc_state) * mca_banks,
M_MCA, M_WAITOK | M_ZERO);
SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
"cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
@@ -672,10 +705,13 @@
CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
workaround_erratum383 = 1;
+ mca_banks = mcg_cap & MCG_CAP_COUNT;
mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
STAILQ_INIT(&mca_records);
TASK_INIT(&mca_task, 0, mca_scan_cpus, NULL);
callout_init(&mca_timer, CALLOUT_MPSAFE);
+ STAILQ_INIT(&mca_freelist);
+ mca_fill_freelist();
SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
"count", CTLFLAG_RD, &mca_count, 0, "Record count");
SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
@@ -689,7 +725,7 @@
sysctl_mca_scan, "I", "Force an immediate scan for machine checks");
#ifdef DEV_APIC
if (mcg_cap & MCG_CAP_CMCI_P)
- cmci_setup(mcg_cap);
+ cmci_setup();
#endif
}
@@ -707,7 +743,7 @@
struct cmc_state *cc;
uint64_t ctl;
- KASSERT(i < cmc_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));
+ KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));
ctl = rdmsr(MSR_MC_CTL2(i));
if (ctl & MC_CTL2_CMCI_EN)
@@ -751,7 +787,7 @@
struct cmc_state *cc;
uint64_t ctl;
- KASSERT(i < cmc_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));
+ KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));
/* Ignore banks not monitored by this CPU. */
if (!(PCPU_GET(cmci_mask) & 1 << i))
--
John Baldwin
More information about the freebsd-current
mailing list