svn commit: r350928 - stable/12/sys/dev/nvme
Alexander Motin
mav at FreeBSD.org
Mon Aug 12 18:49:33 UTC 2019
Author: mav
Date: Mon Aug 12 18:49:32 2019
New Revision: 350928
URL: https://svnweb.freebsd.org/changeset/base/350928
Log:
MFC r348495 (by imp):
Since a fatal trap can happen at aribtrary times, don't panic when the
completions are not in a consistent state. Cope with the different
places the normal I/O completion polling thread can be interrupted and
then re-entered during a kernel panic + dump.
Modified:
stable/12/sys/dev/nvme/nvme_qpair.c
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- stable/12/sys/dev/nvme/nvme_qpair.c Mon Aug 12 18:48:47 2019 (r350927)
+++ stable/12/sys/dev/nvme/nvme_qpair.c Mon Aug 12 18:49:32 2019 (r350928)
@@ -31,6 +31,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
#include <dev/pci/pcivar.h>
@@ -308,7 +310,7 @@ get_status_string(uint16_t sct, uint16_t sc)
}
static void
-nvme_qpair_print_completion(struct nvme_qpair *qpair,
+nvme_qpair_print_completion(struct nvme_qpair *qpair,
struct nvme_completion *cpl)
{
uint16_t sct, sc;
@@ -479,18 +481,51 @@ nvme_qpair_process_completions(struct nvme_qpair *qpai
struct nvme_tracker *tr;
struct nvme_completion cpl;
int done = 0;
+ bool in_panic = dumping || SCHEDULER_STOPPED();
qpair->num_intr_handler_calls++;
+ /*
+ * qpair is not enabled, likely because a controller reset is is in
+ * progress. Ignore the interrupt - any I/O that was associated with
+ * this interrupt will get retried when the reset is complete.
+ */
if (!qpair->is_enabled)
- /*
- * qpair is not enabled, likely because a controller reset is
- * is in progress. Ignore the interrupt - any I/O that was
- * associated with this interrupt will get retried when the
- * reset is complete.
- */
return (false);
+ /*
+ * A panic can stop the CPU this routine is running on at any point. If
+ * we're called during a panic, complete the sq_head wrap protocol for
+ * the case where we are interrupted just after the increment at 1
+ * below, but before we can reset cq_head to zero at 2. Also cope with
+ * the case where we do the zero at 2, but may or may not have done the
+ * phase adjustment at step 3. The panic machinery flushes all pending
+ * memory writes, so we can make these strong ordering assumptions
+ * that would otherwise be unwise if we were racing in real time.
+ */
+ if (__predict_false(in_panic)) {
+ if (qpair->cq_head == qpair->num_entries) {
+ /*
+ * Here we know that we need to zero cq_head and then negate
+ * the phase, which hasn't been assigned if cq_head isn't
+ * zero due to the atomic_store_rel.
+ */
+ qpair->cq_head = 0;
+ qpair->phase = !qpair->phase;
+ } else if (qpair->cq_head == 0) {
+ /*
+ * In this case, we know that the assignment at 2
+ * happened below, but we don't know if it 3 happened or
+ * not. To do this, we look at the last completion
+ * entry and set the phase to the opposite phase
+ * that it has. This gets us back in sync
+ */
+ cpl = qpair->cpl[qpair->num_entries - 1];
+ nvme_completion_swapbytes(&cpl);
+ qpair->phase = !NVME_STATUS_GET_P(cpl.status);
+ }
+ }
+
bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
while (1) {
@@ -508,17 +543,35 @@ nvme_qpair_process_completions(struct nvme_qpair *qpai
nvme_qpair_complete_tracker(qpair, tr, &cpl, ERROR_PRINT_ALL);
qpair->sq_head = cpl.sqhd;
done++;
- } else {
- nvme_printf(qpair->ctrlr,
+ } else if (!in_panic) {
+ /*
+ * A missing tracker is normally an error. However, a
+ * panic can stop the CPU this routine is running on
+ * after completing an I/O but before updating
+ * qpair->cq_head at 1 below. Later, we re-enter this
+ * routine to poll I/O associated with the kernel
+ * dump. We find that the tr has been set to null before
+ * calling the completion routine. If it hasn't
+ * completed (or it triggers a panic), then '1' below
+ * won't have updated cq_head. Rather than panic again,
+ * ignore this condition because it's not unexpected.
+ */
+ nvme_printf(qpair->ctrlr,
"cpl does not map to outstanding cmd\n");
/* nvme_dump_completion expects device endianess */
nvme_dump_completion(&qpair->cpl[qpair->cq_head]);
- KASSERT(0, ("received completion for unknown cmd\n"));
+ KASSERT(0, ("received completion for unknown cmd"));
}
- if (++qpair->cq_head == qpair->num_entries) {
- qpair->cq_head = 0;
- qpair->phase = !qpair->phase;
+ /*
+ * There's a number of races with the following (see above) when
+ * the system panics. We compensate for each one of them by
+ * using the atomic store to force strong ordering (at least when
+ * viewed in the aftermath of a panic).
+ */
+ if (++qpair->cq_head == qpair->num_entries) { /* 1 */
+ atomic_store_rel_int(&qpair->cq_head, 0); /* 2 */
+ qpair->phase = !qpair->phase; /* 3 */
}
nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
More information about the svn-src-stable-12
mailing list