svn commit: r248754 - head/sys/dev/nvme
Jim Harris
jimharris at FreeBSD.org
Tue Mar 26 20:32:58 UTC 2013
Author: jimharris
Date: Tue Mar 26 20:32:57 2013
New Revision: 248754
URL: http://svnweb.freebsd.org/changeset/base/248754
Log:
By default, always escalate to controller reset when an I/O times out.
While aborts are typically cleaner than a full controller reset, many times
an I/O timeout indicates other controller-level issues where aborts may not
work. NVMe drivers for other operating systems are also defaulting to
controller reset rather than aborts for timed out I/O.
Sponsored by: Intel
Reviewed by: carl
Modified:
head/sys/dev/nvme/nvme_ctrlr.c
head/sys/dev/nvme/nvme_private.h
head/sys/dev/nvme/nvme_qpair.c
Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c Tue Mar 26 20:32:46 2013 (r248753)
+++ head/sys/dev/nvme/nvme_ctrlr.c Tue Mar 26 20:32:57 2013 (r248754)
@@ -422,12 +422,8 @@ nvme_ctrlr_hw_reset(struct nvme_controll
void
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
{
- int status;
- status = nvme_ctrlr_hw_reset(ctrlr);
- DELAY(100*1000);
- if (status == 0)
- taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->restart_task);
+ taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
}
static int
@@ -686,11 +682,24 @@ err:
}
static void
-nvme_ctrlr_restart_task(void *arg, int pending)
+nvme_ctrlr_reset_task(void *arg, int pending)
{
- struct nvme_controller *ctrlr = arg;
+ struct nvme_controller *ctrlr = arg;
+ int status;
- nvme_ctrlr_start(ctrlr);
+ device_printf(ctrlr->dev, "resetting controller");
+ status = nvme_ctrlr_hw_reset(ctrlr);
+ /*
+ * Use pause instead of DELAY, so that we yield to any nvme interrupt
+ * handlers on this CPU that were blocked on a qpair lock. We want
+ * all nvme interrupts completed before proceeding with restarting the
+ * controller.
+ *
+ * XXX - any way to guarantee the interrupt handlers have quiesced?
+ */
+ pause("nvmereset", hz / 10);
+ if (status == 0)
+ nvme_ctrlr_start(ctrlr);
}
static void
@@ -841,6 +850,9 @@ nvme_ctrlr_construct(struct nvme_control
ctrlr->force_intx = 0;
TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
+ ctrlr->enable_aborts = 0;
+ TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
+
ctrlr->msix_enabled = 1;
if (ctrlr->force_intx) {
@@ -879,7 +891,7 @@ intx:
ctrlr->cdev->si_drv1 = (void *)ctrlr;
- TASK_INIT(&ctrlr->restart_task, 0, nvme_ctrlr_restart_task, ctrlr);
+ TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
taskqueue_thread_enqueue, &ctrlr->taskqueue);
taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h Tue Mar 26 20:32:46 2013 (r248753)
+++ head/sys/dev/nvme/nvme_private.h Tue Mar 26 20:32:57 2013 (r248754)
@@ -230,6 +230,7 @@ struct nvme_controller {
uint32_t msix_enabled;
uint32_t force_intx;
+ uint32_t enable_aborts;
uint32_t num_io_queues;
boolean_t per_cpu_io_queues;
@@ -239,7 +240,7 @@ struct nvme_controller {
uint32_t ns_identified;
uint32_t queues_created;
uint32_t num_start_attempts;
- struct task restart_task;
+ struct task reset_task;
struct taskqueue *taskqueue;
/* For shared legacy interrupt. */
Modified: head/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- head/sys/dev/nvme/nvme_qpair.c Tue Mar 26 20:32:46 2013 (r248753)
+++ head/sys/dev/nvme/nvme_qpair.c Tue Mar 26 20:32:57 2013 (r248754)
@@ -460,21 +460,20 @@ nvme_timeout(void *arg)
struct nvme_controller *ctrlr = qpair->ctrlr;
union csts_register csts;
+ /* Read csts to get value of cfs - controller fatal status. */
csts.raw = nvme_mmio_read_4(ctrlr, csts);
- if (csts.bits.cfs == 1) {
+ device_printf(ctrlr->dev, "i/o timeout, csts.cfs=%d\n", csts.bits.cfs);
+ nvme_dump_command(&tr->req->cmd);
+
+ if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
/*
- * The controller is reporting fatal status. Don't bother
- * trying to abort the timed out command - proceed
- * immediately to a controller-level reset.
+ * If aborts are enabled, only use them if the controller is
+ * not reporting fatal status.
*/
- device_printf(ctrlr->dev,
- "controller reports fatal status, resetting...\n");
+ nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
+ nvme_abort_complete, tr);
+ } else
nvme_ctrlr_reset(ctrlr);
- return;
- }
-
- nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
- nvme_abort_complete, tr);
}
void
More information about the svn-src-head
mailing list