git: aa41354349c1 - main - nvme: Optimize timeout code further
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 23 Jul 2024 23:03:40 UTC
The branch main has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=aa41354349c16ea7220893010df78b47d67d0f74 commit aa41354349c16ea7220893010df78b47d67d0f74 Author: Warner Losh <imp@FreeBSD.org> AuthorDate: 2024-07-23 23:02:03 +0000 Commit: Warner Losh <imp@FreeBSD.org> CommitDate: 2024-07-23 23:04:02 +0000 nvme: Optimize timeout code further Optimize timeout code based on three observations. (1) The tr queues are sorted in order of submission, so the first one that could time out is the first "real" one on the list. (2) Timeouts for a given queue are all the same length (well, except at startup, where timeout doesn't matter, and when you change it at runtime, where timeouts will still happen eventually and the difference isn't worth optimizing for). (3) Calling the ISR races the real ISR and we should avoid that better. So now, after checking to see if the card is there and working, the timeout routine scans the pending tracker list until it finds a non-AER tracker. If the deadline hasn't passed, we return, doing nothing further. Otherwise, we call poll completions and then process the list looking for timed out items. This should move the timeout routine to touching hardware only when it's really necessary. It thus avoids racing the normal ISR, while still timig out stuck transactions quickly enough. There was also some minor code motion to make all of the above flow more nicely for the reader. When interrupts aren't working at all, then this will increase latency somewhat. But when interrupts aren't working at all, there's bigger problems and we should poll quite often in that case. That will be handled in future commits. Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D46026 --- sys/dev/nvme/nvme_qpair.c | 138 ++++++++++++++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 48 deletions(-) diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index e4286b61a3fc..c917b34dbe43 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -1045,8 +1045,8 @@ nvme_qpair_timeout(void *arg) struct nvme_controller *ctrlr = qpair->ctrlr; struct nvme_tracker *tr; sbintime_t now; - bool idle = false; - bool needs_reset; + bool idle = true; + bool fast; uint32_t csts; uint8_t cfs; @@ -1092,23 +1092,69 @@ nvme_qpair_timeout(void *arg) */ csts = nvme_mmio_read_4(ctrlr, csts); cfs = NVMEV(NVME_CSTS_REG_CFS, csts); - if (csts == NVME_GONE || cfs == 1) - goto do_reset; + if (csts == NVME_GONE || cfs == 1) { + /* + * We've had a command timeout that we weren't able to + * abort or we have aborts disabled and any command + * timed out. + * + * If we get here due to a possible surprise hot-unplug + * event, then we let nvme_ctrlr_reset confirm and fail + * the controller. + */ +do_reset: + nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n", + (csts == 0xffffffff) ? " and possible hot unplug" : + (cfs ? " and fatal error status" : "")); + qpair->recovery_state = RECOVERY_WAITING; + nvme_ctrlr_reset(ctrlr); + idle = false; + break; + } + /* - * Process completions. We already have the recovery lock, so - * call the locked version. + * See if there's any recovery needed. First, do a fast check to + * see if anything could have timed out. If not, then skip + * everything else. + */ + fast = false; + mtx_lock(&qpair->lock); + now = getsbinuptime(); + TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) { + /* + * Skip async commands, they are posted to the card for + * an indefinite amount of time and have no deadline. + */ + if (tr->deadline == SBT_MAX) + continue; + + /* + * If the first real transaction is not in timeout, then + * we're done. Otherwise, we try recovery. + */ + idle = false; + if (now <= tr->deadline) + fast = true; + break; + } + mtx_unlock(&qpair->lock); + if (idle || fast) + break; + + /* + * There's a stale transaction at the start of the queue whose + * deadline has passed. Poll the competions as a last-ditch + * effort in case an interrupt has been missed. */ _nvme_qpair_process_completions(qpair); /* - * Check to see if we need to timeout any commands. If we do, then - * we also enter a recovery phase. + * Now that we've run the ISR, re-rheck to see if there's any + * timed out commands and abort them or reset the card if so. */ - now = getsbinuptime(); - needs_reset = false; - idle = true; mtx_lock(&qpair->lock); + idle = true; TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) { /* * Skip async commands, they are posted to the card for @@ -1116,48 +1162,44 @@ nvme_qpair_timeout(void *arg) */ if (tr->deadline == SBT_MAX) continue; - if (now > tr->deadline) { - if (tr->req->cb_fn != nvme_abort_complete && - ctrlr->enable_aborts) { - /* - * This isn't an abort command, ask - * for a hardware abort. - */ - nvme_ctrlr_cmd_abort(ctrlr, tr->cid, - qpair->id, nvme_abort_complete, tr); - } else { - /* - * Otherwise we have a live command in - * the card (either one we couldn't - * abort, or aborts weren't enabled). - * The only safe way to proceed is to do - * a reset. - */ - needs_reset = true; - } + + /* + * If we know this tracker hasn't timed out, we also + * know all subsequent ones haven't timed out. The tr + * queue is in submission order and all normal commands + * in a queue have the same timeout (or the timeout was + * changed by the user, but we eventually timeout then). + */ + idle = false; + if (now <= tr->deadline) + break; + + /* + * Timeout expired, abort it or reset controller. + */ + if (ctrlr->enable_aborts && + tr->req->cb_fn != nvme_abort_complete) { + /* + * This isn't an abort command, ask for a + * hardware abort. This goes to the admin + * queue which will reset the card if it + * times out. + */ + nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id, + nvme_abort_complete, tr); } else { - idle = false; + /* + * We have a live command in the card (either + * one we couldn't abort, or aborts weren't + * enabled). We can only reset. + */ + mtx_unlock(&qpair->lock); + goto do_reset; } } mtx_unlock(&qpair->lock); - if (!needs_reset) - break; - - /* - * We've had a command timeout that we weren't able to abort - * - * If we get here due to a possible surprise hot-unplug event, - * then we let nvme_ctrlr_reset confirm and fail the - * controller. - */ - do_reset: - nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n", - (csts == 0xffffffff) ? " and possible hot unplug" : - (cfs ? " and fatal error status" : "")); - qpair->recovery_state = RECOVERY_WAITING; - nvme_ctrlr_reset(ctrlr); - idle = false; /* We want to keep polling */ break; + case RECOVERY_WAITING: /* * These messages aren't interesting while we're suspended. We