git: d95431624f93 - main - nvme: Give up when we've failed
Date: Sun, 17 Sep 2023 15:16:29 UTC
The branch main has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=d95431624f934fe4740211738fc787808005b14e commit d95431624f934fe4740211738fc787808005b14e Author: Warner Losh <imp@FreeBSD.org> AuthorDate: 2023-09-15 16:02:32 +0000 Commit: Warner Losh <imp@FreeBSD.org> CommitDate: 2023-09-17 15:11:56 +0000 nvme: Give up when we've failed Normally, we poll the device every so often to see if commands have timed out. However, we'll go into the recovery state as part of failing the drive. To account for all possibilties, if we're failed when we get into the polling function, just stop polling: Party is over. Sponsored by: Netflix --- sys/dev/nvme/nvme_qpair.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index b256c4713c8d..4e37aa0e1020 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -1011,6 +1011,17 @@ nvme_qpair_timeout(void *arg) mtx_assert(&qpair->recovery, MA_OWNED); + /* + * If the controller has failed, give up. We're never going to change + * state from a failed controller: no further transactions are possible. + * We go ahead and let the timeout expire in many cases for simplicity. + */ + if (qpair->ctrlr->is_failed) { + nvme_printf(ctrlr, "Controller failed, giving up\n"); + qpair->timer_armed = false; + return; + } + switch (qpair->recovery_state) { case RECOVERY_NONE: /* @@ -1094,8 +1105,8 @@ nvme_qpair_timeout(void *arg) idle = false; /* We want to keep polling */ break; case RECOVERY_WAITING: - nvme_printf(ctrlr, "waiting for reset to complete\n"); - idle = false; /* We want to keep polling */ + nvme_printf(ctrlr, "Waiting for reset to complete\n"); + idle = false; /* We want to keep polling */ break; }