git: 298e32773ff2 - stable/13 - ena: Add reset reason for missing admin interrupt
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 31 Oct 2024 16:00:03 UTC
The branch stable/13 has been updated by osamaabb: URL: https://cgit.FreeBSD.org/src/commit/?id=298e32773ff2fd1652a668ae56f38df03c61a946 commit 298e32773ff2fd1652a668ae56f38df03c61a946 Author: Osama Abboud <osamaabb@amazon.com> AuthorDate: 2024-08-07 06:24:19 +0000 Commit: Osama Abboud <osamaabb@FreeBSD.org> CommitDate: 2024-10-31 14:55:20 +0000 ena: Add reset reason for missing admin interrupt There can be cases when we trigger reset if an admin interrupt is missing. In order to identify this use-case specifically, this commit adds a new reset reason. Approved by: cperciva (mentor) Sponsored by: Amazon, Inc. (cherry picked from commit 274319acb48424958242d55e1b0c7d4528da7f70) --- sys/dev/ena/ena.c | 13 +++++++++++-- sys/dev/ena/ena.h | 5 ++++- sys/dev/ena/ena_sysctl.c | 4 ++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 32c93de821a3..a895ede8226f 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -3030,6 +3030,7 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter) { sbintime_t timestamp, time; + enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO; if (adapter->wd_active == 0) return; @@ -3041,7 +3042,10 @@ check_for_missing_keep_alive(struct ena_adapter *adapter) time = getsbinuptime() - timestamp; if (unlikely(time > adapter->keep_alive_timeout)) { ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n"); - ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); + if (ena_com_aenq_has_keep_alive(adapter->ena_dev)) + reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT; + + ena_trigger_reset(adapter, reset_reason); } } @@ -3049,10 +3053,15 @@ check_for_missing_keep_alive(struct ena_adapter *adapter) static void check_for_admin_com_state(struct ena_adapter *adapter) { + enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_ADMIN_TO; if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) { ena_log(adapter->pdev, ERR, "ENA admin queue is not in running state!\n"); - ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); + counter_u64_add(adapter->dev_stats.admin_q_pause, 1); + if (ena_com_get_missing_admin_interrupt(adapter->ena_dev)) + reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT; + + ena_trigger_reset(adapter, reset_reason); } } diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h index b747736224d8..1a436a702ba1 100644 --- a/sys/dev/ena/ena.h +++ b/sys/dev/ena/ena.h @@ -391,6 +391,8 @@ struct ena_stats_dev { counter_u64_t missing_intr; counter_u64_t tx_desc_malformed; counter_u64_t rx_desc_malformed; + counter_u64_t missing_admin_interrupt; + counter_u64_t admin_to; }; struct ena_hw_stats { @@ -542,7 +544,7 @@ struct ena_reset_stats_offset { static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_RESET_LAST] = { ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_KEEP_ALIVE_TO, wd_expired), - ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_ADMIN_TO, admin_q_pause), + ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_ADMIN_TO, admin_to), ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_OS_TRIGGER, os_trigger), ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISS_TX_CMPL, missing_tx_cmpl), ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_INV_RX_REQ_ID, bad_rx_req_id), @@ -552,6 +554,7 @@ static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_R ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISS_INTERRUPT, missing_intr), ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED, tx_desc_malformed), ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED, rx_desc_malformed), + ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT, missing_admin_interrupt), }; int ena_up(struct ena_adapter *adapter); diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c index e46e835572fe..c711ecf66d7a 100644 --- a/sys/dev/ena/ena_sysctl.c +++ b/sys/dev/ena/ena_sysctl.c @@ -298,6 +298,10 @@ ena_sysctl_add_stats(struct ena_adapter *adapter) &dev_stats->tx_desc_malformed, "TX descriptors malformed count"); SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "rx_desc_malformed", CTLFLAG_RD, &dev_stats->rx_desc_malformed, "RX descriptors malformed count"); + SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_admin_interrupt", CTLFLAG_RD, + &dev_stats->missing_admin_interrupt, "Missing admin interrupts count"); + SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD, + &dev_stats->admin_to, "Admin queue timeouts count"); SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD, &dev_stats->total_resets, "Total resets count");