git: a0594d1f65d6 - stable/14 - ena: Add reset reason for missing admin interrupt

From: Osama Abboud <osamaabb_at_FreeBSD.org>
Date: Thu, 31 Oct 2024 16:01:29 UTC
The branch stable/14 has been updated by osamaabb:

URL: https://cgit.FreeBSD.org/src/commit/?id=a0594d1f65d67b2ff91cf9c0caa94cdebae7e04e

commit a0594d1f65d67b2ff91cf9c0caa94cdebae7e04e
Author:     Osama Abboud <osamaabb@amazon.com>
AuthorDate: 2024-08-07 06:24:19 +0000
Commit:     Osama Abboud <osamaabb@FreeBSD.org>
CommitDate: 2024-10-31 14:54:10 +0000

    ena: Add reset reason for missing admin interrupt
    
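    A device reset may be triggered because an admin interrupt
    is missing.
    In order to identify this case specifically,
    this commit adds a new reset reason, together with matching
    missing_admin_interrupt and admin_to statistics counters
    exposed via sysctl. The admin_q_pause counter is now incremented
    directly when the admin queue is found not running.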
    
    Approved by: cperciva (mentor)
    Sponsored by: Amazon, Inc.
    
    (cherry picked from commit 274319acb48424958242d55e1b0c7d4528da7f70)
---
 sys/dev/ena/ena.c        | 13 +++++++++++--
 sys/dev/ena/ena.h        |  5 ++++-
 sys/dev/ena/ena_sysctl.c |  4 ++++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
index 3f3a4946ccca..36e9ac15e8ff 100644
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -3029,6 +3029,7 @@ static void
 check_for_missing_keep_alive(struct ena_adapter *adapter)
 {
 	sbintime_t timestamp, time;
+	enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
 
 	if (adapter->wd_active == 0)
 		return;
@@ -3040,7 +3041,10 @@ check_for_missing_keep_alive(struct ena_adapter *adapter)
 	time = getsbinuptime() - timestamp;
 	if (unlikely(time > adapter->keep_alive_timeout)) {
 		ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n");
-		ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
+		if (ena_com_aenq_has_keep_alive(adapter->ena_dev))
+			reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT;
+
+		ena_trigger_reset(adapter, reset_reason);
 	}
 }
 
@@ -3048,10 +3052,15 @@ check_for_missing_keep_alive(struct ena_adapter *adapter)
 static void
 check_for_admin_com_state(struct ena_adapter *adapter)
 {
+	enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_ADMIN_TO;
 	if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) {
 		ena_log(adapter->pdev, ERR,
 		    "ENA admin queue is not in running state!\n");
-		ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
+		counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
+		if (ena_com_get_missing_admin_interrupt(adapter->ena_dev))
+			reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT;
+
+		ena_trigger_reset(adapter, reset_reason);
 	}
 }
 
diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h
index b747736224d8..1a436a702ba1 100644
--- a/sys/dev/ena/ena.h
+++ b/sys/dev/ena/ena.h
@@ -391,6 +391,8 @@ struct ena_stats_dev {
 	counter_u64_t missing_intr;
 	counter_u64_t tx_desc_malformed;
 	counter_u64_t rx_desc_malformed;
+	counter_u64_t missing_admin_interrupt;
+	counter_u64_t admin_to;
 };
 
 struct ena_hw_stats {
@@ -542,7 +544,7 @@ struct ena_reset_stats_offset {
 
 static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_RESET_LAST] = {
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_KEEP_ALIVE_TO, wd_expired),
-	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_ADMIN_TO, admin_q_pause),
+	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_ADMIN_TO, admin_to),
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_OS_TRIGGER, os_trigger),
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISS_TX_CMPL, missing_tx_cmpl),
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_INV_RX_REQ_ID, bad_rx_req_id),
@@ -552,6 +554,7 @@ static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_R
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISS_INTERRUPT, missing_intr),
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED, tx_desc_malformed),
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED, rx_desc_malformed),
+	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT, missing_admin_interrupt),
 };
 
 int	ena_up(struct ena_adapter *adapter);
diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c
index e46e835572fe..c711ecf66d7a 100644
--- a/sys/dev/ena/ena_sysctl.c
+++ b/sys/dev/ena/ena_sysctl.c
@@ -298,6 +298,10 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
 	    &dev_stats->tx_desc_malformed, "TX descriptors malformed count");
 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "rx_desc_malformed", CTLFLAG_RD,
 	    &dev_stats->rx_desc_malformed, "RX descriptors malformed count");
+	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_admin_interrupt", CTLFLAG_RD,
+	    &dev_stats->missing_admin_interrupt, "Missing admin interrupts count");
+	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD,
+	    &dev_stats->admin_to, "Admin queue timeouts count");
 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD,
 	    &dev_stats->total_resets, "Total resets count");