git: d5141ea592ea - stable/13 - ena: Add support for device request reset message over AENQ

From: Osama Abboud <osamaabb_at_FreeBSD.org>
Date: Thu, 31 Oct 2024 16:00:13 UTC
The branch stable/13 has been updated by osamaabb:

URL: https://cgit.FreeBSD.org/src/commit/?id=d5141ea592eaf1f7fbc24420fb9a6b32fe4b9c7d

commit d5141ea592eaf1f7fbc24420fb9a6b32fe4b9c7d
Author:     Osama Abboud <osamaabb@amazon.com>
AuthorDate: 2024-08-07 06:24:21 +0000
Commit:     Osama Abboud <osamaabb@FreeBSD.org>
CommitDate: 2024-10-31 14:55:20 +0000

    ena: Add support for device request reset message over AENQ
    
    This commit adds a handler for the new AENQ message
    ENA_ADMIN_DEVICE_REQUEST_RESET, which in turn causes the driver to
    trigger a reset of a new type: ENA_REGS_RESET_DEVICE_REQUEST.
    It also adds a new statistic, device_request_reset, that counts
    such occurrences.
    
    Approved by: cperciva (mentor)
    Sponsored by: Amazon, Inc.
    
    (cherry picked from commit 705879424bc76fcc925e78eb7643dbf4bd9a11eb)
---
 sys/dev/ena/ena.c        | 13 ++++++++++++-
 sys/dev/ena/ena.h        |  2 ++
 sys/dev/ena/ena_sysctl.c |  2 ++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
index b09deb162a12..951fc6719aa3 100644
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -2991,7 +2991,8 @@ ena_device_init(struct ena_adapter *adapter, device_t pdev,
 	    BIT(ENA_ADMIN_WARNING) |
 	    BIT(ENA_ADMIN_NOTIFICATION) |
 	    BIT(ENA_ADMIN_KEEP_ALIVE) |
-	    BIT(ENA_ADMIN_CONF_NOTIFICATIONS);
+	    BIT(ENA_ADMIN_CONF_NOTIFICATIONS) |
+	    BIT(ENA_ADMIN_DEVICE_REQUEST_RESET);
 
 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
 	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
@@ -4196,12 +4197,22 @@ static void ena_conf_notification(void *adapter_data,
 	}
 }
 
+static void ena_admin_device_request_reset(void *adapter_data,
+    struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+	ena_log(adapter->pdev, WARN,
+	    "The device has detected an unhealthy state, reset is requested\n");
+	ena_trigger_reset(adapter, ENA_REGS_RESET_DEVICE_REQUEST);
+}
+
 static struct ena_aenq_handlers aenq_handlers = {
     .handlers = {
 	    [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
 	    [ENA_ADMIN_NOTIFICATION] = ena_notification,
 	    [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
 	    [ENA_ADMIN_CONF_NOTIFICATIONS] = ena_conf_notification,
+	    [ENA_ADMIN_DEVICE_REQUEST_RESET] = ena_admin_device_request_reset,
     },
     .unimplemented_handler = unimplemented_aenq_handler
 };
diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h
index c9eb9e8c43d3..7ec470638859 100644
--- a/sys/dev/ena/ena.h
+++ b/sys/dev/ena/ena.h
@@ -396,6 +396,7 @@ struct ena_stats_dev {
 	counter_u64_t rx_desc_malformed;
 	counter_u64_t missing_admin_interrupt;
 	counter_u64_t admin_to;
+	counter_u64_t device_request_reset;
 };
 
 struct ena_hw_stats {
@@ -558,6 +559,7 @@ static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_R
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED, tx_desc_malformed),
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED, rx_desc_malformed),
 	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT, missing_admin_interrupt),
+	ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_DEVICE_REQUEST, device_request_reset),
 };
 
 int	ena_up(struct ena_adapter *adapter);
diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c
index e18328ddc8b2..ce6e0b7375e6 100644
--- a/sys/dev/ena/ena_sysctl.c
+++ b/sys/dev/ena/ena_sysctl.c
@@ -301,6 +301,8 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
 	    &dev_stats->missing_admin_interrupt, "Missing admin interrupts count");
 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD,
 	    &dev_stats->admin_to, "Admin queue timeouts count");
+	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "device_request_reset", CTLFLAG_RD,
+	    &dev_stats->device_request_reset, "Device reset requests count");
 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD,
 	    &dev_stats->total_resets, "Total resets count");