git: db0c751ed7f1 - stable/14 - ena: Add differentiation for missing TX completions reset

From: Osama Abboud <osamaabb_at_FreeBSD.org>
Date: Thu, 31 Oct 2024 16:01:31 UTC
The branch stable/14 has been updated by osamaabb:

URL: https://cgit.FreeBSD.org/src/commit/?id=db0c751ed7f1dc1c2c3daeaafa9c5354f48b8589

commit db0c751ed7f1dc1c2c3daeaafa9c5354f48b8589
Author:     Osama Abboud <osamaabb@amazon.com>
AuthorDate: 2024-08-07 06:24:19 +0000
Commit:     Osama Abboud <osamaabb@FreeBSD.org>
CommitDate: 2024-10-31 14:54:11 +0000

    ena: Add differentiation for missing TX completions reset
    
    This commit differentiates the reset reason reported when TX
    completions are missing, by checking whether the completions are
    absent from the TX completion queue or are present but were not
    processed because of missing interrupts.
    The cleanup_running field was added to ena_ring because
    cleanup_task.ta_pending is zeroed before ena_cleanup() runs.
    
    Also, the ena_increment_reset_counter() API was added to support
    incrementing the reset counters without triggering a reset.
    
    Approved by: cperciva (mentor)
    Sponsored by: Amazon, Inc.
    
    (cherry picked from commit a33ec635d1f6d574d54e6f6d74766d070183be4c)
---
 sys/dev/ena/ena.c          | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 sys/dev/ena/ena.h          | 29 ++++++++++++++++++-----------
 sys/dev/ena/ena_datapath.c | 21 +++++++++++++++------
 3 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
index 8c20596d3d23..bbc857004671 100644
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -169,6 +169,9 @@ static int ena_copy_eni_metrics(struct ena_adapter *);
 static int ena_copy_srd_metrics(struct ena_adapter *);
 static int ena_copy_customer_metrics(struct ena_adapter *);
 static void ena_timer_service(void *);
+static enum ena_regs_reset_reason_types check_cdesc_in_tx_cq(struct ena_adapter *,
+    struct ena_ring *);
+
 
 static char ena_version[] = ENA_DEVICE_NAME ENA_DRV_MODULE_NAME
     " v" ENA_DRV_MODULE_VERSION;
@@ -3088,6 +3091,31 @@ check_for_rx_interrupt_queue(struct ena_adapter *adapter,
 	return (0);
 }
 
+static enum ena_regs_reset_reason_types
+check_cdesc_in_tx_cq(struct ena_adapter *adapter,
+    struct ena_ring *tx_ring)
+{
+	device_t pdev = adapter->pdev;
+	int rc;
+	u16 req_id;
+
+	rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id);
+	/* TX CQ is empty: the completions themselves are missing */
+	if (rc == ENA_COM_TRY_AGAIN) {
+		ena_log(pdev, ERR,
+		    "No completion descriptors found in CQ %d\n",
+		    tx_ring->qid);
+		return (ENA_REGS_RESET_MISS_TX_CMPL);
+	}
+
+	/* TX CQ has cdescs: report a missing interrupt instead */
+	ena_log(pdev, ERR,
+	    "Completion descriptors found in CQ %d\n",
+	    tx_ring->qid);
+
+	return (ENA_REGS_RESET_MISS_INTERRUPT);
+}
+
 static int
 check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
     struct ena_ring *tx_ring)
@@ -3100,6 +3128,8 @@ check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
 	int missing_tx_comp_to;
 	sbintime_t time_offset;
 	int i, rc = 0;
+	enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
+	bool cleanup_scheduled, cleanup_running;
 
 	getbinuptime(&curtime);
 
@@ -3155,7 +3185,19 @@ check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
 		    "The number of lost tx completion is above the threshold "
 		    "(%d > %d). Reset the device\n",
 		    missed_tx, adapter->missing_tx_threshold);
-		ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
+		/* Set the reset flag to prevent ena_cleanup() from running */
+		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+		/* Need to make sure that ENA_FLAG_TRIGGER_RESET is visible to ena_cleanup() and
+		 * that cleanup_running is visible to check_missing_comp_in_tx_queue() to
+		 * prevent the case of accessing CQ concurrently with check_cdesc_in_tx_cq()
+		 */
+		mb();
+		cleanup_scheduled = !!(atomic_load_16(&tx_ring->que->cleanup_task.ta_pending));
+		cleanup_running = !!(atomic_load_8((&tx_ring->cleanup_running)));
+		if (!(cleanup_scheduled || cleanup_running))
+			reset_reason = check_cdesc_in_tx_cq(adapter, tx_ring);
+
+		adapter->reset_reason = reset_reason;
 		rc = EIO;
 	}
 	/* Add the newly discovered missing TX completions */
@@ -3618,6 +3660,7 @@ ena_reset_task(void *arg, int pending)
 
 	ENA_LOCK_LOCK();
 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
+		ena_increment_reset_counter(adapter);
 		ena_destroy_device(adapter, false);
 		ena_restore_device(adapter);
 
diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h
index 876c3cd258aa..c9eb9e8c43d3 100644
--- a/sys/dev/ena/ena.h
+++ b/sys/dev/ena/ena.h
@@ -327,6 +327,7 @@ struct ena_ring {
 	};
 
 	uint8_t first_interrupt;
+	uint8_t cleanup_running;
 	uint16_t no_interrupt_event_cnt;
 
 	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
@@ -584,21 +585,27 @@ ena_mbuf_count(struct mbuf *mbuf)
 }
 
 static inline void
-ena_trigger_reset(struct ena_adapter *adapter,
-    enum ena_regs_reset_reason_types reset_reason)
+ena_increment_reset_counter(struct ena_adapter *adapter)
 {
-	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
-		const struct ena_reset_stats_offset *ena_reset_stats_offset =
-		    &resets_to_stats_offset_map[reset_reason];
+	enum ena_regs_reset_reason_types reset_reason = adapter->reset_reason;
+	const struct ena_reset_stats_offset *ena_reset_stats_offset =
+	    &resets_to_stats_offset_map[reset_reason];
 
-		if (ena_reset_stats_offset->has_counter) {
-			uint64_t *stat_ptr = (uint64_t *)&adapter->dev_stats +
-			    ena_reset_stats_offset->stat_offset;
+	if (ena_reset_stats_offset->has_counter) {
+		uint64_t *stat_ptr = (uint64_t *)&adapter->dev_stats +
+		    ena_reset_stats_offset->stat_offset;
 
-			counter_u64_add((counter_u64_t)(*stat_ptr), 1);
-		}
+		counter_u64_add((counter_u64_t)(*stat_ptr), 1);
+	}
+
+	counter_u64_add(adapter->dev_stats.total_resets, 1);
+}
 
-		counter_u64_add(adapter->dev_stats.total_resets, 1);
+static inline void
+ena_trigger_reset(struct ena_adapter *adapter,
+    enum ena_regs_reset_reason_types reset_reason)
+{
+	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
 		adapter->reset_reason = reset_reason;
 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
 	}
diff --git a/sys/dev/ena/ena_datapath.c b/sys/dev/ena/ena_datapath.c
index 6cbe46cead3e..20864d0d2df6 100644
--- a/sys/dev/ena/ena_datapath.c
+++ b/sys/dev/ena/ena_datapath.c
@@ -77,17 +77,24 @@ ena_cleanup(void *arg, int pending)
 	int qid, ena_qid;
 	int txc, rxc, i;
 
-	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
-		return;
-
-	ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
-
 	tx_ring = que->tx_ring;
 	rx_ring = que->rx_ring;
 	qid = que->id;
 	ena_qid = ENA_IO_TXQ_IDX(qid);
 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
 
+	atomic_store_8(&tx_ring->cleanup_running, 1);
+	/* Need to make sure that ENA_FLAG_TRIGGER_RESET is visible to ena_cleanup() and
+	 * that cleanup_running is visible to check_missing_comp_in_tx_queue() to
+	 * prevent the case of accessing CQ concurrently with check_cdesc_in_tx_cq()
+	 */
+	mb();
+	if (unlikely(((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
+	    (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))))
+		return;
+
+	ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
+
 	atomic_store_8(&tx_ring->first_interrupt, 1);
 	atomic_store_8(&rx_ring->first_interrupt, 1);
 
@@ -95,7 +102,8 @@ ena_cleanup(void *arg, int pending)
 		rxc = ena_rx_cleanup(rx_ring);
 		txc = ena_tx_cleanup(tx_ring);
 
-		if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
+		if (unlikely(((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
+		    (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))))
 			return;
 
 		if ((txc != ENA_TX_BUDGET) && (rxc != ENA_RX_BUDGET))
@@ -107,6 +115,7 @@ ena_cleanup(void *arg, int pending)
 	    ENA_TX_IRQ_INTERVAL, true, false);
 	counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
 	ena_com_unmask_intr(io_cq, &intr_reg);
+	atomic_store_8(&tx_ring->cleanup_running, 0);
 }
 
 void