git: f0adc907fc7d - main - mlx5en: sync channel close with the rq completion processing

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Mon, 31 Mar 2025 19:00:12 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=f0adc907fc7d3eebfc692fd5f4987c97e61b103d

commit f0adc907fc7d3eebfc692fd5f4987c97e61b103d
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2025-03-04 07:45:17 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2025-03-31 18:59:50 +0000

    mlx5en: sync channel close with the rq completion processing
    
    Without the wait, mlx5e_destroy_rq() might free an mbuf that is being
    passed up to the network stack on receive in mlx5e_poll_rx_cq().
    
    Sponsored by:   NVidia networking
    MFC after:      1 week
---
 sys/dev/mlx5/mlx5_en/en.h           |  1 +
 sys/dev/mlx5/mlx5_en/mlx5_en_main.c | 11 +++++++++++
 sys/dev/mlx5/mlx5_en/mlx5_en_rx.c   |  5 +++++
 3 files changed, 17 insertions(+)

diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index fdc12381e91b..f59902be226a 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -772,6 +772,7 @@ struct mlx5e_rq {
 	struct mlx5e_cq cq;
 	struct lro_ctrl lro;
 	volatile int enabled;
+	int processing;
 	int	ix;
 
 	/* Dynamic Interrupt Moderation */
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index 2c83492bdaa9..c3539cf8b8f2 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1470,6 +1470,17 @@ static void
 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
 {
 
+	mtx_lock(&rq->mtx);
+	MPASS(rq->enabled == 0);
+	while (rq->processing > 0) {
+		/*
+		 * No wakeup, relying on timeout.
+		 * Use msleep_sbt() since msleep() conflicts with linuxkpi.
+		 */
+		msleep_sbt(&rq->processing, &rq->mtx, 0, "mlx5ecrq",
+		    tick_sbt * hz, 0, C_HARDCLOCK);
+	}
+	mtx_unlock(&rq->mtx);
 	mlx5e_disable_rq(rq);
 	mlx5e_close_cq(&rq->cq);
 	cancel_work_sync(&rq->dim.work);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
index 3aef10109720..6b53db6fea23 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -697,6 +697,9 @@ mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
 	mtx_unlock(&c->iq.lock);
 
 	mtx_lock(&rq->mtx);
+	if (rq->enabled == 0)
+		goto out;
+	rq->processing++;
 
 	/*
 	 * Polling the entire CQ without posting new WQEs results in
@@ -717,6 +720,8 @@ mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
 		net_dim(&rq->dim, rq->stats.packets, rq->stats.bytes);
 	mlx5e_cq_arm(&rq->cq, MLX5_GET_DOORBELL_LOCK(&rq->channel->priv->doorbell_lock));
 	tcp_lro_flush_all(&rq->lro);
+	rq->processing--;
+out:
 	mtx_unlock(&rq->mtx);
 
 	for (int j = 0; j != MLX5E_MAX_TX_NUM_TC; j++) {