git: 266c81aae38a - main - mlx5/mlx5en: Add SQ remap support

From: Hans Petter Selasky <hselasky_at_FreeBSD.org>
Date: Tue, 01 Feb 2022 15:24:24 UTC
The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=266c81aae38adec45effaa67dfec5cd06996f9d3

commit 266c81aae38adec45effaa67dfec5cd06996f9d3
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-02-01 15:20:11 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-02-01 15:21:15 +0000

    mlx5/mlx5en: Add SQ remap support
    
    Add support for mapping an SQ to a specific schedule queue using a
    special WQE, as a performance enhancement.
    
    The SQ remap operation is handled by a privileged internal queue,
    the IQ, and remapping is only permitted from one non-zero rate to
    another.
    
    The transition between paced and non-paced mode must, however,
    always go through FW.
    
    MFC after:      1 week
    Sponsored by:   NVIDIA Networking
---
 sys/dev/mlx5/device.h             |   1 +
 sys/dev/mlx5/driver.h             |  12 +++++
 sys/dev/mlx5/mlx5_core/mlx5_rl.c  |  29 ++++++++++
 sys/dev/mlx5/mlx5_en/en.h         |   6 +++
 sys/dev/mlx5/mlx5_en/mlx5_en_iq.c |  10 ++++
 sys/dev/mlx5/mlx5_en/mlx5_en_rl.c | 109 ++++++++++++++++++++++++++++++++++++--
 sys/dev/mlx5/mlx5_ifc.h           |  61 ++++++++++++++++++---
 sys/dev/mlx5/qp.h                 |   7 +++
 8 files changed, 225 insertions(+), 10 deletions(-)

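In short, the rate-change logic added to mlx5_en_rl.c below reduces to the
following decision (a simplified sketch, not the literal driver code; the
variable and helper names are the ones introduced by this commit):

	/*
	 * Post a QOS_REMAP WQE only when the device reports the
	 * qos_remap_pp capability and both the old and the new rate are
	 * non-zero (paced -> paced). Transitions to or from rate 0
	 * (non-paced) take the FW MODIFY_SQ path, which is also the
	 * fallback when posting the WQE fails.
	 */
	if (use_sq_remap && mlx5e_rl_remap_sq(sq, index) == 0)
		return (0);	/* fast path: remap WQE posted */
	return (mlx5e_rl_modify_sq(sq, index));
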
diff --git a/sys/dev/mlx5/device.h b/sys/dev/mlx5/device.h
index 34e240241d63..bd52d04244af 100644
--- a/sys/dev/mlx5/device.h
+++ b/sys/dev/mlx5/device.h
@@ -391,6 +391,7 @@ enum {
 	MLX5_OPCODE_RCHECK_PSV		= 0x27,
 
 	MLX5_OPCODE_UMR			= 0x25,
+	MLX5_OPCODE_QOS_REMAP		= 0x2a,
 
 	MLX5_OPCODE_SIGNATURE_CANCELED	= (1 << 15),
 };
diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h
index f13de140d4ef..16ada325b1be 100644
--- a/sys/dev/mlx5/driver.h
+++ b/sys/dev/mlx5/driver.h
@@ -50,6 +50,8 @@
 #define MLX5_QCOUNTER_SETS_NETDEV 64
 #define MLX5_MAX_NUMBER_OF_VFS 128
 
+#define MLX5_INVALID_QUEUE_HANDLE 0xffffffff
+
 enum {
 	MLX5_BOARD_ID_LEN = 64,
 	MLX5_MAX_NAME_LEN = 16,
@@ -545,6 +547,7 @@ struct mlx5_rl_entry {
 	u32			rate;
 	u16			burst;
 	u16			index;
+	u32			qos_handle; /* schedule queue handle */
 	u32			refcount;
 };
 
@@ -1189,6 +1192,15 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
 int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst, u16 *index);
 void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst);
 bool mlx5_rl_is_in_range(const struct mlx5_core_dev *dev, u32 rate, u32 burst);
+int mlx5e_query_rate_limit_cmd(struct mlx5_core_dev *dev, u16 index, u32 *scq_handle);
+
+static inline u32 mlx5_rl_get_scq_handle(struct mlx5_core_dev *dev, uint16_t index)
+{
+	KASSERT(index > 0,
+	    ("invalid rate index for sq remap, failed retrieving SCQ handle"));
+
+	return (dev->priv.rl_table.rl_entry[index - 1].qos_handle);
+}
 
 static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev)
 {
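
Note that the rate index returned by mlx5_rl_add_rate() is 1-based (index 0
in the SQ context denotes no rate limiting), which is why
mlx5_rl_get_scq_handle() indexes rl_entry[index - 1]. A usage sketch
(hypothetical caller, error handling elided):

	u16 index;
	u32 scq_handle;

	if (mlx5_rl_add_rate(mdev, rate, burst, &index) == 0) {
		scq_handle = mlx5_rl_get_scq_handle(mdev, index);
		if (scq_handle == MLX5_INVALID_QUEUE_HANDLE) {
			/* no cached handle; remap unavailable, use FW */
		}
	}
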
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_rl.c b/sys/dev/mlx5/mlx5_core/mlx5_rl.c
index aa8f351e0fc6..d522e72d0442 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_rl.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_rl.c
@@ -73,6 +73,25 @@ static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
+int mlx5e_query_rate_limit_cmd(struct mlx5_core_dev *dev,
+				   u16 index, u32 *scq_handle)
+{
+	int err;
+	u32 in[MLX5_ST_SZ_DW(query_pp_rate_limit_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(query_pp_rate_limit_out)] = {};
+
+	MLX5_SET(query_pp_rate_limit_in, in, opcode, MLX5_CMD_OP_QUERY_RATE_LIMIT);
+	MLX5_SET(query_pp_rate_limit_in, in, rate_limit_index, index);
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	*scq_handle = MLX5_GET(query_pp_rate_limit_out, out, pp_context.qos_handle);
+
+	return 0;
+}
+
 bool mlx5_rl_is_in_range(const struct mlx5_core_dev *dev, u32 rate, u32 burst)
 {
 	const struct mlx5_rl_table *table = &dev->priv.rl_table;
@@ -122,6 +141,16 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst, u16 *index)
 		entry->rate = rate;
 		entry->burst = burst;
 		entry->refcount = 1;
+
+		if (MLX5_CAP_QOS(dev, qos_remap_pp)) {
+			err = mlx5e_query_rate_limit_cmd(dev, entry->index, &entry->qos_handle);
+			if (err) {
+			mlx5_core_err(dev, "Failed retrieving schedule queue handle for "
+			    "SQ remap: rate: %u error: (%d)\n", rate, err);
+				entry->qos_handle = MLX5_INVALID_QUEUE_HANDLE;
+			}
+		} else
+			entry->qos_handle = MLX5_INVALID_QUEUE_HANDLE;
 	}
 	*index = entry->index;
 
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index 3305890d8914..c026021c7145 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -880,6 +880,7 @@ struct mlx5e_sq {
 	u16	max_inline;
 	u8	min_inline_mode;
 	u8	min_insert_caps;
+	u32	queue_handle; /* SQ remap support */
 #define	MLX5E_INSERT_VLAN 1
 #define	MLX5E_INSERT_NON_VLAN 2
 
@@ -1145,6 +1146,11 @@ struct mlx5e_tx_psv_wqe {
 	struct mlx5_seg_set_psv psv;
 };
 
+struct mlx5e_tx_qos_remap_wqe {
+	struct mlx5_wqe_ctrl_seg ctrl;
+	struct mlx5_wqe_qos_remap_seg qos_remap;
+};
+
 struct mlx5e_rx_wqe {
 	struct mlx5_wqe_srq_next_seg next;
 	struct mlx5_wqe_data_seg data[];
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_iq.c b/sys/dev/mlx5/mlx5_en/mlx5_en_iq.c
index 3bc4959e046f..e375864dc85f 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_iq.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_iq.c
@@ -262,6 +262,16 @@ mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
 	MLX5_SET(sqc, sqc, allow_swp, 1);
 
+	/* SQ remap support requires the reg_umr privilege level */
+	if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) {
+		MLX5_SET(sqc, sqc, qos_remap_en, 1);
+		if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq))
+			MLX5_SET(sqc, sqc, reg_umr, 1);
+		else
+			mlx5_en_err(iq->priv->ifp,
+			    "No reg umr SQ capability, SQ remap disabled\n");
+	}
+
 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
 	MLX5_SET(wq, wq, uar_page, bfreg->index);
 	MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
index 09a894608e6d..6fe4bd344710 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
@@ -163,6 +163,29 @@ mlx5e_rl_destroy_sq(struct mlx5e_sq *sq)
 	bus_dma_tag_destroy(sq->dma_tag);
 }
 
+static int
+mlx5e_rl_query_sq(struct mlx5e_sq *sq)
+{
+	void *out;
+	int inlen;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+	out = mlx5_vzalloc(inlen);
+	if (!out)
+		return (-ENOMEM);
+
+	err = mlx5_core_query_sq(sq->priv->mdev, sq->sqn, out);
+	if (err)
+		goto out;
+
+	sq->queue_handle = MLX5_GET(query_sq_out, out, sq_context.queue_handle);
+
+out:
+	kvfree(out);
+	return (err);
+}
+
 static int
 mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
     struct mlx5e_sq_param *param, int ix)
@@ -181,6 +204,16 @@ mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
 	if (err)
 		goto err_disable_sq;
 
+	if (MLX5_CAP_QOS(priv->mdev, qos_remap_pp)) {
+		err = mlx5e_rl_query_sq(sq);
+		if (err) {
+			mlx5_en_err(priv->ifp, "Failed retrieving send queue handle for "
+			    "SQ remap - sqn=%u, err=(%d)\n", sq->sqn, err);
+			sq->queue_handle = MLX5_INVALID_QUEUE_HANDLE;
+		}
+	} else
+		sq->queue_handle = MLX5_INVALID_QUEUE_HANDLE;
+
 	WRITE_ONCE(sq->running, 1);
 
 	return (0);
@@ -390,6 +423,68 @@ mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate
 	return (retval);
 }
 
+static int
+mlx5e_rl_post_sq_remap_wqe(struct mlx5e_iq *iq, u32 scq_handle, u32 sq_handle)
+{
+	const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_qos_remap_wqe),
+	    MLX5_SEND_WQE_DS);
+	struct mlx5e_tx_qos_remap_wqe *wqe;
+	int pi;
+
+	mtx_lock(&iq->lock);
+	pi = mlx5e_iq_get_producer_index(iq);
+	if (pi < 0) {
+		mtx_unlock(&iq->lock);
+		return (-ENOMEM);
+	}
+	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->qos_remap.qos_handle = cpu_to_be32(scq_handle);
+	wqe->qos_remap.queue_handle = cpu_to_be32(sq_handle);
+
+	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) |
+	    MLX5_OPCODE_QOS_REMAP);
+	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
+	wqe->ctrl.imm = cpu_to_be32(iq->priv->tisn[0] << 8);
+	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE | MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+	/* copy data for doorbell */
+	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
+
+	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+	iq->pc += iq->data[pi].num_wqebbs;
+
+	mlx5e_iq_notify_hw(iq);
+
+	mtx_unlock(&iq->lock);
+
+	return (0); /* success */
+}
+
+static int
+mlx5e_rl_remap_sq(struct mlx5e_sq *sq, uint16_t index)
+{
+	struct mlx5e_channel *iq_channel;
+	u32 scq_handle;
+	u32 sq_handle;
+	int error;
+
+	/* Remap operations for a given SQ are always handled by the same IQ */
+	iq_channel = &sq->priv->channel[sq->sqn % sq->priv->params.num_channels];
+
+	sq_handle = sq->queue_handle;
+	scq_handle = mlx5_rl_get_scq_handle(sq->priv->mdev, index);
+
+	if (sq_handle == MLX5_INVALID_QUEUE_HANDLE || scq_handle == MLX5_INVALID_QUEUE_HANDLE)
+		error = -1;
+	else
+		error = mlx5e_rl_post_sq_remap_wqe(&iq_channel->iq, scq_handle, sq_handle);
+
+	return (error);
+}
+
 /*
  * This function sets the requested rate for a rate limit channel, in
  * bits per second. The requested rate will be filtered through the
@@ -405,6 +500,7 @@ mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
 	uint16_t index;
 	uint16_t burst;
 	int error;
+	bool use_sq_remap;
 
 	if (rate != 0) {
 		MLX5E_RL_WORKER_UNLOCK(rlw);
@@ -448,6 +544,10 @@ mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
 		burst = 0;	/* default */
 	}
 
+	/* paced <--> non-paced transitions must go via FW */
+	use_sq_remap = MLX5_CAP_QOS(rlw->priv->mdev, qos_remap_pp) &&
+	    channel->last_rate != 0 && rate != 0;
+
 	/* atomically swap rates */
 	temp = channel->last_rate;
 	channel->last_rate = rate;
@@ -468,11 +568,14 @@ mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
 	/* set new rate, if SQ is running */
 	sq = channel->sq;
 	if (sq != NULL && READ_ONCE(sq->running) != 0) {
-		error = mlx5e_rl_modify_sq(sq, index);
-		if (error != 0)
-			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
+		if (!use_sq_remap || mlx5e_rl_remap_sq(sq, index)) {
+			error = mlx5e_rl_modify_sq(sq, index);
+			if (error != 0)
+				atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
+		}
 	} else
 		error = 0;
+
 	MLX5E_RL_WORKER_LOCK(rlw);
 
 	return (-error);
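
The control-segment encoding in mlx5e_rl_post_sq_remap_wqe() above follows
the standard mlx5 send-WQE convention; annotated for reference (the
comments are added here and are not part of the commit):

	/* opmod_idx_opcode: bits [23:8] = producer counter, [7:0] = opcode */
	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) |
	    MLX5_OPCODE_QOS_REMAP);
	/* qpn_ds: bits [31:8] = SQ number, bits [5:0] = 16-byte ds count */
	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
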
diff --git a/sys/dev/mlx5/mlx5_ifc.h b/sys/dev/mlx5/mlx5_ifc.h
index 421a389a2bb1..04aea2d0be3f 100644
--- a/sys/dev/mlx5/mlx5_ifc.h
+++ b/sys/dev/mlx5/mlx5_ifc.h
@@ -736,7 +736,9 @@ struct mlx5_ifc_qos_cap_bits {
 	u8         packet_pacing_typical_size[0x1];
 	u8         reserved_at_7[0x19];
 
-	u8         reserved_at_20[0x20];
+	u8         reserved_at_20[0xa];
+	u8         qos_remap_pp[0x1];
+	u8         reserved_at_2b[0x15];
 
 	u8         packet_pacing_max_rate[0x20];
 
@@ -926,9 +928,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         multi_pkt_send_wqe[0x2];
 	u8         wqe_inline_mode[0x2];
 	u8         rss_ind_tbl_cap[0x4];
-	u8         reg_umr_sq[0x1];
+	u8	   reg_umr_sq[0x1];
 	u8         scatter_fcs[0x1];
-	u8         enhanced_multi_pkt_send_wqe[0x1];
+	u8	   enhanced_multi_pkt_send_wqe[0x1];
 	u8         tunnel_lso_const_out_ip_id[0x1];
 	u8         tunnel_lro_gre[0x1];
 	u8         tunnel_lro_vxlan[0x1];
@@ -2478,7 +2480,9 @@ struct mlx5_ifc_sqc_bits {
 	u8         state[0x4];
 	u8         reg_umr[0x1];
 	u8         allow_swp[0x1];
-	u8         reserved_at_e[0xc];
+	u8         reserved_at_e[0x4];
+	u8         qos_remap_en[0x1];
+	u8         reserved_at_13[0x7];
 	u8         ts_format[0x2];
 	u8         reserved_at_1c[0x4];
 
@@ -2494,9 +2498,12 @@ struct mlx5_ifc_sqc_bits {
 	u8         packet_pacing_rate_limit_index[0x10];
 
 	u8         tis_lst_sz[0x10];
-	u8         reserved_4[0x10];
+	u8         qos_queue_group_id[0x10];
 
-	u8         reserved_5[0x40];
+	u8         reserved_4[0x8];
+	u8         queue_handle[0x18];
+
+	u8         reserved_5[0x20];
 
 	u8         reserved_6[0x8];
 	u8         tis_num_0[0x18];
@@ -2504,6 +2511,45 @@ struct mlx5_ifc_sqc_bits {
 	struct mlx5_ifc_wq_bits wq;
 };
 
+struct mlx5_ifc_query_pp_rate_limit_in_bits {
+	u8         opcode[0x10];
+	u8         uid[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         rate_limit_index[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_pp_context_bits {
+	u8         rate_limit[0x20];
+
+	u8         burst_upper_bound[0x20];
+
+	u8         reserved_1[0xc];
+	u8         rate_mode[0x4];
+	u8         typical_packet_size[0x10];
+
+	u8         reserved_2[0x8];
+	u8         qos_handle[0x18];
+
+	u8         reserved_3[0x40];
+};
+
+struct mlx5_ifc_query_pp_rate_limit_out_bits {
+	u8         status[0x8];
+	u8         reserved_1[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_2[0x40];
+
+	struct mlx5_ifc_pp_context_bits pp_context;
+};
+
 enum {
 	MLX5_TSAR_TYPE_DWRR = 0,
 	MLX5_TSAR_TYPE_ROUND_ROUBIN = 1,
@@ -5747,12 +5793,13 @@ struct mlx5_ifc_modify_rqt_out_bits {
 };
 
 struct mlx5_ifc_rqt_bitmask_bits {
-	u8	   reserved_at_0[0x20];
+	u8         reserved_at_0[0x20];
 
 	u8         reserved_at_20[0x1f];
 	u8         rqn_list[0x1];
 };
 
+
 struct mlx5_ifc_modify_rqt_in_bits {
 	u8         opcode[0x10];
 	u8         uid[0x10];
diff --git a/sys/dev/mlx5/qp.h b/sys/dev/mlx5/qp.h
index 500cda4c613f..c3f526034bcb 100644
--- a/sys/dev/mlx5/qp.h
+++ b/sys/dev/mlx5/qp.h
@@ -322,6 +322,13 @@ struct mlx5_seg_set_psv {
 	__be32		ref_tag;
 };
 
+struct mlx5_wqe_qos_remap_seg {
+	u8		rsvd0[4];
+	u8		rsvd1[4];
+	__be32		qos_handle;
+	__be32		queue_handle;
+};
+
 struct mlx5_seg_get_psv {
 	u8		rsvd[19];
 	u8		num_psv;
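
Size check on the new segment: mlx5_wqe_qos_remap_seg is 16 bytes, so the
full mlx5e_tx_qos_remap_wqe from en.h (control segment included) is 32
bytes. With MLX5_SEND_WQE_DS == 16 that gives ds_cnt = 2, and with
MLX5_SEND_WQEBB_NUM_DS == 4 a single 64-byte WQEBB, matching the
DIV_ROUND_UP() arithmetic in mlx5e_rl_post_sq_remap_wqe(). As a
compile-time sanity sketch (not part of the commit):

	CTASSERT(sizeof(struct mlx5_wqe_qos_remap_seg) == 16);
	CTASSERT(sizeof(struct mlx5e_tx_qos_remap_wqe) == 32);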