git: aa7bbdabde89 - main - mlx5: Implement diagnostic counters as sysctl(8) nodes.

From: Hans Petter Selasky <hselasky_at_FreeBSD.org>
Date: Tue, 18 Apr 2023 13:02:07 UTC
The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=aa7bbdabde890676d54a22eeec3af9b7681d19b7

commit aa7bbdabde890676d54a22eeec3af9b7681d19b7
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2023-04-18 11:38:59 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2023-04-18 13:01:07 +0000

    mlx5: Implement diagnostic counters as sysctl(8) nodes.
    
    MFC after:      1 week
    Sponsored by:   NVIDIA Networking
---
 sys/conf/files                         |   2 +
 sys/dev/mlx5/driver.h                  |  22 ++
 sys/dev/mlx5/mlx5_core/diag_cnt.h      |  43 +++
 sys/dev/mlx5/mlx5_core/mlx5_diag_cnt.c | 579 +++++++++++++++++++++++++++++++++
 sys/dev/mlx5/mlx5_core/mlx5_main.c     |  14 +-
 sys/dev/mlx5/mlx5_ifc.h                |  36 ++
 sys/modules/mlx5/Makefile              |   1 +
 7 files changed, 696 insertions(+), 1 deletion(-)

diff --git a/sys/conf/files b/sys/conf/files
index 4e981a135b0c..e94652462a17 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4896,6 +4896,8 @@ dev/mlx5/mlx5_core/mlx5_cmd.c			optional mlx5 pci	\
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_cq.c			optional mlx5 pci	\
 	compile-with "${OFED_C}"
+dev/mlx5/mlx5_core/mlx5_diag_cnt.c		optional mlx5 pci	\
+	compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_diagnostics.c		optional mlx5 pci	\
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_eq.c			optional mlx5 pci	\
diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h
index 47ed4a9d73f3..12e6de958371 100644
--- a/sys/dev/mlx5/driver.h
+++ b/sys/dev/mlx5/driver.h
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2013-2019, Mellanox Technologies, Ltd.  All rights reserved.
+ * Copyright (c) 2022 NVIDIA corporation & affiliates.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -652,6 +653,26 @@ struct mlx5_special_contexts {
 	int resd_lkey;
 };
 
+struct mlx5_diag_cnt_id {	/* one supported diagnostic counter */
+	u16	id;		/* counter ID read from the debug capabilities */
+	bool	enabled;	/* selected through the counter_id sysctl */
+};
+
+struct mlx5_diag_cnt {		/* per-device diagnostic counter state */
+#define	DIAG_LOCK(dc) mutex_lock(&(dc)->lock)
+#define	DIAG_UNLOCK(dc) mutex_unlock(&(dc)->lock)
+	struct mutex lock;	/* taken by the diag_cnt sysctl handlers */
+	struct sysctl_ctx_list sysctl_ctx;	/* context the diag_cnt sysctl nodes are added to */
+	struct mlx5_diag_cnt_id *cnt_id;	/* one entry per supported counter */
+	u16	num_of_samples;	/* sent as num_of_samples in QUERY_DIAGNOSTICS */
+	u16	sample_index;	/* sent as sample_index in QUERY_DIAGNOSTICS */
+	u8	num_cnt_id;	/* number of counters selected via the counter_id sysctl */
+	u8	log_num_of_samples;	/* written to diagnostic_params_context */
+	u8	log_sample_period;	/* written to diagnostic_params_context */
+	u8	flag;		/* bits 7..2: single, repetitive, sync, clear, on_demand, enable */
+	u8	ready;		/* set by the dump_set sysctl; checked by dump_get */
+};
+
 struct mlx5_flow_root_namespace;
 struct mlx5_core_dev {
 	struct pci_dev	       *pdev;
@@ -681,6 +702,7 @@ struct mlx5_core_dev {
 	struct mlx5_priv	priv;
 	struct mlx5_profile	*profile;
 	atomic_t		num_qps;
+	struct mlx5_diag_cnt	diag_cnt;
 	u32			vsc_addr;
 	u32			issi;
 	struct mlx5_special_contexts special_contexts;
diff --git a/sys/dev/mlx5/mlx5_core/diag_cnt.h b/sys/dev/mlx5/mlx5_core/diag_cnt.h
new file mode 100644
index 000000000000..25c460b19382
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_core/diag_cnt.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2018, Mellanox Technologies, Ltd.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __MLX5_DIAG_CNT_H__
+#define	__MLX5_DIAG_CNT_H__
+
+#include <dev/mlx5/driver.h>
+#include <dev/mlx5/mlx5_core/mlx5_core.h>
+
+#define	MLX5_DIAG_CNT_SUPPORTED(mdev) \
+	(MLX5_CAP_GEN(mdev, debug) && \
+	 MLX5_CAP_GEN(mdev, num_of_diagnostic_counters))	/* true when both general capability fields are non-zero */
+
+int	mlx5_diag_cnt_init(struct mlx5_core_dev *);	/* create counter state and sysctl nodes */
+void	mlx5_diag_cnt_cleanup(struct mlx5_core_dev *);	/* free sysctl nodes and counter state */
+
+int	mlx5_diag_query_params(struct mlx5_core_dev *);	/* debug: log the device's parameters */
+int	mlx5_diag_set_params(struct mlx5_core_dev *);	/* issue SET_DIAGNOSTICS */
+int	mlx5_diag_query_counters(struct mlx5_core_dev *, u8 * *out_buffer);	/* on success the caller kfree()s *out_buffer */
+
+#endif					/* __MLX5_DIAG_CNT_H__ */
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_diag_cnt.c b/sys/dev/mlx5/mlx5_core/mlx5_diag_cnt.c
new file mode 100644
index 000000000000..83f8bf2b148f
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_core/mlx5_diag_cnt.c
@@ -0,0 +1,579 @@
+/*-
+ * Copyright (c) 2018, Mellanox Technologies, Ltd.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <dev/mlx5/mlx5_core/diag_cnt.h>
+
+static int get_supported_cnt_ids(struct mlx5_core_dev *dev);	/* forward declarations of helpers defined later in this file */
+static int enable_cnt_id(struct mlx5_core_dev *dev, u16 id);
+static void reset_cnt_id(struct mlx5_core_dev *dev);
+static void reset_params(struct mlx5_diag_cnt *diag_cnt);
+
+/*
+ * Sysctl handler for "counter_id": reports the enabled counter IDs as an
+ * array of uint16_t and, on write, replaces the selection with the IDs
+ * supplied.  A failed write leaves no counter selected.
+ */
+static int
+mlx5_sysctl_counter_id(SYSCTL_HANDLER_ARGS)
+{
+	struct mlx5_core_dev *dev = arg1;
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	size_t max = MLX5_CAP_GEN(dev, num_of_diagnostic_counters);
+	uint16_t *ptr;
+	size_t num;
+	size_t x;
+	int err;
+
+	ptr = kmalloc(sizeof(ptr[0]) * max, GFP_KERNEL);
+
+	DIAG_LOCK(diag_cnt);
+
+	for (x = num = 0; x != max; x++) {
+		if (diag_cnt->cnt_id[x].enabled)
+			ptr[num++] = diag_cnt->cnt_id[x].id;
+	}
+
+	err = SYSCTL_OUT(req, ptr, sizeof(ptr[0]) * num);
+	if (err || !req->newptr)
+		goto done;
+
+	/* num_cnt_id is a u8, so larger selections cannot be recorded. */
+	num = req->newlen / sizeof(ptr[0]);
+	if (num > max || num > 0xff) {
+		err = ENOMEM;
+		goto done;
+	}
+
+	/* Do not act on the buffer unless the copy-in succeeded. */
+	err = SYSCTL_IN(req, ptr, sizeof(ptr[0]) * num);
+	if (err)
+		goto done;
+
+	reset_cnt_id(dev);
+	for (x = 0; x != num; x++) {
+		err = enable_cnt_id(dev, ptr[x]);
+		if (err)
+			goto done;
+	}
+	diag_cnt->num_cnt_id = num;
+done:
+	kfree(ptr);
+	if (err != 0 && req->newptr != NULL)
+		reset_cnt_id(dev);
+	DIAG_UNLOCK(diag_cnt);
+
+	return (err);
+}
+
+#define	NUM_OF_DIAG_PARAMS 5
+
+/*
+ * Sysctl handler for "params": reads or writes five uint32_t values:
+ * log_num_of_samples, log_sample_period, flag, num_of_samples and
+ * sample_index.  A write failing validation leaves them all zeroed.
+ */
+static int
+mlx5_sysctl_params(SYSCTL_HANDLER_ARGS)
+{
+	struct mlx5_core_dev *dev = arg1;
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	uint32_t temp[NUM_OF_DIAG_PARAMS];
+	int err;
+
+	DIAG_LOCK(diag_cnt);
+
+	temp[0] = diag_cnt->log_num_of_samples;
+	temp[1] = diag_cnt->log_sample_period;
+	temp[2] = diag_cnt->flag;
+	temp[3] = diag_cnt->num_of_samples;
+	temp[4] = diag_cnt->sample_index;
+
+	err = SYSCTL_OUT(req, temp, sizeof(temp));
+	if (err || !req->newptr)
+		goto done;
+
+	err = SYSCTL_IN(req, temp, sizeof(temp));
+	if (err)
+		goto done;
+
+	reset_params(diag_cnt);
+
+	/*
+	 * Bound the sample count and index by the requested temp[0]; the
+	 * stored log_num_of_samples was just reset to zero.  Also reject
+	 * values that do not fit the u8/u16 fields they are stored in.
+	 */
+	err = ERANGE;
+	if (temp[0] > MLX5_CAP_DEBUG(dev, log_max_samples) ||
+	    (1U << (MLX5_CAP_DEBUG(dev, log_max_samples) - temp[0])) <
+	    diag_cnt->num_cnt_id)
+		goto done;
+	if (temp[1] < MLX5_CAP_DEBUG(dev, log_min_sample_period) ||
+	    temp[1] >= 0x100 || temp[2] >= 0x100)
+		goto done;
+	if (temp[3] > (1U << temp[0]) || temp[3] >= 0x10000 ||
+	    temp[4] > (1U << temp[0]) || temp[4] >= 0x10000)
+		goto done;
+	err = 0;
+
+	diag_cnt->log_num_of_samples = temp[0];
+	diag_cnt->log_sample_period = temp[1];
+	diag_cnt->flag = temp[2];
+	diag_cnt->num_of_samples = temp[3];
+	diag_cnt->sample_index = temp[4];
+done:
+	DIAG_UNLOCK(diag_cnt);
+	return (err);
+}
+
+/* Emit one "id,sample,timestamp,value" text line per decoded sample. */
+static void
+decode_cnt_buffer(u32 num_of_samples, u8 *out, struct sbuf *sbuf)
+{
+	void *cnt;
+	u64 value;
+	u32 idx;
+
+	for (idx = 0; idx < num_of_samples; idx++) {
+		cnt = MLX5_ADDR_OF(query_diagnostic_counters_out, out,
+		    diag_counter[idx]);
+		value = MLX5_GET(diagnostic_cntr_struct, cnt, counter_value_h);
+		value <<= 32;
+		value |= MLX5_GET(diagnostic_cntr_struct, cnt, counter_value_l);
+		sbuf_printf(sbuf, "0x%04x,0x%04x,0x%08x,0x%016llx\n",
+		    MLX5_GET(diagnostic_cntr_struct, cnt, counter_id),
+		    MLX5_GET(diagnostic_cntr_struct, cnt, sample_id),
+		    MLX5_GET(diagnostic_cntr_struct, cnt, time_stamp_31_0),
+		    (unsigned long long)value);
+	}
+}
+
+/*
+ * "dump_set" handler: reads the ready flag; a non-zero write programs
+ * the diagnostic parameters into the device via mlx5_diag_set_params().
+ */
+static int
+mlx5_sysctl_dump_set(SYSCTL_HANDLER_ARGS)
+{
+	struct mlx5_core_dev *dev = arg1;
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	uint8_t temp;
+	int err;
+
+	DIAG_LOCK(diag_cnt);
+	err = SYSCTL_OUT(req, &diag_cnt->ready, sizeof(diag_cnt->ready));
+	if (err || !req->newptr)
+		goto done;
+
+	err = SYSCTL_IN(req, &temp, sizeof(temp));
+	if (err)
+		goto done;
+
+	/* Only report ready once the device accepted the parameters. */
+	if (temp != 0)
+		err = -mlx5_diag_set_params(dev);
+	diag_cnt->ready = (temp != 0 && err == 0);
+done:
+	DIAG_UNLOCK(diag_cnt);
+	return (err);
+}
+
+/*
+ * Sysctl handler for "dump_get": renders the sampled counters as text,
+ * or a short notice when the dump was not set or the query failed.
+ */
+static int
+mlx5_sysctl_dump_get(SYSCTL_HANDLER_ARGS)
+{
+	struct mlx5_core_dev *dev = arg1;
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	struct sbuf sbuf;
+	u8 *out;
+	int err;
+
+	err = sysctl_wire_old_buffer(req, 0);
+	if (err != 0)
+		return (err);
+
+	DIAG_LOCK(diag_cnt);
+	sbuf_new_for_sysctl(&sbuf, NULL, 65536, req);
+
+	if (diag_cnt->ready == 0) {
+		sbuf_printf(&sbuf, "\nDump was not set.\n");
+		goto finish;
+	}
+
+	err = -mlx5_diag_query_counters(dev, &out);
+	if (err != 0) {
+		sbuf_printf(&sbuf, "\nCould not query counters: %d\n", err);
+		goto finish;
+	}
+
+	sbuf_printf(&sbuf, "\n");
+	decode_cnt_buffer(diag_cnt->num_of_samples * diag_cnt->num_cnt_id,
+	    out, &sbuf);
+	kfree(out);
+finish:
+	/* The sbuf result replaces any query error already reported above. */
+	err = sbuf_finish(&sbuf);
+	sbuf_delete(&sbuf);
+	DIAG_UNLOCK(diag_cnt);
+	return (err);
+}
+
+/*
+ * Sysctl handler for "cap": prints the diagnostic-counter capabilities
+ * of the device followed by the list of supported counter IDs.
+ */
+static int
+mlx5_sysctl_cap_read(SYSCTL_HANDLER_ARGS)
+{
+	struct mlx5_core_dev *dev = arg1;
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	struct sbuf sbuf;
+	u32 idx;
+	int err;
+
+	err = sysctl_wire_old_buffer(req, 0);
+	if (err != 0)
+		return (err);
+
+	DIAG_LOCK(diag_cnt);
+	sbuf_new_for_sysctl(&sbuf, NULL, 8192, req);
+
+	/* Capability fields, one "name=value" pair per line. */
+	sbuf_printf(&sbuf, "\n");
+	sbuf_printf(&sbuf, "log_max_samples=%d\n",
+	    MLX5_CAP_DEBUG(dev, log_max_samples));
+	sbuf_printf(&sbuf, "log_min_sample_period=%d\n",
+	    MLX5_CAP_DEBUG(dev, log_min_sample_period));
+	sbuf_printf(&sbuf, "repetitive=%d\n",
+	    MLX5_CAP_DEBUG(dev, repetitive));
+	sbuf_printf(&sbuf, "single=%d\n",
+	    MLX5_CAP_DEBUG(dev, single));
+	sbuf_printf(&sbuf, "num_of_diagnostic_counters=%d\n",
+	    MLX5_CAP_GEN(dev, num_of_diagnostic_counters));
+
+	/* Comma-separated IDs taken from the cnt_id table. */
+	sbuf_printf(&sbuf, "supported counter id:\n");
+	idx = 0;
+	while (idx != MLX5_CAP_GEN(dev, num_of_diagnostic_counters)) {
+		sbuf_printf(&sbuf, "0x%04x,", diag_cnt->cnt_id[idx].id);
+		idx++;
+	}
+	sbuf_printf(&sbuf, "\n");
+
+	err = sbuf_finish(&sbuf);
+	sbuf_delete(&sbuf);
+	DIAG_UNLOCK(diag_cnt);
+	return (err);
+}
+
+/* Allocate dev->diag_cnt.cnt_id and fill it with the capability IDs. */
+static int
+get_supported_cnt_ids(struct mlx5_core_dev *dev)
+{
+	u32 total = MLX5_CAP_GEN(dev, num_of_diagnostic_counters);
+	struct mlx5_diag_cnt_id *ids;
+	u32 n;
+
+	ids = kzalloc(sizeof(*ids) * total, GFP_KERNEL);
+	dev->diag_cnt.cnt_id = ids;
+	if (ids == NULL)
+		return (-ENOMEM);
+
+	for (n = 0; n < total; n++)
+		ids[n].id =
+		    MLX5_CAP_DEBUG(dev, diagnostic_counter[n].counter_id);
+	return (0);
+}
+
+/* Deselect every counter and zero the selected-counter count. */
+static void
+reset_cnt_id(struct mlx5_core_dev *dev)
+{
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	u32 n = MLX5_CAP_GEN(dev, num_of_diagnostic_counters);
+
+	for (diag_cnt->num_cnt_id = 0; n != 0; n--)
+		diag_cnt->cnt_id[n - 1].enabled = false;
+}
+
+/*
+ * Mark the supported counter with the given ID as selected.  Returns
+ * EINVAL if it is already selected and ENOENT if the ID is unknown.
+ */
+static int
+enable_cnt_id(struct mlx5_core_dev *dev, u16 id)
+{
+	struct mlx5_diag_cnt_id *ids = dev->diag_cnt.cnt_id;
+	u32 total = MLX5_CAP_GEN(dev, num_of_diagnostic_counters);
+	u32 n;
+
+	for (n = 0; n < total; n++) {
+		if (ids[n].id != id)
+			continue;
+		if (ids[n].enabled)
+			return (EINVAL);
+		ids[n].enabled = true;
+		return (0);
+	}
+	return (ENOENT);
+}
+
+/* Zero the five sampling parameters held in *diag_cnt. */
+static void
+reset_params(struct mlx5_diag_cnt *diag_cnt)
+{
+	diag_cnt->num_of_samples = diag_cnt->sample_index = 0;
+	diag_cnt->flag = 0;
+	diag_cnt->log_sample_period = 0;
+	diag_cnt->log_num_of_samples = 0;
+}
+
+/*
+ * Program the selected counters and sampling parameters into the device
+ * with SET_DIAGNOSTICS.  Flag bits 7..2 map to single, repetitive, sync,
+ * clear, on_demand and enable.  Returns -EINVAL when no counter is
+ * selected, -ENOMEM, or the mlx5_cmd_exec() result.
+ */
+int
+mlx5_diag_set_params(struct mlx5_core_dev *dev)
+{
+	u8 out[MLX5_ST_SZ_BYTES(set_diagnostic_params_out)] = {0};
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	u8 flag = diag_cnt->flag;
+	void *cnt_id, *ctx;
+	u16 in_sz;
+	u32 i, j;
+	u8 *in;
+	int err;
+
+	if (!diag_cnt->num_cnt_id)
+		return (-EINVAL);
+
+	in_sz = MLX5_ST_SZ_BYTES(set_diagnostic_params_in) +
+	    diag_cnt->num_cnt_id * MLX5_ST_SZ_BYTES(counter_id);
+	in = kzalloc(in_sz, GFP_KERNEL);
+	if (!in)
+		return (-ENOMEM);
+
+	MLX5_SET(set_diagnostic_params_in, in, opcode,
+	    MLX5_CMD_OP_SET_DIAGNOSTICS);
+
+	ctx = MLX5_ADDR_OF(set_diagnostic_params_in, in,
+	    diagnostic_params_ctx);
+	MLX5_SET(diagnostic_params_context, ctx, num_of_counters,
+	    diag_cnt->num_cnt_id);
+	MLX5_SET(diagnostic_params_context, ctx, log_num_of_samples,
+	    diag_cnt->log_num_of_samples);
+	MLX5_SET(diagnostic_params_context, ctx, log_sample_period,
+	    diag_cnt->log_sample_period);
+
+	MLX5_SET(diagnostic_params_context, ctx, single, (flag >> 7) & 1);
+	MLX5_SET(diagnostic_params_context, ctx, repetitive, (flag >> 6) & 1);
+	MLX5_SET(diagnostic_params_context, ctx, sync, (flag >> 5) & 1);
+	MLX5_SET(diagnostic_params_context, ctx, clear, (flag >> 4) & 1);
+	MLX5_SET(diagnostic_params_context, ctx, on_demand, (flag >> 3) & 1);
+	MLX5_SET(diagnostic_params_context, ctx, enable, (flag >> 2) & 1);
+
+	/* Never write more IDs than the buffer was sized for above. */
+	for (i = j = 0; j != diag_cnt->num_cnt_id &&
+	    i != MLX5_CAP_GEN(dev, num_of_diagnostic_counters); i++) {
+		if (!diag_cnt->cnt_id[i].enabled)
+			continue;
+		cnt_id = MLX5_ADDR_OF(diagnostic_params_context, ctx,
+		    counter_id[j]);
+		MLX5_SET(counter_id, cnt_id, counter_id,
+		    diag_cnt->cnt_id[i].id);
+		j++;
+	}
+
+	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+	kfree(in);
+	return (err);
+}
+
+/*
+ * Debug helper: issue QUERY_DIAGNOSTIC_PARAMS and log the parameters and
+ * counter IDs the device reports via mlx5_core_dbg().
+ */
+int
+mlx5_diag_query_params(struct mlx5_core_dev *dev)
+{
+	u8 in[MLX5_ST_SZ_BYTES(query_diagnostic_params_in)] = {0};
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	u16 out_sz = MLX5_ST_SZ_BYTES(query_diagnostic_params_out) +
+	    diag_cnt->num_cnt_id * MLX5_ST_SZ_BYTES(counter_id);
+	void *entry, *ctx;
+	u8 *out;
+	int err;
+	u32 n;
+
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (out == NULL)
+		return (-ENOMEM);
+
+	MLX5_SET(query_diagnostic_params_in, in, opcode,
+	    MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+	if (err != 0) {
+		kfree(out);
+		return (err);
+	}
+
+	ctx = MLX5_ADDR_OF(query_diagnostic_params_out, out,
+	    diagnostic_params_ctx);
+	mlx5_core_dbg(dev, "single=%x\n",
+	    MLX5_GET(diagnostic_params_context, ctx, single));
+	mlx5_core_dbg(dev, "repetitive=%x\n",
+	    MLX5_GET(diagnostic_params_context, ctx, repetitive));
+	mlx5_core_dbg(dev, "sync=%x\n",
+	    MLX5_GET(diagnostic_params_context, ctx, sync));
+	mlx5_core_dbg(dev, "clear=%x\n",
+	    MLX5_GET(diagnostic_params_context, ctx, clear));
+	mlx5_core_dbg(dev, "on_demand=%x\n",
+	    MLX5_GET(diagnostic_params_context, ctx, on_demand));
+	mlx5_core_dbg(dev, "enable=%x\n",
+	    MLX5_GET(diagnostic_params_context, ctx, enable));
+	mlx5_core_dbg(dev, "log_sample_period=%x\n",
+	    MLX5_GET(diagnostic_params_context, ctx, log_sample_period));
+
+	for (n = 0; n < diag_cnt->num_cnt_id; n++) {
+		entry = MLX5_ADDR_OF(diagnostic_params_context, ctx,
+		    counter_id[n]);
+		mlx5_core_dbg(dev, "counter_id[%d]=%x\n", n,
+		    MLX5_GET(counter_id, entry, counter_id));
+	}
+	kfree(out);
+	return (err);
+}
+
+/*
+ * Issue QUERY_DIAGNOSTICS.  On success *out_buffer receives a kzalloc()ed
+ * reply buffer that the caller must release with kfree().
+ */
+int
+mlx5_diag_query_counters(struct mlx5_core_dev *dev, u8 **out_buffer)
+{
+	u8 in[MLX5_ST_SZ_BYTES(query_diagnostic_counters_in)] = {0};
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	/* u32: a u16 size wraps once the reply exceeds 64 KiB. */
+	u32 out_sz = MLX5_ST_SZ_BYTES(query_diagnostic_counters_out) +
+	    diag_cnt->num_of_samples * diag_cnt->num_cnt_id *
+	    MLX5_ST_SZ_BYTES(diagnostic_cntr_struct);
+	u8 *out;
+	int err;
+
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (!out)
+		return (-ENOMEM);
+
+	MLX5_SET(query_diagnostic_counters_in, in, opcode,
+	    MLX5_CMD_OP_QUERY_DIAGNOSTICS);
+	MLX5_SET(query_diagnostic_counters_in, in, num_of_samples,
+	    diag_cnt->num_of_samples);
+	MLX5_SET(query_diagnostic_counters_in, in, sample_index,
+	    diag_cnt->sample_index);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+	if (!err)
+		*out_buffer = out;
+	else
+		kfree(out);
+	return (err);
+}
+
+/*
+ * Create the diagnostic-counter state and "diag_cnt" sysctl subtree; a
+ * no-op returning 0 without the capability.  Undoes its work on failure.
+ */
+int
+mlx5_diag_cnt_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+	struct sysctl_ctx_list *ctx = &diag_cnt->sysctl_ctx;
+	struct sysctl_oid *node;
+	int err;
+
+	if (!MLX5_DIAG_CNT_SUPPORTED(dev))
+		return (0);
+
+	mutex_init(&diag_cnt->lock);
+	err = get_supported_cnt_ids(dev);
+	if (err) {
+		mutex_destroy(&diag_cnt->lock);
+		return (err);
+	}
+	sysctl_ctx_init(ctx);
+	node = SYSCTL_ADD_NODE(ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev->pdev->dev.bsddev)),
+	    OID_AUTO, "diag_cnt", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
+	    "Diagnostics counters");
+	if (node == NULL) {
+		sysctl_ctx_free(ctx);
+		kfree(diag_cnt->cnt_id);
+		diag_cnt->cnt_id = NULL;
+		mutex_destroy(&diag_cnt->lock);
+		return (-ENOMEM);
+	}
+
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "counter_id",
+	    CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, 0,
+	    mlx5_sysctl_counter_id, "SU", "Selected counter IDs");
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "params",
+	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, 0,
+	    mlx5_sysctl_params, "IU",
+	    "Counter parameters: log_num_of_samples, log_sample_period, flag, num_of_samples, sample_index");
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump_set",
+	    CTLTYPE_U8 | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, 0,
+	    mlx5_sysctl_dump_set, "CU",
+	    "Set dump parameters by writing 1 and enable dump_get. Write 0 to disable dump.");
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump_get",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, 0,
+	    mlx5_sysctl_dump_get, "A", "Get dump parameters.");
+	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "cap",
+	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, 0,
+	    mlx5_sysctl_cap_read, "A", "Read capabilities.");
+	return (0);
+}
+
+/*
+ * Undo mlx5_diag_cnt_init(): free the sysctl subtree and the ID table,
+ * zero the parameters and destroy the lock so init can run again.
+ */
+void
+mlx5_diag_cnt_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt;
+
+	if (!MLX5_DIAG_CNT_SUPPORTED(dev))
+		return;
+
+	sysctl_ctx_free(&diag_cnt->sysctl_ctx);
+	kfree(diag_cnt->cnt_id);
+	diag_cnt->cnt_id = NULL;
+	reset_params(diag_cnt);
+	mutex_destroy(&diag_cnt->lock);
+}
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_main.c b/sys/dev/mlx5/mlx5_core/mlx5_main.c
index 9e9b7b845528..b258989e601b 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_main.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_main.c
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2013-2021, Mellanox Technologies, Ltd.  All rights reserved.
+ * Copyright (c) 2022 NVIDIA corporation & affiliates.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -50,6 +51,7 @@
 #include <dev/mlx5/mlx5_core/mlx5_core.h>
 #include <dev/mlx5/mlx5_core/eswitch.h>
 #include <dev/mlx5/mlx5_core/fs_core.h>
+#include <dev/mlx5/mlx5_core/diag_cnt.h>
 #ifdef PCI_IOV
 #include <sys/nv.h>
 #include <dev/pci/pci_iov.h>
@@ -1209,10 +1211,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_mpfs;
 	}
 
+	err = mlx5_diag_cnt_init(dev);
+	if (err) {
+		mlx5_core_err(dev, "diag cnt init failed %d\n", err);
+		goto err_fpga;
+	}
+
 	err = mlx5_register_device(dev);
 	if (err) {
 		mlx5_core_err(dev, "mlx5_register_device failed %d\n", err);
-		goto err_fpga;
+		goto err_diag_cnt;
 	}
 
 	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
@@ -1221,6 +1229,9 @@ out:
 	mutex_unlock(&dev->intf_state_mutex);
 	return 0;
 
+err_diag_cnt:
+	mlx5_diag_cnt_cleanup(dev);
+
 err_fpga:
 	mlx5_fpga_device_stop(dev);
 
@@ -1291,6 +1302,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	mlx5_unregister_device(dev);
 
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
+	mlx5_diag_cnt_cleanup(dev);
 	mlx5_fpga_device_stop(dev);
 	mlx5_mpfs_destroy(dev);
 	mlx5_cleanup_fs(dev);
diff --git a/sys/dev/mlx5/mlx5_ifc.h b/sys/dev/mlx5/mlx5_ifc.h
index 52bd65ccdb45..4336e561ecbb 100644
--- a/sys/dev/mlx5/mlx5_ifc.h
+++ b/sys/dev/mlx5/mlx5_ifc.h
@@ -216,6 +216,7 @@ enum {
 	MLX5_CMD_OP_DEACTIVATE_TRACER             = 0x815,
 	MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN        = 0x816,
 	MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN      = 0x817,
+	MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS       = 0x819,
 	MLX5_CMD_OP_SET_DIAGNOSTICS               = 0x820,
 	MLX5_CMD_OP_QUERY_DIAGNOSTICS             = 0x821,
 	MLX5_CMD_OP_QUERY_CONG_STATUS             = 0x822,
@@ -5337,6 +5338,22 @@ struct mlx5_ifc_dealloc_packet_reformat_context_in_bits {
 	u8         reserved_60[0x20];
 };
 
+struct mlx5_ifc_diagnostic_cntr_struct_bits {	/* one diag_counter[] entry of the QUERY_DIAGNOSTICS reply */
+	u8         counter_id[0x10];
+	u8         sample_id[0x10];
+
+	u8         time_stamp_31_0[0x20];
+
+	u8         counter_value_h[0x20];	/* upper 32 bits of the counter value */
+
+	u8         counter_value_l[0x20];	/* lower 32 bits of the counter value */
+};
+
+enum {	/* NOTE(review): presumably values for the "enable" field of diagnostic_params_context; not referenced in this change */
+	MLX5_DIAGNOSTIC_PARAMS_CONTEXT_ENABLE_ENABLE   = 0x1,
+	MLX5_DIAGNOSTIC_PARAMS_CONTEXT_ENABLE_DISABLE  = 0x0,
+};
+
 struct mlx5_ifc_query_cq_out_bits {
 	u8         status[0x8];
 	u8         reserved_0[0x18];
@@ -7080,6 +7097,25 @@ struct mlx5_ifc_diagnostic_params_context_bits {
 	struct mlx5_ifc_counter_id_bits counter_id[0];
 };
 
+struct mlx5_ifc_query_diagnostic_params_in_bits {	/* input layout used with MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS */
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_query_diagnostic_params_out_bits {	/* reply layout parsed by mlx5_diag_query_params() */
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	struct mlx5_ifc_diagnostic_params_context_bits diagnostic_params_ctx;	/* ends in a variable-length counter_id[] array */
+};
+
 struct mlx5_ifc_set_diagnostic_params_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_0[0x10];
diff --git a/sys/modules/mlx5/Makefile b/sys/modules/mlx5/Makefile
index 3eb21722f777..263199f3d595 100644
--- a/sys/modules/mlx5/Makefile
+++ b/sys/modules/mlx5/Makefile
@@ -8,6 +8,7 @@ SRCS= \
 mlx5_alloc.c \
 mlx5_cmd.c \
 mlx5_cq.c \
+mlx5_diag_cnt.c \
 mlx5_diagnostics.c \
 mlx5_eq.c \
 mlx5_eswitch.c \