git: 61dd6a2ea1aa - stable/13 - mlx5: Implement diagostic counters as sysctl(8) nodes.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sun, 30 Apr 2023 06:58:10 UTC
The branch stable/13 has been updated by hselasky: URL: https://cgit.FreeBSD.org/src/commit/?id=61dd6a2ea1aa6349598bb1cc09976a4c76109ce7 commit 61dd6a2ea1aa6349598bb1cc09976a4c76109ce7 Author: Hans Petter Selasky <hselasky@FreeBSD.org> AuthorDate: 2023-04-18 11:38:59 +0000 Commit: Hans Petter Selasky <hselasky@FreeBSD.org> CommitDate: 2023-04-30 06:56:18 +0000 mlx5: Implement diagostic counters as sysctl(8) nodes. Sponsored by: NVIDIA Networking (cherry picked from commit aa7bbdabde890676d54a22eeec3af9b7681d19b7) --- sys/conf/files | 2 + sys/dev/mlx5/driver.h | 22 ++ sys/dev/mlx5/mlx5_core/diag_cnt.h | 43 +++ sys/dev/mlx5/mlx5_core/mlx5_diag_cnt.c | 579 +++++++++++++++++++++++++++++++++ sys/dev/mlx5/mlx5_core/mlx5_main.c | 14 +- sys/dev/mlx5/mlx5_ifc.h | 36 ++ sys/modules/mlx5/Makefile | 1 + 7 files changed, 696 insertions(+), 1 deletion(-) diff --git a/sys/conf/files b/sys/conf/files index 2258acd4c9e7..5546925bccd8 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4952,6 +4952,8 @@ dev/mlx5/mlx5_core/mlx5_cmd.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_cq.c optional mlx5 pci \ compile-with "${OFED_C}" +dev/mlx5/mlx5_core/mlx5_diag_cnt.c optional mlx5 pci \ + compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_diagnostics.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_eq.c optional mlx5 pci \ diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h index 6d3a3be6562e..e1766ae8a742 100644 --- a/sys/dev/mlx5/driver.h +++ b/sys/dev/mlx5/driver.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2022 NVIDIA corporation & affiliates. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -656,6 +657,26 @@ struct mlx5_special_contexts { int resd_lkey; }; +struct mlx5_diag_cnt_id { + u16 id; + bool enabled; +}; + +struct mlx5_diag_cnt { +#define DIAG_LOCK(dc) mutex_lock(&(dc)->lock) +#define DIAG_UNLOCK(dc) mutex_unlock(&(dc)->lock) + struct mutex lock; + struct sysctl_ctx_list sysctl_ctx; + struct mlx5_diag_cnt_id *cnt_id; + u16 num_of_samples; + u16 sample_index; + u8 num_cnt_id; + u8 log_num_of_samples; + u8 log_sample_period; + u8 flag; + u8 ready; +}; + struct mlx5_flow_root_namespace; struct mlx5_core_dev { struct pci_dev *pdev; @@ -685,6 +706,7 @@ struct mlx5_core_dev { struct mlx5_priv priv; struct mlx5_profile *profile; atomic_t num_qps; + struct mlx5_diag_cnt diag_cnt; u32 vsc_addr; u32 issi; struct mlx5_special_contexts special_contexts; diff --git a/sys/dev/mlx5/mlx5_core/diag_cnt.h b/sys/dev/mlx5/mlx5_core/diag_cnt.h new file mode 100644 index 000000000000..25c460b19382 --- /dev/null +++ b/sys/dev/mlx5/mlx5_core/diag_cnt.h @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2018, Mellanox Technologies, Ltd. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __MLX5_DIAG_CNT_H__ +#define __MLX5_DIAG_CNT_H__ + +#include <dev/mlx5/driver.h> +#include <dev/mlx5/mlx5_core/mlx5_core.h> + +#define MLX5_DIAG_CNT_SUPPORTED(mdev) \ + (MLX5_CAP_GEN(mdev, debug) && \ + MLX5_CAP_GEN(mdev, num_of_diagnostic_counters)) + +int mlx5_diag_cnt_init(struct mlx5_core_dev *); +void mlx5_diag_cnt_cleanup(struct mlx5_core_dev *); + +int mlx5_diag_query_params(struct mlx5_core_dev *); +int mlx5_diag_set_params(struct mlx5_core_dev *); +int mlx5_diag_query_counters(struct mlx5_core_dev *, u8 * *out_buffer); + +#endif /* __MLX5_DIAG_CNT_H__ */ diff --git a/sys/dev/mlx5/mlx5_core/mlx5_diag_cnt.c b/sys/dev/mlx5/mlx5_core/mlx5_diag_cnt.c new file mode 100644 index 000000000000..83f8bf2b148f --- /dev/null +++ b/sys/dev/mlx5/mlx5_core/mlx5_diag_cnt.c @@ -0,0 +1,579 @@ +/*- + * Copyright (c) 2018, Mellanox Technologies, Ltd. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <dev/mlx5/mlx5_core/diag_cnt.h> + +static int get_supported_cnt_ids(struct mlx5_core_dev *dev); +static int enable_cnt_id(struct mlx5_core_dev *dev, u16 id); +static void reset_cnt_id(struct mlx5_core_dev *dev); +static void reset_params(struct mlx5_diag_cnt *diag_cnt); + +static int +mlx5_sysctl_counter_id(SYSCTL_HANDLER_ARGS) +{ + struct mlx5_diag_cnt *diag_cnt; + struct mlx5_core_dev *dev; + uint16_t *ptr; + size_t max; + size_t num; + size_t x; + int err; + + dev = arg1; + diag_cnt = &dev->diag_cnt; + + max = MLX5_CAP_GEN(dev, num_of_diagnostic_counters); + + ptr = kmalloc(sizeof(ptr[0]) * max, GFP_KERNEL); + + DIAG_LOCK(diag_cnt); + + for (x = num = 0; x != max; x++) { + if (diag_cnt->cnt_id[x].enabled) + ptr[num++] = diag_cnt->cnt_id[x].id; + } + + err = SYSCTL_OUT(req, ptr, sizeof(ptr[0]) * num); + if (err || !req->newptr) + goto done; + + num = req->newlen / sizeof(ptr[0]); + if (num > max) { + err = ENOMEM; + goto done; + } + + err = SYSCTL_IN(req, ptr, sizeof(ptr[0]) * num); + + reset_cnt_id(dev); + + for (x = 0; x != num; x++) { + err = enable_cnt_id(dev, ptr[x]); + if (err) + goto done; + } + + diag_cnt->num_cnt_id = num; +done: + kfree(ptr); + + if (err != 0 && req->newptr != NULL) + reset_cnt_id(dev); + + DIAG_UNLOCK(diag_cnt); + + return (err); +} + +#define NUM_OF_DIAG_PARAMS 5 + +static int +mlx5_sysctl_params(SYSCTL_HANDLER_ARGS) +{ + struct mlx5_diag_cnt *diag_cnt; + struct mlx5_core_dev *dev; + uint32_t temp[NUM_OF_DIAG_PARAMS]; + int err; + + dev = arg1; + diag_cnt = &dev->diag_cnt; + + DIAG_LOCK(diag_cnt); + + temp[0] = diag_cnt->log_num_of_samples; + temp[1] = diag_cnt->log_sample_period; + temp[2] = diag_cnt->flag; + temp[3] = diag_cnt->num_of_samples; + temp[4] = diag_cnt->sample_index; + + err = SYSCTL_OUT(req, temp, sizeof(temp)); + if (err || !req->newptr) + goto done; + + err = SYSCTL_IN(req, temp, sizeof(temp)); + if (err) + goto done; + + reset_params(&dev->diag_cnt); + + if (temp[0] > MLX5_CAP_DEBUG(dev, log_max_samples) || + (1U << (MLX5_CAP_DEBUG(dev, log_max_samples) - temp[0])) < + diag_cnt->num_cnt_id) { + err = ERANGE; + goto done; + } else if (temp[1] < MLX5_CAP_DEBUG(dev, log_min_sample_period)) { + err = ERANGE; + goto done; + } else if (temp[2] >= 0x100) { + err = ERANGE; + goto done; + } else if (temp[3] > (1U << diag_cnt->log_num_of_samples)) { + err = ERANGE; + goto done; + } else if (temp[4] > (1U << diag_cnt->log_num_of_samples)) { + err = ERANGE; + goto done; + } + + diag_cnt->log_num_of_samples = temp[0]; + diag_cnt->log_sample_period = temp[1]; + diag_cnt->flag = temp[2]; + diag_cnt->num_of_samples = temp[3]; + diag_cnt->sample_index = temp[4]; +done: + DIAG_UNLOCK(diag_cnt); + + return (err); +} + +static void +decode_cnt_buffer(u32 num_of_samples, u8 *out, struct sbuf *sbuf) +{ + void *cnt; + u64 temp; + u32 i; + + for (i = 0; i != num_of_samples; i++) { + cnt = MLX5_ADDR_OF(query_diagnostic_counters_out, + out, diag_counter[i]); + temp = MLX5_GET(diagnostic_cntr_struct, cnt, counter_value_h); + temp = (temp << 32) | + MLX5_GET(diagnostic_cntr_struct, cnt, counter_value_l); + sbuf_printf(sbuf, + "0x%04x,0x%04x,0x%08x,0x%016llx\n", + MLX5_GET(diagnostic_cntr_struct, cnt, counter_id), + MLX5_GET(diagnostic_cntr_struct, cnt, sample_id), + MLX5_GET(diagnostic_cntr_struct, cnt, time_stamp_31_0), + (unsigned long long)temp); + } +} + +static int +mlx5_sysctl_dump_set(SYSCTL_HANDLER_ARGS) +{ + struct mlx5_diag_cnt *diag_cnt; + struct mlx5_core_dev *dev; + uint8_t temp; + int err; + + dev = arg1; + diag_cnt = &dev->diag_cnt; + + DIAG_LOCK(diag_cnt); + + err = SYSCTL_OUT(req, &diag_cnt->ready, sizeof(diag_cnt->ready)); + if (err || !req->newptr) + goto done; + + err = SYSCTL_IN(req, &temp, sizeof(temp)); + if (err) + goto done; + + diag_cnt->ready = (temp != 0); + if (diag_cnt->ready != 0) + err = -mlx5_diag_set_params(dev); +done: + DIAG_UNLOCK(diag_cnt); + + return (err); +} + +static int +mlx5_sysctl_dump_get(SYSCTL_HANDLER_ARGS) +{ + struct mlx5_diag_cnt *diag_cnt; + struct mlx5_core_dev *dev; + struct sbuf sbuf; + u8 *out; + int err; + + dev = arg1; + diag_cnt = &dev->diag_cnt; + + err = sysctl_wire_old_buffer(req, 0); + if (err != 0) + return (err); + + DIAG_LOCK(diag_cnt); + + sbuf_new_for_sysctl(&sbuf, NULL, 65536, req); + + if (diag_cnt->ready != 0) { + err = -mlx5_diag_query_counters(dev, &out); + if (err) { + sbuf_printf(&sbuf, "\nCould not query counters: %d\n", err); + } else { + sbuf_printf(&sbuf, "\n"); + decode_cnt_buffer(diag_cnt->num_of_samples * + diag_cnt->num_cnt_id, out, &sbuf); + kfree(out); + } + } else { + sbuf_printf(&sbuf, "\nDump was not set.\n"); + } + + err = sbuf_finish(&sbuf); + + sbuf_delete(&sbuf); + + DIAG_UNLOCK(diag_cnt); + + return (err); +} + +static int +mlx5_sysctl_cap_read(SYSCTL_HANDLER_ARGS) +{ + struct mlx5_diag_cnt *diag_cnt; + struct mlx5_core_dev *dev; + struct sbuf sbuf; + int err; + u32 i; + + dev = arg1; + diag_cnt = &dev->diag_cnt; + + err = sysctl_wire_old_buffer(req, 0); + if (err != 0) + return (err); + + DIAG_LOCK(diag_cnt); + + sbuf_new_for_sysctl(&sbuf, NULL, 8192, req); + + sbuf_printf(&sbuf, "\n"); + + /* print cap */ + sbuf_printf(&sbuf, "log_max_samples=%d\n", + MLX5_CAP_DEBUG(dev, log_max_samples)); + sbuf_printf(&sbuf, "log_min_sample_period=%d\n", + MLX5_CAP_DEBUG(dev, log_min_sample_period)); + sbuf_printf(&sbuf, "repetitive=%d\n", + MLX5_CAP_DEBUG(dev, repetitive)); + sbuf_printf(&sbuf, "single=%d\n", + MLX5_CAP_DEBUG(dev, single)); + sbuf_printf(&sbuf, "num_of_diagnostic_counters=%d\n", + MLX5_CAP_GEN(dev, num_of_diagnostic_counters)); + + /* print list of supported counter */ + sbuf_printf(&sbuf, "supported counter id:\n"); + for (i = 0; i != MLX5_CAP_GEN(dev, num_of_diagnostic_counters); i++) + sbuf_printf(&sbuf, "0x%04x,", diag_cnt->cnt_id[i].id); + sbuf_printf(&sbuf, "\n"); + + err = sbuf_finish(&sbuf); + sbuf_delete(&sbuf); + + DIAG_UNLOCK(diag_cnt); + + return (err); +} + +static int +get_supported_cnt_ids(struct mlx5_core_dev *dev) +{ + u32 num_counters = MLX5_CAP_GEN(dev, num_of_diagnostic_counters); + struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt; + u32 i; + + diag_cnt->cnt_id = kzalloc(sizeof(*diag_cnt->cnt_id) * num_counters, + GFP_KERNEL); + if (!diag_cnt->cnt_id) + return (-ENOMEM); + + for (i = 0; i != num_counters; i++) { + diag_cnt->cnt_id[i].id = + MLX5_CAP_DEBUG(dev, diagnostic_counter[i].counter_id); + } + return (0); +} + +static void +reset_cnt_id(struct mlx5_core_dev *dev) +{ + struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt; + u32 i; + + diag_cnt->num_cnt_id = 0; + for (i = 0; i != MLX5_CAP_GEN(dev, num_of_diagnostic_counters); i++) + diag_cnt->cnt_id[i].enabled = false; +} + +static int +enable_cnt_id(struct mlx5_core_dev *dev, u16 id) +{ + struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt; + u32 i; + + for (i = 0; i != MLX5_CAP_GEN(dev, num_of_diagnostic_counters); i++) { + if (diag_cnt->cnt_id[i].id == id) { + if (diag_cnt->cnt_id[i].enabled) + return (EINVAL); + + diag_cnt->cnt_id[i].enabled = true; + break; + } + } + + if (i == MLX5_CAP_GEN(dev, num_of_diagnostic_counters)) + return (ENOENT); + else + return (0); +} + +static void +reset_params(struct mlx5_diag_cnt *diag_cnt) +{ + diag_cnt->log_num_of_samples = 0; + diag_cnt->log_sample_period = 0; + diag_cnt->flag = 0; + diag_cnt->num_of_samples = 0; + diag_cnt->sample_index = 0; +} + +int +mlx5_diag_set_params(struct mlx5_core_dev *dev) +{ + u8 out[MLX5_ST_SZ_BYTES(set_diagnostic_params_out)] = {0}; + struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt; + void *cnt_id; + void *ctx; + u16 in_sz; + int err; + u8 *in; + u32 i; + u32 j; + + if (!diag_cnt->num_cnt_id) + return (-EINVAL); + + in_sz = MLX5_ST_SZ_BYTES(set_diagnostic_params_in) + + diag_cnt->num_cnt_id * MLX5_ST_SZ_BYTES(counter_id); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return (-ENOMEM); + + MLX5_SET(set_diagnostic_params_in, in, opcode, + MLX5_CMD_OP_SET_DIAGNOSTICS); + + ctx = MLX5_ADDR_OF(set_diagnostic_params_in, in, + diagnostic_params_ctx); + MLX5_SET(diagnostic_params_context, ctx, num_of_counters, + diag_cnt->num_cnt_id); + MLX5_SET(diagnostic_params_context, ctx, log_num_of_samples, + diag_cnt->log_num_of_samples); + + MLX5_SET(diagnostic_params_context, ctx, single, + (diag_cnt->flag >> 7) & 1); + MLX5_SET(diagnostic_params_context, ctx, repetitive, + (diag_cnt->flag >> 6) & 1); + MLX5_SET(diagnostic_params_context, ctx, sync, + (diag_cnt->flag >> 5) & 1); + MLX5_SET(diagnostic_params_context, ctx, clear, + (diag_cnt->flag >> 4) & 1); + MLX5_SET(diagnostic_params_context, ctx, on_demand, + (diag_cnt->flag >> 3) & 1); + MLX5_SET(diagnostic_params_context, ctx, enable, + (diag_cnt->flag >> 2) & 1); + MLX5_SET(diagnostic_params_context, ctx, log_sample_period, + diag_cnt->log_sample_period); + + for (i = j = 0; i != MLX5_CAP_GEN(dev, num_of_diagnostic_counters); i++) { + if (diag_cnt->cnt_id[i].enabled) { + cnt_id = MLX5_ADDR_OF(diagnostic_params_context, + ctx, counter_id[j]); + MLX5_SET(counter_id, cnt_id, counter_id, + diag_cnt->cnt_id[i].id); + j++; + } + } + + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); + + kfree(in); + return (err); +} + +/* This function is for debug purpose */ +int +mlx5_diag_query_params(struct mlx5_core_dev *dev) +{ + u8 in[MLX5_ST_SZ_BYTES(query_diagnostic_params_in)] = {0}; + struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt; + void *cnt_id; + u16 out_sz; + void *ctx; + int err; + u8 *out; + u32 i; + + out_sz = MLX5_ST_SZ_BYTES(query_diagnostic_params_out) + + diag_cnt->num_cnt_id * MLX5_ST_SZ_BYTES(counter_id); + + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return (-ENOMEM); + + MLX5_SET(query_diagnostic_params_in, in, opcode, + MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + ctx = MLX5_ADDR_OF(query_diagnostic_params_out, out, + diagnostic_params_ctx); + mlx5_core_dbg(dev, "single=%x\n", + MLX5_GET(diagnostic_params_context, ctx, single)); + mlx5_core_dbg(dev, "repetitive=%x\n", + MLX5_GET(diagnostic_params_context, ctx, repetitive)); + mlx5_core_dbg(dev, "sync=%x\n", + MLX5_GET(diagnostic_params_context, ctx, sync)); + mlx5_core_dbg(dev, "clear=%x\n", + MLX5_GET(diagnostic_params_context, ctx, clear)); + mlx5_core_dbg(dev, "on_demand=%x\n", + MLX5_GET(diagnostic_params_context, ctx, on_demand)); + mlx5_core_dbg(dev, "enable=%x\n", + MLX5_GET(diagnostic_params_context, ctx, enable)); + mlx5_core_dbg(dev, "log_sample_period=%x\n", + MLX5_GET(diagnostic_params_context, ctx, + log_sample_period)); + + for (i = 0; i != diag_cnt->num_cnt_id; i++) { + cnt_id = MLX5_ADDR_OF(diagnostic_params_context, + ctx, counter_id[i]); + mlx5_core_dbg(dev, "counter_id[%d]=%x\n", i, + MLX5_GET(counter_id, cnt_id, counter_id)); + } +out: + kfree(out); + return (err); +} + +int +mlx5_diag_query_counters(struct mlx5_core_dev *dev, u8 **out_buffer) +{ + u8 in[MLX5_ST_SZ_BYTES(query_diagnostic_counters_in)] = {0}; + struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt; + u16 out_sz; + u8 *out; + int err; + + out_sz = MLX5_ST_SZ_BYTES(query_diagnostic_counters_out) + + diag_cnt->num_of_samples * diag_cnt->num_cnt_id * + MLX5_ST_SZ_BYTES(diagnostic_cntr_struct); + + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return (-ENOMEM); + + MLX5_SET(query_diagnostic_counters_in, in, opcode, + MLX5_CMD_OP_QUERY_DIAGNOSTICS); + MLX5_SET(query_diagnostic_counters_in, in, num_of_samples, + diag_cnt->num_of_samples); + MLX5_SET(query_diagnostic_counters_in, in, sample_index, + diag_cnt->sample_index); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + + if (!err) + *out_buffer = out; + else + kfree(out); + + return (err); +} + +int +mlx5_diag_cnt_init(struct mlx5_core_dev *dev) +{ + struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt; + struct sysctl_oid *diag_cnt_sysctl_node; + int err; + + if (!MLX5_DIAG_CNT_SUPPORTED(dev)) + return (0); + + mutex_init(&diag_cnt->lock); + + /* Build private data */ + err = get_supported_cnt_ids(dev); + if (err) + return (err); + + sysctl_ctx_init(&diag_cnt->sysctl_ctx); + + diag_cnt_sysctl_node = SYSCTL_ADD_NODE(&diag_cnt->sysctl_ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(dev->pdev->dev.bsddev)), + OID_AUTO, "diag_cnt", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, + "Diagnostics counters"); + + if (diag_cnt_sysctl_node == NULL) + return (-ENOMEM); + + SYSCTL_ADD_PROC(&diag_cnt->sysctl_ctx, SYSCTL_CHILDREN(diag_cnt_sysctl_node), + OID_AUTO, "counter_id", CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, + dev, 0, mlx5_sysctl_counter_id, "SU", "Selected counter IDs"); + + SYSCTL_ADD_PROC(&diag_cnt->sysctl_ctx, SYSCTL_CHILDREN(diag_cnt_sysctl_node), + OID_AUTO, "params", CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, + dev, 0, mlx5_sysctl_params, "IU", + "Counter parameters: log_num_of_samples, log_sample_perios, flag, num_of_samples, sample_index"); + + SYSCTL_ADD_PROC(&diag_cnt->sysctl_ctx, SYSCTL_CHILDREN(diag_cnt_sysctl_node), + OID_AUTO, "dump_set", CTLTYPE_U8 | CTLFLAG_RW | CTLFLAG_MPSAFE, + dev, 0, mlx5_sysctl_dump_set, "CU", + "Set dump parameters by writing 1 and enable dump_get. Write 0 to disable dump."); + + SYSCTL_ADD_PROC(&diag_cnt->sysctl_ctx, SYSCTL_CHILDREN(diag_cnt_sysctl_node), + OID_AUTO, "dump_get", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, + dev, 0, mlx5_sysctl_dump_get, "A", + "Get dump parameters."); + + SYSCTL_ADD_PROC(&diag_cnt->sysctl_ctx, SYSCTL_CHILDREN(diag_cnt_sysctl_node), + OID_AUTO, "cap", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, + dev, 0, mlx5_sysctl_cap_read, "A", + "Read capabilities."); + + return (0); +} + +void +mlx5_diag_cnt_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_diag_cnt *diag_cnt = &dev->diag_cnt; + void *ptr; + + if (!MLX5_DIAG_CNT_SUPPORTED(dev)) + return; + + sysctl_ctx_free(&diag_cnt->sysctl_ctx); + + ptr = diag_cnt->cnt_id; + diag_cnt->cnt_id = NULL; + + kfree(ptr); + + reset_params(diag_cnt); +} diff --git a/sys/dev/mlx5/mlx5_core/mlx5_main.c b/sys/dev/mlx5/mlx5_core/mlx5_main.c index 6fcd7cf281f2..431a277119cf 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_main.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_main.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2013-2021, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2022 NVIDIA corporation & affiliates. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -50,6 +51,7 @@ #include <dev/mlx5/mlx5_core/mlx5_core.h> #include <dev/mlx5/mlx5_core/eswitch.h> #include <dev/mlx5/mlx5_core/fs_core.h> +#include <dev/mlx5/mlx5_core/diag_cnt.h> #ifdef PCI_IOV #include <sys/nv.h> #include <dev/pci/pci_iov.h> @@ -1190,10 +1192,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_mpfs; } + err = mlx5_diag_cnt_init(dev); + if (err) { + mlx5_core_err(dev, "diag cnt init failed %d\n", err); + goto err_fpga; + } + err = mlx5_register_device(dev); if (err) { mlx5_core_err(dev, "mlx5_register_device failed %d\n", err); - goto err_fpga; + goto err_diag_cnt; } set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); @@ -1202,6 +1210,9 @@ out: mutex_unlock(&dev->intf_state_mutex); return 0; +err_diag_cnt: + mlx5_diag_cnt_cleanup(dev); + err_fpga: mlx5_fpga_device_stop(dev); @@ -1272,6 +1283,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_unregister_device(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_diag_cnt_cleanup(dev); mlx5_fpga_device_stop(dev); mlx5_mpfs_destroy(dev); mlx5_cleanup_fs(dev); diff --git a/sys/dev/mlx5/mlx5_ifc.h b/sys/dev/mlx5/mlx5_ifc.h index 627c367eace5..b931bf9b3a0b 100644 --- a/sys/dev/mlx5/mlx5_ifc.h +++ b/sys/dev/mlx5/mlx5_ifc.h @@ -186,6 +186,7 @@ enum { MLX5_CMD_OP_DEACTIVATE_TRACER = 0x815, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN = 0x816, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN = 0x817, + MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS = 0x819, MLX5_CMD_OP_SET_DIAGNOSTICS = 0x820, MLX5_CMD_OP_QUERY_DIAGNOSTICS = 0x821, MLX5_CMD_OP_QUERY_CONG_STATUS = 0x822, @@ -5118,6 +5119,22 @@ struct mlx5_ifc_query_dc_cnak_trace_in_bits { u8 reserved_2[0x40]; }; +struct mlx5_ifc_diagnostic_cntr_struct_bits { + u8 counter_id[0x10]; + u8 sample_id[0x10]; + + u8 time_stamp_31_0[0x20]; + + u8 counter_value_h[0x20]; + + u8 counter_value_l[0x20]; +}; + +enum { + MLX5_DIAGNOSTIC_PARAMS_CONTEXT_ENABLE_ENABLE = 0x1, + MLX5_DIAGNOSTIC_PARAMS_CONTEXT_ENABLE_DISABLE = 0x0, +}; + struct mlx5_ifc_query_cq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; @@ -6856,6 +6873,25 @@ struct mlx5_ifc_diagnostic_params_context_bits { struct mlx5_ifc_counter_id_bits counter_id[0]; }; +struct mlx5_ifc_query_diagnostic_params_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_query_diagnostic_params_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + struct mlx5_ifc_diagnostic_params_context_bits diagnostic_params_ctx; +}; + struct mlx5_ifc_set_diagnostic_params_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; diff --git a/sys/modules/mlx5/Makefile b/sys/modules/mlx5/Makefile index 2742a5f1ba90..d418375bdf69 100644 --- a/sys/modules/mlx5/Makefile +++ b/sys/modules/mlx5/Makefile @@ -8,6 +8,7 @@ SRCS= \ mlx5_alloc.c \ mlx5_cmd.c \ mlx5_cq.c \ +mlx5_diag_cnt.c \ mlx5_diagnostics.c \ mlx5_eq.c \ mlx5_eswitch.c \