svn commit: r353206 - in stable/11/sys/dev/mlx5: . mlx5_core mlx5_en
Hans Petter Selasky
hselasky at FreeBSD.org
Mon Oct 7 09:02:01 UTC 2019
Author: hselasky
Date: Mon Oct 7 09:01:59 2019
New Revision: 353206
URL: https://svnweb.freebsd.org/changeset/base/353206
Log:
MFC r352966:
Add port module event (PME) software counters in mlx5core.
While at it, fix up PME handling based on the latest PRM defines.
Submitted by: slavash@
Sponsored by: Mellanox Technologies
Modified:
stable/11/sys/dev/mlx5/device.h
stable/11/sys/dev/mlx5/driver.h
stable/11/sys/dev/mlx5/mlx5_core/mlx5_eq.c
stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c
stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
Directory Properties:
stable/11/ (props changed)
Modified: stable/11/sys/dev/mlx5/device.h
==============================================================================
--- stable/11/sys/dev/mlx5/device.h Mon Oct 7 09:01:21 2019 (r353205)
+++ stable/11/sys/dev/mlx5/device.h Mon Oct 7 09:01:59 2019 (r353206)
@@ -537,7 +537,7 @@ enum {
MLX5_MODULE_STATUS_PLUGGED_ENABLED = 0x1,
MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
MLX5_MODULE_STATUS_ERROR = 0x3,
- MLX5_MODULE_STATUS_PLUGGED_DISABLED = 0x4,
+ MLX5_MODULE_STATUS_NUM ,
};
enum {
@@ -549,7 +549,7 @@ enum {
MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE = 0x5,
MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6,
MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED = 0x7,
- MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED = 0xc,
+ MLX5_MODULE_EVENT_ERROR_NUM ,
};
struct mlx5_eqe_port_module_event {
Modified: stable/11/sys/dev/mlx5/driver.h
==============================================================================
--- stable/11/sys/dev/mlx5/driver.h Mon Oct 7 09:01:21 2019 (r353205)
+++ stable/11/sys/dev/mlx5/driver.h Mon Oct 7 09:01:59 2019 (r353206)
@@ -546,6 +546,11 @@ struct mlx5_mr_table {
struct radix_tree_root tree;
};
+struct mlx5_pme_stats {
+ u64 status_counters[MLX5_MODULE_STATUS_NUM];
+ u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
struct mlx5_priv {
char name[MLX5_MAX_NAME_LEN];
struct mlx5_eq_table eq_table;
@@ -598,6 +603,7 @@ struct mlx5_priv {
struct list_head ctx_list;
spinlock_t ctx_lock;
unsigned long pci_dev_data;
+ struct mlx5_pme_stats pme_stats;
};
enum mlx5_device_state {
Modified: stable/11/sys/dev/mlx5/mlx5_core/mlx5_eq.c
==============================================================================
--- stable/11/sys/dev/mlx5/mlx5_core/mlx5_eq.c Mon Oct 7 09:01:21 2019 (r353205)
+++ stable/11/sys/dev/mlx5/mlx5_core/mlx5_eq.c Mon Oct 7 09:01:59 2019 (r353206)
@@ -639,9 +639,9 @@ static const char *mlx5_port_module_event_error_type_t
{
switch (error_type) {
case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
- return "Power Budget Exceeded";
+ return "Power budget exceeded";
case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE:
- return "Long Range for non MLNX cable/module";
+ return "Long Range for non MLNX cable";
case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
return "Bus stuck(I2C or data shorted)";
case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
@@ -649,18 +649,11 @@ static const char *mlx5_port_module_event_error_type_t
case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
return "Enforce part number list";
case MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE:
- return "Unsupported Cable";
+ return "Unknown identifier";
case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
return "High Temperature";
case MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED:
- return "Cable is shorted";
- case MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED:
- return "One or more network ports have been powered "
- "down due to insufficient/unadvertised power on "
- "the PCIe slot. Please refer to the card's user "
- "manual for power specifications or contact "
- "Mellanox support.";
-
+ return "Bad or shorted cable/module";
default:
return "Unknown error type";
}
@@ -686,29 +679,36 @@ static void mlx5_port_module_event(struct mlx5_core_de
module_num = (unsigned int)module_event_eqe->module;
module_status = (unsigned int)module_event_eqe->module_status &
- PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+ PORT_MODULE_EVENT_MODULE_STATUS_MASK;
error_type = (unsigned int)module_event_eqe->error_type &
- PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+ PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+ if (module_status < MLX5_MODULE_STATUS_NUM)
+ dev->priv.pme_stats.status_counters[module_status]++;
switch (module_status) {
case MLX5_MODULE_STATUS_PLUGGED_ENABLED:
- device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged and enabled\n", module_num);
+ device_printf((&pdev->dev)->bsddev,
+ "INFO: Module %u, status: plugged and enabled\n",
+ module_num);
break;
case MLX5_MODULE_STATUS_UNPLUGGED:
- device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: unplugged\n", module_num);
+ device_printf((&pdev->dev)->bsddev,
+ "INFO: Module %u, status: unplugged\n", module_num);
break;
case MLX5_MODULE_STATUS_ERROR:
- device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: error, %s\n", module_num, mlx5_port_module_event_error_type_to_string(error_type));
+ device_printf((&pdev->dev)->bsddev,
+ "ERROR: Module %u, status: error, %s\n",
+ module_num,
+ mlx5_port_module_event_error_type_to_string(error_type));
+ if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
+ dev->priv.pme_stats.error_counters[error_type]++;
break;
- case MLX5_MODULE_STATUS_PLUGGED_DISABLED:
- device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged but disabled\n", module_num);
- break;
-
default:
- device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, unknown status\n", module_num);
+ device_printf((&pdev->dev)->bsddev,
+ "INFO: Module %u, unknown status\n", module_num);
}
/* store module status */
if (module_num < MLX5_MAX_PORTS)
Modified: stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c
==============================================================================
--- stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c Mon Oct 7 09:01:21 2019 (r353205)
+++ stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c Mon Oct 7 09:01:59 2019 (r353206)
@@ -1226,13 +1226,31 @@ struct mlx5_core_event_handler {
void *data);
};
+#define MLX5_STATS_DESC(a, b, c, d, e, ...) d, e,
+
+#define MLX5_PORT_MODULE_ERROR_STATS(m) \
+m(+1, u64, power_budget_exceeded, "power_budget", "Module Power Budget Exceeded") \
+m(+1, u64, long_range, "long_range", "Module Long Range for non MLNX cable/module") \
+m(+1, u64, bus_stuck, "bus_stuck", "Module Bus stuck(I2C or data shorted)") \
+m(+1, u64, no_eeprom, "no_eeprom", "No EEPROM/retry timeout") \
+m(+1, u64, enforce_part_number, "enforce_part_number", "Module Enforce part number list") \
+m(+1, u64, unknown_id, "unknown_id", "Module Unknown identifier") \
+m(+1, u64, high_temp, "high_temp", "Module High Temperature") \
+m(+1, u64, cable_shorted, "cable_shorted", "Module Cable is shorted")
+
+static const char *mlx5_pme_err_desc[] = {
+ MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC)
+};
+
static int init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct mlx5_core_dev *dev;
struct mlx5_priv *priv;
device_t bsddev = pdev->dev.bsddev;
- int err;
+ int i,err;
+ struct sysctl_oid *pme_sysctl_node;
+ struct sysctl_oid *pme_err_sysctl_node;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
priv = &dev->priv;
@@ -1264,6 +1282,41 @@ static int init_one(struct pci_dev *pdev,
OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0,
"Current power value in Watts");
+ pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
+ SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
+ OID_AUTO, "pme_stats", CTLFLAG_RD, NULL,
+ "Port module event statistics");
+ if (pme_sysctl_node == NULL) {
+ err = -ENOMEM;
+ goto clean_sysctl_ctx;
+ }
+ pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
+ SYSCTL_CHILDREN(pme_sysctl_node),
+ OID_AUTO, "errors", CTLFLAG_RD, NULL,
+ "Port module event error statistics");
+ if (pme_err_sysctl_node == NULL) {
+ err = -ENOMEM;
+ goto clean_sysctl_ctx;
+ }
+ SYSCTL_ADD_U64(&dev->sysctl_ctx,
+ SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
+ "module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE,
+ &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED],
+ 0, "Number of time module plugged");
+ SYSCTL_ADD_U64(&dev->sysctl_ctx,
+ SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
+ "module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE,
+ &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED],
+ 0, "Number of time module unplugged");
+ for (i = 0 ; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) {
+ SYSCTL_ADD_U64(&dev->sysctl_ctx,
+ SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO,
+ mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE,
+ &dev->priv.pme_stats.error_counters[i],
+ 0, mlx5_pme_err_desc[2 * i + 1]);
+ }
+
+
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
mutex_init(&dev->pci_status_mutex);
@@ -1302,8 +1355,9 @@ clean_health:
close_pci:
mlx5_pci_close(dev, priv);
clean_dev:
- sysctl_ctx_free(&dev->sysctl_ctx);
mtx_destroy(&dev->dump_lock);
+clean_sysctl_ctx:
+ sysctl_ctx_free(&dev->sysctl_ctx);
kfree(dev);
return err;
}
Modified: stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
==============================================================================
--- stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Mon Oct 7 09:01:21 2019 (r353205)
+++ stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Mon Oct 7 09:01:59 2019 (r353206)
@@ -3269,8 +3269,7 @@ out:
}
/* Check if module is present before doing an access */
module_status = mlx5_query_module_status(priv->mdev, module_num);
- if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
- module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
+ if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) {
error = EINVAL;
goto err_i2c;
}
More information about the svn-src-stable-11
mailing list