svn commit: r347880 - in stable/11/sys/dev/mlx5: . mlx5_core
Hans Petter Selasky
hselasky at FreeBSD.org
Thu May 16 18:28:14 UTC 2019
Author: hselasky
Date: Thu May 16 18:28:12 2019
New Revision: 347880
URL: https://svnweb.freebsd.org/changeset/base/347880
Log:
MFC r347323:
Fix race between driver unload and dumping firmware in mlx5core.
Present code uses lock-less accesses to the dump data to prevent top
level ioctls from blocking bottom-level call to dump. Unfortunately, this
depends on the type stability of the dump data structure, which makes it
non-functional during driver teardown.
Switch to the mutex locking scheme where top levels use the mutex in the
bound regions, while copyouts and drain for completion utilize condvars.
The mutex lifetime is guaranteed to be strictly larger than the time
interval where driver can initiate dump, and most of the control fields
of the old struct mlx5_dump_data are directly embedded into struct
mlx5_core_dev.
Submitted by: kib@
Sponsored by: Mellanox Technologies
Modified:
stable/11/sys/dev/mlx5/driver.h
stable/11/sys/dev/mlx5/mlx5_core/mlx5_fwdump.c
stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c
Directory Properties:
stable/11/ (props changed)
Modified: stable/11/sys/dev/mlx5/driver.h
==============================================================================
--- stable/11/sys/dev/mlx5/driver.h Thu May 16 18:27:27 2019 (r347879)
+++ stable/11/sys/dev/mlx5/driver.h Thu May 16 18:28:12 2019 (r347880)
@@ -638,7 +638,6 @@ struct mlx5_special_contexts {
};
struct mlx5_flow_root_namespace;
-struct mlx5_dump_data;
struct mlx5_core_dev {
struct pci_dev *pdev;
/* sync pci state */
@@ -678,7 +677,12 @@ struct mlx5_core_dev {
struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
u32 num_q_counter_allocated[MLX5_INTERFACE_NUMBER];
- struct mlx5_dump_data *dump_data;
+ const struct mlx5_crspace_regmap *dump_rege;
+ uint32_t *dump_data;
+ unsigned dump_size;
+ bool dump_valid;
+ bool dump_copyout;
+ struct mtx dump_lock;
struct sysctl_ctx_list sysctl_ctx;
int msix_eqvec;
Modified: stable/11/sys/dev/mlx5/mlx5_core/mlx5_fwdump.c
==============================================================================
--- stable/11/sys/dev/mlx5/mlx5_core/mlx5_fwdump.c Thu May 16 18:27:27 2019 (r347879)
+++ stable/11/sys/dev/mlx5/mlx5_core/mlx5_fwdump.c Thu May 16 18:28:12 2019 (r347880)
@@ -39,14 +39,6 @@ extern const struct mlx5_crspace_regmap mlx5_crspace_r
extern const struct mlx5_crspace_regmap mlx5_crspace_regmap_mt4115[];
extern const struct mlx5_crspace_regmap mlx5_crspace_regmap_connectx5[];
-struct mlx5_dump_data {
- const struct mlx5_crspace_regmap *rege;
- uint32_t *dump;
- unsigned dump_size;
- int dump_valid;
- struct mtx dump_lock;
-};
-
static MALLOC_DEFINE(M_MLX5_DUMP, "MLX5DUMP", "MLX5 Firmware dump");
static unsigned
@@ -61,20 +53,20 @@ mlx5_fwdump_getsize(const struct mlx5_crspace_regmap *
}
static void
-mlx5_fwdump_destroy_dd(struct mlx5_dump_data *dd)
+mlx5_fwdump_destroy_dd(struct mlx5_core_dev *mdev)
{
- mtx_destroy(&dd->dump_lock);
- free(dd->dump, M_MLX5_DUMP);
- free(dd, M_MLX5_DUMP);
+ mtx_assert(&mdev->dump_lock, MA_OWNED);
+ free(mdev->dump_data, M_MLX5_DUMP);
+ mdev->dump_data = NULL;
}
void
mlx5_fwdump_prep(struct mlx5_core_dev *mdev)
{
- struct mlx5_dump_data *dd;
int error;
+ mdev->dump_data = NULL;
error = mlx5_vsc_find_cap(mdev);
if (error != 0) {
/* Inability to create a firmware dump is not fatal. */
@@ -82,47 +74,39 @@ mlx5_fwdump_prep(struct mlx5_core_dev *mdev)
"mlx5_fwdump_prep failed %d\n", error);
return;
}
- dd = malloc(sizeof(struct mlx5_dump_data), M_MLX5_DUMP, M_WAITOK);
switch (pci_get_device(mdev->pdev->dev.bsddev)) {
case 0x1013:
- dd->rege = mlx5_crspace_regmap_mt4115;
+ mdev->dump_rege = mlx5_crspace_regmap_mt4115;
break;
case 0x1015:
- dd->rege = mlx5_crspace_regmap_mt4117;
+ mdev->dump_rege = mlx5_crspace_regmap_mt4117;
break;
case 0x1017:
case 0x1019:
- dd->rege = mlx5_crspace_regmap_connectx5;
+ mdev->dump_rege = mlx5_crspace_regmap_connectx5;
break;
default:
- free(dd, M_MLX5_DUMP);
return; /* silently fail, do not prevent driver attach */
}
- dd->dump_size = mlx5_fwdump_getsize(dd->rege);
- dd->dump = malloc(dd->dump_size * sizeof(uint32_t), M_MLX5_DUMP,
- M_WAITOK | M_ZERO);
- dd->dump_valid = 0;
- mtx_init(&dd->dump_lock, "mlx5dmp", NULL, MTX_DEF | MTX_NEW);
- if (atomic_cmpset_rel_ptr((uintptr_t *)&mdev->dump_data, 0,
- (uintptr_t)dd) == 0)
- mlx5_fwdump_destroy_dd(dd);
+ mdev->dump_size = mlx5_fwdump_getsize(mdev->dump_rege);
+ mdev->dump_data = malloc(mdev->dump_size * sizeof(uint32_t),
+ M_MLX5_DUMP, M_WAITOK | M_ZERO);
+ mdev->dump_valid = false;
+ mdev->dump_copyout = false;
}
void
mlx5_fwdump(struct mlx5_core_dev *mdev)
{
- struct mlx5_dump_data *dd;
const struct mlx5_crspace_regmap *r;
uint32_t i, ri;
int error;
dev_info(&mdev->pdev->dev, "Issuing FW dump\n");
- dd = (struct mlx5_dump_data *)atomic_load_acq_ptr((uintptr_t *)
- &mdev->dump_data);
- if (dd == NULL)
- return;
- mtx_lock(&dd->dump_lock);
- if (dd->dump_valid) {
+ mtx_lock(&mdev->dump_lock);
+ if (mdev->dump_data == NULL)
+ goto failed;
+ if (mdev->dump_valid) {
/* only one dump */
dev_warn(&mdev->pdev->dev,
"Only one FW dump can be captured aborting FW dump\n");
@@ -136,37 +120,51 @@ mlx5_fwdump(struct mlx5_core_dev *mdev)
error = mlx5_vsc_set_space(mdev, MLX5_VSC_DOMAIN_PROTECTED_CRSPACE);
if (error != 0)
goto unlock_vsc;
- for (i = 0, r = dd->rege; r->cnt != 0; r++) {
+ for (i = 0, r = mdev->dump_rege; r->cnt != 0; r++) {
for (ri = 0; ri < r->cnt; ri++) {
error = mlx5_vsc_read(mdev, r->addr + ri * 4,
- &dd->dump[i]);
+ &mdev->dump_data[i]);
if (error != 0)
goto unlock_vsc;
i++;
}
}
- atomic_store_rel_int(&dd->dump_valid, 1);
+ mdev->dump_valid = true;
unlock_vsc:
mlx5_vsc_unlock(mdev);
failed:
- mtx_unlock(&dd->dump_lock);
+ mtx_unlock(&mdev->dump_lock);
}
void
mlx5_fwdump_clean(struct mlx5_core_dev *mdev)
{
- struct mlx5_dump_data *dd;
- for (;;) {
- dd = mdev->dump_data;
- if (dd == NULL)
- return;
- if (atomic_cmpset_ptr((uintptr_t *)&mdev->dump_data,
- (uintptr_t)dd, 0) == 1) {
- mlx5_fwdump_destroy_dd(dd);
- return;
+ mtx_lock(&mdev->dump_lock);
+ while (mdev->dump_copyout)
+ msleep(&mdev->dump_copyout, &mdev->dump_lock, 0, "mlx5fwc", 0);
+ mlx5_fwdump_destroy_dd(mdev);
+ mtx_unlock(&mdev->dump_lock);
+}
+
+static int
+mlx5_fwdump_reset(struct mlx5_core_dev *mdev)
+{
+ int error;
+
+ error = 0;
+ mtx_lock(&mdev->dump_lock);
+ if (mdev->dump_data != NULL) {
+ while (mdev->dump_copyout) {
+ msleep(&mdev->dump_copyout, &mdev->dump_lock,
+ 0, "mlx5fwr", 0);
}
+ mdev->dump_valid = false;
+ } else {
+ error = ENOENT;
}
+ mtx_unlock(&mdev->dump_lock);
+ return (error);
}
static int
@@ -190,29 +188,37 @@ mlx5_dbsf_to_core(const struct mlx5_tool_addr *devaddr
}
static int
-mlx5_fwdump_copyout(struct mlx5_dump_data *dd, struct mlx5_fwdump_get *fwg)
+mlx5_fwdump_copyout(struct mlx5_core_dev *mdev, struct mlx5_fwdump_get *fwg)
{
const struct mlx5_crspace_regmap *r;
struct mlx5_fwdump_reg rv, *urv;
uint32_t i, ri;
int error;
- if (dd == NULL)
+ mtx_lock(&mdev->dump_lock);
+ if (mdev->dump_data == NULL) {
+ mtx_unlock(&mdev->dump_lock);
return (ENOENT);
+ }
if (fwg->buf == NULL) {
- fwg->reg_filled = dd->dump_size;
+ fwg->reg_filled = mdev->dump_size;
+ mtx_unlock(&mdev->dump_lock);
return (0);
}
- if (atomic_load_acq_int(&dd->dump_valid) == 0)
+ if (!mdev->dump_valid) {
+ mtx_unlock(&mdev->dump_lock);
return (ENOENT);
+ }
+ mdev->dump_copyout = true;
+ mtx_unlock(&mdev->dump_lock);
urv = fwg->buf;
- for (i = 0, r = dd->rege; r->cnt != 0; r++) {
+ for (i = 0, r = mdev->dump_rege; r->cnt != 0; r++) {
for (ri = 0; ri < r->cnt; ri++) {
if (i >= fwg->reg_cnt)
goto out;
rv.addr = r->addr + ri * 4;
- rv.val = dd->dump[i];
+ rv.val = mdev->dump_data[i];
error = copyout(&rv, urv, sizeof(rv));
if (error != 0)
return (error);
@@ -222,6 +228,10 @@ mlx5_fwdump_copyout(struct mlx5_dump_data *dd, struct
}
out:
fwg->reg_filled = i;
+ mtx_lock(&mdev->dump_lock);
+ mdev->dump_copyout = false;
+ wakeup(&mdev->dump_copyout);
+ mtx_unlock(&mdev->dump_lock);
return (0);
}
@@ -250,7 +260,6 @@ mlx5_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t d
struct mlx5_core_dev *mdev;
struct mlx5_fwdump_get *fwg;
struct mlx5_tool_addr *devaddr;
- struct mlx5_dump_data *dd;
struct mlx5_fw_update *fu;
struct firmware fake_fw;
int error;
@@ -267,7 +276,7 @@ mlx5_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t d
error = mlx5_dbsf_to_core(devaddr, &mdev);
if (error != 0)
break;
- error = mlx5_fwdump_copyout(mdev->dump_data, fwg);
+ error = mlx5_fwdump_copyout(mdev, fwg);
break;
case MLX5_FWDUMP_RESET:
if ((fflag & FWRITE) == 0) {
@@ -276,13 +285,8 @@ mlx5_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t d
}
devaddr = (struct mlx5_tool_addr *)data;
error = mlx5_dbsf_to_core(devaddr, &mdev);
- if (error != 0)
- break;
- dd = mdev->dump_data;
- if (dd != NULL)
- atomic_store_rel_int(&dd->dump_valid, 0);
- else
- error = ENOENT;
+ if (error == 0)
+ error = mlx5_fwdump_reset(mdev);
break;
case MLX5_FWDUMP_FORCE:
if ((fflag & FWRITE) == 0) {
Modified: stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c
==============================================================================
--- stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c Thu May 16 18:27:27 2019 (r347879)
+++ stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c Thu May 16 18:28:12 2019 (r347880)
@@ -1283,6 +1283,7 @@ static int init_one(struct pci_dev *pdev,
spin_lock_init(&priv->ctx_lock);
mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
+ mtx_init(&dev->dump_lock, "mlx5dmp", NULL, MTX_DEF | MTX_NEW);
err = mlx5_pci_init(dev, priv);
if (err) {
device_printf(bsddev, "ERR: mlx5_pci_init failed %d\n", err);
@@ -1317,6 +1318,7 @@ close_pci:
mlx5_pci_close(dev, priv);
clean_dev:
sysctl_ctx_free(&dev->sysctl_ctx);
+ mtx_destroy(&dev->dump_lock);
kfree(dev);
return err;
}
@@ -1332,10 +1334,11 @@ static void remove_one(struct pci_dev *pdev)
return;
}
- mlx5_fwdump_clean(dev);
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
+ mlx5_fwdump_clean(dev);
mlx5_pci_close(dev, priv);
+ mtx_destroy(&dev->dump_lock);
pci_set_drvdata(pdev, NULL);
sysctl_ctx_free(&dev->sysctl_ctx);
kfree(dev);
More information about the svn-src-all
mailing list