summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-05-06 05:13:12 +0300
committerJakub Kicinski <kuba@kernel.org>2026-05-06 05:13:12 +0300
commit22675f07260ca26423851a42b553b0ea669228d1 (patch)
tree6351ced2b9eef23d33d4375867da6c529e0dfe31
parentaf0e9b26b9667d765d71a7f53b7ed242eb1ba671 (diff)
parentd466ddda5500b6b8ae060909d2317811f2c32a6a (diff)
downloadlinux-22675f07260ca26423851a42b553b0ea669228d1.tar.xz
Merge branch 'net-mlx5-fixes-for-socket-direct'
Tariq Toukan says: ==================== net/mlx5: Fixes for Socket-Direct This series fixes several race conditions and bugs in the mlx5 Socket-Direct (SD) single netdev flow. Patch 1 serializes mlx5_sd_init()/mlx5_sd_cleanup() with mlx5_devcom_comp_lock() and tracks the SD group state on the primary device, preventing concurrent or duplicate bring-up/tear-down. Patch 2 fixes the debugfs "multi-pf" directory being stored on the calling device's sd struct instead of the primary's, which caused memory leaks and recreation errors when cleanup ran from a different PF. Patch 3 fixes a race where a secondary PF could access the primary's auxiliary device after it had been unbound, by holding the primary's device lock while operating on its auxiliary device. Patch 4 fixes missing cleanup on ETH probe errors. The analogous gap on the resume path requires introducing sd_suspend/resume APIs that only destroy FW resources and is left for a follow-up series. ==================== Link: https://patch.msgid.link/20260504180206.268568-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c114
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h2
3 files changed, 122 insertions, 20 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8e9443caa933..8f2b3abe0092 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -6774,9 +6774,11 @@ static int mlx5e_resume(struct auxiliary_device *adev)
return err;
actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
- if (actual_adev)
- return _mlx5e_resume(actual_adev);
- return 0;
+ if (actual_adev) {
+ err = _mlx5e_resume(actual_adev);
+ mlx5_sd_put_adev(actual_adev, adev);
+ }
+ return err;
}
static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg)
@@ -6815,6 +6817,8 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
err = _mlx5e_suspend(actual_adev, false);
mlx5_sd_cleanup(mdev);
+ if (actual_adev)
+ mlx5_sd_put_adev(actual_adev, adev);
return err;
}
@@ -6912,9 +6916,19 @@ static int mlx5e_probe(struct auxiliary_device *adev,
return err;
actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
- if (actual_adev)
- return _mlx5e_probe(actual_adev);
+ if (actual_adev) {
+ err = _mlx5e_probe(actual_adev);
+ if (err)
+ goto sd_cleanup;
+ mlx5_sd_put_adev(actual_adev, adev);
+ }
return 0;
+
+sd_cleanup:
+ mlx5_sd_cleanup(mdev);
+ if (actual_adev)
+ mlx5_sd_put_adev(actual_adev, adev);
+ return err;
}
static void _mlx5e_remove(struct auxiliary_device *adev)
@@ -6966,6 +6980,8 @@ static void mlx5e_remove(struct auxiliary_device *adev)
_mlx5e_remove(actual_adev);
mlx5_sd_cleanup(mdev);
+ if (actual_adev)
+ mlx5_sd_put_adev(actual_adev, adev);
}
static const struct auxiliary_device_id mlx5e_id_table[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 762c783156b4..6e199161b008 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -18,6 +18,7 @@ struct mlx5_sd {
u8 host_buses;
struct mlx5_devcom_comp_dev *devcom;
struct dentry *dfs;
+ u8 state;
bool primary;
union {
struct { /* primary */
@@ -31,6 +32,11 @@ struct mlx5_sd {
};
};
+enum mlx5_sd_state {
+ MLX5_SD_STATE_DOWN = 0,
+ MLX5_SD_STATE_UP,
+};
+
static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
@@ -270,9 +276,6 @@ static void sd_unregister(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
- mlx5_devcom_comp_lock(sd->devcom);
- mlx5_devcom_comp_set_ready(sd->devcom, false);
- mlx5_devcom_comp_unlock(sd->devcom);
mlx5_devcom_unregister_component(sd->devcom);
}
@@ -426,6 +429,7 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
struct mlx5_core_dev *primary, *pos, *to;
struct mlx5_sd *sd = mlx5_get_sd(dev);
u8 alias_key[ACCESS_KEY_LEN];
+ struct mlx5_sd *primary_sd;
int err, i;
err = sd_init(dev);
@@ -440,10 +444,17 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
if (err)
goto err_sd_cleanup;
+ mlx5_devcom_comp_lock(sd->devcom);
if (!mlx5_devcom_comp_is_ready(sd->devcom))
- return 0;
+ goto out;
primary = mlx5_sd_get_primary(dev);
+ if (!primary)
+ goto out;
+
+ primary_sd = mlx5_get_sd(primary);
+ if (primary_sd->state != MLX5_SD_STATE_DOWN)
+ goto out;
for (i = 0; i < ACCESS_KEY_LEN; i++)
alias_key[i] = get_random_u8();
@@ -452,9 +463,13 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
if (err)
goto err_sd_unregister;
- sd->dfs = debugfs_create_dir("multi-pf", mlx5_debugfs_get_dev_root(primary));
- debugfs_create_x32("group_id", 0400, sd->dfs, &sd->group_id);
- debugfs_create_file("primary", 0400, sd->dfs, primary, &dev_fops);
+ primary_sd->dfs =
+ debugfs_create_dir("multi-pf",
+ mlx5_debugfs_get_dev_root(primary));
+ debugfs_create_x32("group_id", 0400, primary_sd->dfs,
+ &primary_sd->group_id);
+ debugfs_create_file("primary", 0400, primary_sd->dfs, primary,
+ &dev_fops);
mlx5_sd_for_each_secondary(i, primary, pos) {
char name[32];
@@ -464,7 +479,8 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
goto err_unset_secondaries;
snprintf(name, sizeof(name), "secondary_%d", i - 1);
- debugfs_create_file(name, 0400, sd->dfs, pos, &dev_fops);
+ debugfs_create_file(name, 0400, primary_sd->dfs, pos,
+ &dev_fops);
}
@@ -472,6 +488,9 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
sd->group_id, mlx5_devcom_comp_get_size(sd->devcom));
sd_print_group(primary);
+ primary_sd->state = MLX5_SD_STATE_UP;
+out:
+ mlx5_devcom_comp_unlock(sd->devcom);
return 0;
err_unset_secondaries:
@@ -479,8 +498,18 @@ err_unset_secondaries:
mlx5_sd_for_each_secondary_to(i, primary, to, pos)
sd_cmd_unset_secondary(pos);
sd_cmd_unset_primary(primary);
- debugfs_remove_recursive(sd->dfs);
+ debugfs_remove_recursive(primary_sd->dfs);
+ primary_sd->dfs = NULL;
err_sd_unregister:
+ mlx5_sd_for_each_secondary(i, primary, pos) {
+ struct mlx5_sd *peer_sd = mlx5_get_sd(pos);
+
+ primary_sd->secondaries[i - 1] = NULL;
+ peer_sd->primary_dev = NULL;
+ }
+ primary_sd->primary = false;
+ mlx5_devcom_comp_set_ready(sd->devcom, false);
+ mlx5_devcom_comp_unlock(sd->devcom);
sd_unregister(dev);
err_sd_cleanup:
sd_cleanup(dev);
@@ -491,42 +520,97 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
struct mlx5_core_dev *primary, *pos;
+ struct mlx5_sd *primary_sd;
int i;
if (!sd)
return;
+ mlx5_devcom_comp_lock(sd->devcom);
if (!mlx5_devcom_comp_is_ready(sd->devcom))
- goto out;
+ goto out_unlock;
primary = mlx5_sd_get_primary(dev);
+ if (!primary)
+ goto out_ready_false;
+
+ primary_sd = mlx5_get_sd(primary);
+ if (primary_sd->state != MLX5_SD_STATE_UP)
+ goto out_clear_peers;
+
mlx5_sd_for_each_secondary(i, primary, pos)
sd_cmd_unset_secondary(pos);
sd_cmd_unset_primary(primary);
- debugfs_remove_recursive(sd->dfs);
+ debugfs_remove_recursive(primary_sd->dfs);
+ primary_sd->dfs = NULL;
sd_info(primary, "group id %#x, uncombined\n", sd->group_id);
-out:
+ primary_sd->state = MLX5_SD_STATE_DOWN;
+out_clear_peers:
+ mlx5_sd_for_each_secondary(i, primary, pos) {
+ struct mlx5_sd *peer_sd = mlx5_get_sd(pos);
+
+ primary_sd->secondaries[i - 1] = NULL;
+ peer_sd->primary_dev = NULL;
+ }
+ primary_sd->primary = false;
+out_ready_false:
+ mlx5_devcom_comp_set_ready(sd->devcom, false);
+out_unlock:
+ mlx5_devcom_comp_unlock(sd->devcom);
sd_unregister(dev);
sd_cleanup(dev);
}
+/* Lock order:
+ * primary: actual_adev_lock -> SD devcom comp lock
+ * secondary: SD devcom comp lock -> (drop) -> actual_adev_lock
+ * The two locks are never held together, so no ABBA.
+ */
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
struct auxiliary_device *adev,
int idx)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
struct mlx5_core_dev *primary;
+ struct mlx5_adev *primary_adev;
if (!sd)
return adev;
- if (!mlx5_devcom_comp_is_ready(sd->devcom))
+ mlx5_devcom_comp_lock(sd->devcom);
+ if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
+ mlx5_devcom_comp_unlock(sd->devcom);
return NULL;
+ }
primary = mlx5_sd_get_primary(dev);
- if (dev == primary)
+ if (!primary || dev == primary) {
+ mlx5_devcom_comp_unlock(sd->devcom);
return adev;
+ }
+
+ primary_adev = primary->priv.adev[idx];
+ get_device(&primary_adev->adev.dev);
+ mlx5_devcom_comp_unlock(sd->devcom);
+
+ device_lock(&primary_adev->adev.dev);
+ /* Primary may have completed remove between dropping devcom and
+ * acquiring device_lock; recheck.
+ */
+ if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
+ device_unlock(&primary_adev->adev.dev);
+ put_device(&primary_adev->adev.dev);
+ return NULL;
+ }
+ return &primary_adev->adev;
+}
- return &primary->priv.adev[idx]->adev;
+void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
+ struct auxiliary_device *adev)
+{
+ if (actual_adev != adev) {
+ device_unlock(&actual_adev->dev);
+ put_device(&actual_adev->dev);
+ }
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
index 137efaf9aabc..9bfd5b9756b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
@@ -15,6 +15,8 @@ struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int c
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
struct auxiliary_device *adev,
int idx);
+void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
+ struct auxiliary_device *adev);
int mlx5_sd_init(struct mlx5_core_dev *dev);
void mlx5_sd_cleanup(struct mlx5_core_dev *dev);