From 3564222cfdde83a2d760b80192155a3ada1c9bdd Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 4 May 2026 21:02:05 +0300 Subject: net/mlx5e: SD, Fix missing cleanup on probe error When _mlx5e_probe() fails, the preceding successful mlx5_sd_init() is not undone. Auxiliary bus probe failure skips binding, so mlx5e_remove() is never called for that adev and the matching mlx5_sd_cleanup() never runs - leaking the per-dev SD struct. Call mlx5_sd_cleanup() on the probe error path to balance mlx5_sd_init(). A similar gap exists on the resume path: mlx5_sd_init() and mlx5_sd_cleanup() are currently bundled with both probe/remove and suspend/resume, even though only the FW alias state actually needs to follow the suspend/resume lifecycle - the sd struct allocation and devcom membership are software state that should track the full bound lifetime. As a result, a failed resume can leave a still-bound device with sd == NULL, which mlx5_sd_get_adev() can't distinguish from a non-SD device. Fixing this requires sd_suspend/resume APIs which will only destroy FW resources and is left for a follow-up series. Fixes: 381978d28317 ("net/mlx5e: Create single netdev per SD group") Signed-off-by: Shay Drory Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260504180206.268568-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8e9443caa933..62b70334a13d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6775,8 +6775,8 @@ static int mlx5e_resume(struct auxiliary_device *adev) actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); if (actual_adev) - return _mlx5e_resume(actual_adev); - return 0; + err = _mlx5e_resume(actual_adev); + return err; } static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg) @@ -6912,9 +6912,16 @@ static int mlx5e_probe(struct auxiliary_device *adev, return err; actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); - if (actual_adev) - return _mlx5e_probe(actual_adev); + if (actual_adev) { + err = _mlx5e_probe(actual_adev); + if (err) + goto sd_cleanup; + } return 0; + +sd_cleanup: + mlx5_sd_cleanup(mdev); + return err; } static void _mlx5e_remove(struct auxiliary_device *adev) -- cgit v1.2.3