summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2026-04-28 19:17:45 +0300
committerJason Gunthorpe <jgg@nvidia.com>2026-05-02 21:30:48 +0300
commitc9341307ea16b9395c2e4c9c94d8499d91fe31d0 (patch)
treec121d48c772dd168369977d8f587eb33ec2c6ff9
parentc54c7e4cb679c0aaa1cb489b9c3f2cd98e63a44c (diff)
downloadlinux-c9341307ea16b9395c2e4c9c94d8499d91fe31d0.tar.xz
RDMA/mlx4: Fix mis-use of RCU in mlx4_srq_event()
Sashiko points out the radix_tree itself is RCU safe, but nothing ever frees the mlx4_srq struct with RCU, and it isn't even accessed within the RCU critical section. It also will crash if an event is delivered before the srq object is finished initializing. Use the spinlock since it isn't easy to make RCU work, use refcount_inc_not_zero() to protect against partially initialized objects, and order the refcount_set() to be after the srq is fully initialized. Cc: stable@vger.kernel.org Fixes: 30353bfc43a1 ("net/mlx4_core: Use RCU to perform radix tree lookup for SRQ") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=5 Link: https://patch.msgid.link/r/12-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/srq.c13
1 files changed, 7 insertions, 6 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index dd890f5d7b72..8711689120f3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -44,13 +44,14 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
struct mlx4_srq *srq;
+ unsigned long flags;
- rcu_read_lock();
+ spin_lock_irqsave(&srq_table->lock, flags);
srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
- rcu_read_unlock();
- if (srq)
- refcount_inc(&srq->refcount);
- else {
+ if (!srq || !refcount_inc_not_zero(&srq->refcount))
+ srq = NULL;
+ spin_unlock_irqrestore(&srq_table->lock, flags);
+ if (!srq) {
mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
return;
}
@@ -203,8 +204,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
if (err)
goto err_radix;
- refcount_set(&srq->refcount, 1);
init_completion(&srq->free);
+ refcount_set_release(&srq->refcount, 1);
return 0;