summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jason Gunthorpe <jgg@nvidia.com> 2026-04-28 19:17:46 +0300
committer: Jason Gunthorpe <jgg@nvidia.com> 2026-05-02 21:30:48 +0300
commit: 48973c6c938737bb900d15dc82b91dfe3586cb0f (patch)
tree: 304eb8804e8f6b73ce9d4118c32ff4572aa299f5
parent: c9341307ea16b9395c2e4c9c94d8499d91fe31d0 (diff)
download: linux-48973c6c938737bb900d15dc82b91dfe3586cb0f.tar.xz
RDMA/hns: Fix xarray race in hns_roce_create_srq()
Sashiko points out that once the srq memory is stored into the xarray by alloc_srqc() it can immediately be looked up by:

    xa_lock(&srq_table->xa);
    srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
    if (srq)
        refcount_inc(&srq->refcount);
    xa_unlock(&srq_table->xa);

Which will fail refcount debug because the refcount is 0 and then crash:

    srq->event(srq, event_type);

Because event is NULL.

Use refcount_inc_not_zero() instead to ensure a partially prepared srq is never retrieved from the event handler and fix the ordering of the initialization so refcount becomes 1 only after it is fully ready.

All the initialization must be done before calling free_srqc() since it depends on the completion and refcount.

Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=3
Link: https://patch.msgid.link/r/13-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
Reviewed-by: Junxian Huang <huangjunxian6@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-rw-r--r-- drivers/infiniband/hw/hns/hns_roce_srq.c 12
1 files changed, 6 insertions, 6 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index cb848e8e6bbd..8b94cbdfa54d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -16,8 +16,8 @@ void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
xa_lock(&srq_table->xa);
srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
- if (srq)
- refcount_inc(&srq->refcount);
+ if (srq && !refcount_inc_not_zero(&srq->refcount))
+ srq = NULL;
xa_unlock(&srq_table->xa);
if (!srq) {
@@ -470,6 +470,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
if (ret)
goto err_srqn;
+ srq->event = hns_roce_ib_srq_event;
+ init_completion(&srq->free);
+ refcount_set_release(&srq->refcount, 1);
+
if (udata) {
resp.cap_flags = srq->cap_flags;
resp.srqn = srq->srqn;
@@ -480,10 +484,6 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
}
}
- srq->event = hns_roce_ib_srq_event;
- refcount_set(&srq->refcount, 1);
- init_completion(&srq->free);
-
return 0;
err_srqc: