summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChengchang Tang <tangchengchang@huawei.com>2026-01-08 14:30:32 +0300
committerLeon Romanovsky <leon@kernel.org>2026-01-15 12:59:53 +0300
commit354e7a6d448b5744362bf33a24315d4d1d0bb7ef (patch)
tree02b53e8e58b1807e7a7dd3aad927c35152be4f18
parent5c3f795d17dc57a58a1fc1c1b449812e26ad85a3 (diff)
downloadlinux-354e7a6d448b5744362bf33a24315d4d1d0bb7ef.tar.xz
RDMA/hns: Support drain SQ and RQ
Some ULPs, e.g. rpcrdma, rely on drain_qp() to ensure all outstanding requests are completed before releasing related memory. If drain_qp() fails, ULPs may release memory directly, and in-flight WRs may later be flushed after the memory is freed, potentially leading to UAF. drain_qp() failures can happen when HW enters an error state or is reset. Add support to drain SQ and RQ in such cases by posting a fake WR during reset, so the driver can process all remaining WRs in sequence and generate corresponding completions. Always invoke comp_handler() in drain process to ensure completions are not lost under concurrency (e.g. concurrent post_send() and reset, or QPs created during reset). If the CQ is already processed, cancel any already scheduled comp_handler() to avoid concurrency issues. Signed-off-by: Chengchang Tang <tangchengchang@huawei.com> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> Link: https://patch.msgid.link/20260108113032.856306-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky <leon@kernel.org>
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c166
1 files changed, 166 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a2ae4f33e459..5d0a8662249d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -876,6 +876,170 @@ out:
return ret;
}
+static int hns_roce_push_drain_wr(struct hns_roce_wq *wq, struct ib_cq *cq,
+ u64 wr_id)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&wq->lock, flags);
+ if (hns_roce_wq_overflow(wq, 1, cq)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ wq->wrid[wq->head & (wq->wqe_cnt - 1)] = wr_id;
+ wq->head++;
+
+out:
+ spin_unlock_irqrestore(&wq->lock, flags);
+ return ret;
+}
+
+struct hns_roce_drain_cqe {
+ struct ib_cqe cqe;
+ struct completion done;
+};
+
+static void hns_roce_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct hns_roce_drain_cqe *cqe = container_of(wc->wr_cqe,
+ struct hns_roce_drain_cqe,
+ cqe);
+ complete(&cqe->done);
+}
+
+static void handle_drain_completion(struct ib_cq *ibcq,
+ struct hns_roce_drain_cqe *drain,
+ struct hns_roce_dev *hr_dev)
+{
+#define TIMEOUT (HZ / 10)
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ unsigned long flags;
+ bool triggered;
+
+ if (ibcq->poll_ctx == IB_POLL_DIRECT) {
+ while (wait_for_completion_timeout(&drain->done, TIMEOUT) <= 0)
+ ib_process_cq_direct(ibcq, -1);
+ return;
+ }
+
+ if (hr_dev->state < HNS_ROCE_DEVICE_STATE_RST_DOWN)
+ goto waiting_done;
+
+ spin_lock_irqsave(&hr_cq->lock, flags);
+ triggered = hr_cq->is_armed;
+ hr_cq->is_armed = 1;
+ spin_unlock_irqrestore(&hr_cq->lock, flags);
+
+ /* Triggered means this cq is processing or has been processed
+ * by hns_roce_handle_device_err() or this function. We need to
+ * cancel the already invoked comp_handler() to avoid concurrency.
+ * If it has not been triggered, we can directly invoke
+ * comp_handler().
+ */
+ if (triggered) {
+ switch (ibcq->poll_ctx) {
+ case IB_POLL_SOFTIRQ:
+ irq_poll_disable(&ibcq->iop);
+ irq_poll_enable(&ibcq->iop);
+ break;
+ case IB_POLL_WORKQUEUE:
+ case IB_POLL_UNBOUND_WORKQUEUE:
+ cancel_work_sync(&ibcq->work);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ }
+
+ if (ibcq->comp_handler)
+ ibcq->comp_handler(ibcq, ibcq->cq_context);
+
+waiting_done:
+ if (ibcq->comp_handler)
+ wait_for_completion(&drain->done);
+}
+
+static void hns_roce_v2_drain_rq(struct ib_qp *ibqp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_drain_cqe rdrain = {};
+ const struct ib_recv_wr *bad_rwr;
+ struct ib_cq *cq = ibqp->recv_cq;
+ struct ib_recv_wr rwr = {};
+ int ret;
+
+ ret = ib_modify_qp(ibqp, &attr, IB_QP_STATE);
+ if (ret && hr_dev->state < HNS_ROCE_DEVICE_STATE_RST_DOWN) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to modify qp during drain rq, ret = %d.\n",
+ ret);
+ return;
+ }
+
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = hns_roce_drain_qp_done;
+ init_completion(&rdrain.done);
+
+ if (hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)
+ ret = hns_roce_push_drain_wr(&hr_qp->rq, cq, rwr.wr_id);
+ else
+ ret = hns_roce_v2_post_recv(ibqp, &rwr, &bad_rwr);
+ if (ret) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to post recv for drain rq, ret = %d.\n",
+ ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &rdrain, hr_dev);
+}
+
+static void hns_roce_v2_drain_sq(struct ib_qp *ibqp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_drain_cqe sdrain = {};
+ const struct ib_send_wr *bad_swr;
+ struct ib_cq *cq = ibqp->send_cq;
+ struct ib_rdma_wr swr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &sdrain.cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ },
+ };
+ int ret;
+
+ ret = ib_modify_qp(ibqp, &attr, IB_QP_STATE);
+ if (ret && hr_dev->state < HNS_ROCE_DEVICE_STATE_RST_DOWN) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to modify qp during drain sq, ret = %d.\n",
+ ret);
+ return;
+ }
+
+ sdrain.cqe.done = hns_roce_drain_qp_done;
+ init_completion(&sdrain.done);
+
+ if (hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)
+ ret = hns_roce_push_drain_wr(&hr_qp->sq, cq, swr.wr.wr_id);
+ else
+ ret = hns_roce_v2_post_send(ibqp, &swr.wr, &bad_swr);
+ if (ret) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to post send for drain sq, ret = %d.\n",
+ ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &sdrain, hr_dev);
+}
+
static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
{
return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
@@ -7040,6 +7204,8 @@ static const struct ib_device_ops hns_roce_v2_dev_ops = {
.post_send = hns_roce_v2_post_send,
.query_qp = hns_roce_v2_query_qp,
.req_notify_cq = hns_roce_v2_req_notify_cq,
+ .drain_rq = hns_roce_v2_drain_rq,
+ .drain_sq = hns_roce_v2_drain_sq,
};
static const struct ib_device_ops hns_roce_v2_dev_srq_ops = {