summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
author: Chuck Lever <chuck.lever@oracle.com> 2026-01-28 03:53:59 +0300
committer: Sasha Levin <sashal@kernel.org> 2026-03-04 15:19:51 +0300
commit: 97a907bc8d62cc76cb032e02cd76402df2ae9d52 (patch)
tree: 008d646f930c1b642ff9a360cbcec88d4d25f28d /drivers/infiniband
parent: 6ba731f9d4ee7508f2ec6db7af8b1e5531c2d045 (diff)
download: linux-97a907bc8d62cc76cb032e02cd76402df2ae9d52.tar.xz
RDMA/core: add rdma_rw_max_send_wr() helper for SQ sizing

[ Upstream commit afcae7d7b8a278a6c29e064f99e5bafd4ac1fb37 ]

svc_rdma_accept() computes sc_sq_depth as the sum of rq_depth and the number of rdma_rw contexts (ctxts). This value is used to allocate the Send CQ and to initialize the sc_sq_avail credit pool.

However, when the device uses memory registration for RDMA operations, rdma_rw_init_qp() inflates the QP's max_send_wr by a factor of three per context to account for REG and INV work requests. The Send CQ and credit pool remain sized for only one work request per context, causing Send Queue exhaustion under heavy NFS WRITE workloads.

Introduce rdma_rw_max_send_wr() to compute the actual number of Send Queue entries required for a given number of rdma_rw contexts. Upper layer protocols call this helper before creating a Queue Pair so that their Send CQs and credit accounting match the QP's true capacity.

Update svc_rdma_accept() to use rdma_rw_max_send_wr() when computing sc_sq_depth, ensuring the credit pool reflects the work requests that rdma_rw_init_qp() will reserve.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Fixes: 00bd1439f464 ("RDMA/rw: Support threshold for registration vs scattering to local pages")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Link: https://patch.msgid.link/20260128005400.25147-5-cel@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/rw.c53
1 files changed, 38 insertions, 15 deletions
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 6354ddf2a274..2522ff1cc462 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -651,34 +651,57 @@ unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
}
EXPORT_SYMBOL(rdma_rw_mr_factor);
+/**
+ * rdma_rw_max_send_wr - compute max Send WRs needed for RDMA R/W contexts
+ * @dev: RDMA device
+ * @port_num: port number on @dev
+ * @max_rdma_ctxs: number of rdma_rw_ctx structures
+ * @create_flags: QP create flags (pass IB_QP_CREATE_INTEGRITY_EN if
+ * data integrity will be enabled on the QP)
+ *
+ * ULPs use this to size Send Queues and Send CQs before creating a
+ * Queue Pair. Each context needs one WR, plus two more (reg + inv)
+ * when IB_QP_CREATE_INTEGRITY_EN is set or rdma_rw_can_use_mr() is nonzero.
+ *
+ * Return: Send Queue entries needed for @max_rdma_ctxs contexts, or
+ * UINT_MAX if the multiplication overflows.
+ */
+unsigned int rdma_rw_max_send_wr(struct ib_device *dev, u32 port_num,
+ unsigned int max_rdma_ctxs, u32 create_flags)
+{
+ unsigned int factor = 1; /* one WR per context as a baseline */
+ unsigned int result;
+
+ if (create_flags & IB_QP_CREATE_INTEGRITY_EN ||
+ rdma_rw_can_use_mr(dev, port_num))
+ factor += 2; /* reg + inv */
+
+ if (check_mul_overflow(factor, max_rdma_ctxs, &result))
+ return UINT_MAX; /* saturate rather than return a wrapped count */
+ return result;
+}
+EXPORT_SYMBOL(rdma_rw_max_send_wr);
+
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
- u32 factor;
+ unsigned int factor = 1;
WARN_ON_ONCE(attr->port_num == 0);
/*
- * Each context needs at least one RDMA READ or WRITE WR.
- *
- * For some hardware we might need more, eventually we should ask the
- * HCA driver for a multiplier here.
- */
- factor = 1;
-
- /*
- * If the device needs MRs to perform RDMA READ or WRITE operations,
- * we'll need two additional MRs for the registrations and the
- * invalidation.
+ * If the device uses MRs to perform RDMA READ or WRITE operations,
+ * or if data integrity is enabled, account for registration and
+ * invalidation work requests.
*/
if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
rdma_rw_can_use_mr(dev, attr->port_num))
- factor += 2; /* inv + reg */
+ factor += 2; /* reg + inv */
attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
/*
- * But maybe we were just too high in the sky and the device doesn't
- * even support all we need, and we'll have to live with what we get..
+ * The device might not support all we need, and we'll have to
+ * live with what we get.
*/
attr->cap.max_send_wr =
min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);