summary | refs | log | tree | commit | diff
path: root/net/sunrpc/xprtrdma/svc_rdma_rw.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-06-12 19:49:33 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-06-12 19:49:33 +0300
commit: 89e255678fec5a1a9ed59664a62212d19873aedc (patch)
tree: eac17093cf2122f54d599f6ffbda0206064a324a /net/sunrpc/xprtrdma/svc_rdma_rw.c
parent: 8efcf34a263965e471e3999904f94d1f6799d42a (diff)
parent: 692ad280bff3e81721ab138b9455948ab5289acf (diff)
download: linux-89e255678fec5a1a9ed59664a62212d19873aedc.tar.xz
Merge tag 'nfsd-4.18' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
 "A relatively quiet cycle for nfsd. The largest piece is an RDMA update
  from Chuck Lever with new trace points, miscellaneous cleanups, and
  streamlining of the send and receive paths.

  Other than that, some miscellaneous bugfixes"

* tag 'nfsd-4.18' of git://linux-nfs.org/~bfields/linux: (26 commits)
  nfsd: fix error handling in nfs4_set_delegation()
  nfsd: fix potential use-after-free in nfsd4_decode_getdeviceinfo
  Fix 16-byte memory leak in gssp_accept_sec_context_upcall
  svcrdma: Fix incorrect return value/type in svc_rdma_post_recvs
  svcrdma: Remove unused svc_rdma_op_ctxt
  svcrdma: Persistently allocate and DMA-map Send buffers
  svcrdma: Simplify svc_rdma_send()
  svcrdma: Remove post_send_wr
  svcrdma: Don't overrun the SGE array in svc_rdma_send_ctxt
  svcrdma: Introduce svc_rdma_send_ctxt
  svcrdma: Clean up Send SGE accounting
  svcrdma: Refactor svc_rdma_dma_map_buf
  svcrdma: Allocate recv_ctxt's on CPU handling Receives
  svcrdma: Persistently allocate and DMA-map Receive buffers
  svcrdma: Preserve Receive buffer until svc_rdma_sendto
  svcrdma: Simplify svc_rdma_recv_ctxt_put
  svcrdma: Remove sc_rq_depth
  svcrdma: Introduce svc_rdma_recv_ctxt
  svcrdma: Trace key RDMA API events
  svcrdma: Trace key RPC/RDMA protocol events
  ...
Diffstat (limited to 'net/sunrpc/xprtrdma/svc_rdma_rw.c')
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_rw.c | 133
1 files changed, 67 insertions, 66 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 12b9a7e0b6d2..ce3ea8419704 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1,15 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2016 Oracle. All rights reserved.
+ * Copyright (c) 2016-2018 Oracle. All rights reserved.
*
* Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
*/
+#include <rdma/rw.h>
+
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/sunrpc/debug.h>
-#include <rdma/rw.h>
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
@@ -205,6 +208,8 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_write_info *info =
container_of(cc, struct svc_rdma_write_info, wi_cc);
+ trace_svcrdma_wc_write(wc);
+
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
@@ -222,7 +227,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
/* State for pulling a Read chunk.
*/
struct svc_rdma_read_info {
- struct svc_rdma_op_ctxt *ri_readctxt;
+ struct svc_rdma_recv_ctxt *ri_readctxt;
unsigned int ri_position;
unsigned int ri_pageno;
unsigned int ri_pageoff;
@@ -266,6 +271,8 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_read_info *info =
container_of(cc, struct svc_rdma_read_info, ri_cc);
+ trace_svcrdma_wc_read(wc);
+
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
@@ -275,10 +282,10 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
- svc_rdma_put_context(info->ri_readctxt, 1);
+ svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt);
} else {
spin_lock(&rdma->sc_rq_dto_lock);
- list_add_tail(&info->ri_readctxt->list,
+ list_add_tail(&info->ri_readctxt->rc_list,
&rdma->sc_read_complete_q);
spin_unlock(&rdma->sc_rq_dto_lock);
@@ -323,18 +330,20 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
if (atomic_sub_return(cc->cc_sqecount,
&rdma->sc_sq_avail) > 0) {
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+ trace_svcrdma_post_rw(&cc->cc_cqe,
+ cc->cc_sqecount, ret);
if (ret)
break;
return 0;
}
- atomic_inc(&rdma_stat_sq_starve);
+ trace_svcrdma_sq_full(rdma);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
+ trace_svcrdma_sq_retry(rdma);
} while (1);
- pr_err("svcrdma: ib_post_send failed (%d)\n", ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
/* If even one was posted, there will be a completion. */
@@ -437,6 +446,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
if (ret < 0)
goto out_initerr;
+ trace_svcrdma_encode_wseg(seg_handle, write_len, seg_offset);
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
if (write_len == seg_length - info->wi_seg_off) {
@@ -462,7 +472,7 @@ out_noctx:
out_initerr:
svc_rdma_put_rw_ctxt(rdma, ctxt);
- pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
+ trace_svcrdma_dma_map_rwctx(rdma, ret);
return -EIO;
}
@@ -526,6 +536,8 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
if (ret < 0)
goto out_err;
+
+ trace_svcrdma_encode_write(xdr->page_len);
return xdr->page_len;
out_err:
@@ -582,6 +594,8 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
if (ret < 0)
goto out_err;
+
+ trace_svcrdma_encode_reply(consumed);
return consumed;
out_err:
@@ -593,7 +607,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
struct svc_rqst *rqstp,
u32 rkey, u32 len, u64 offset)
{
- struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+ struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
struct svc_rdma_rw_ctxt *ctxt;
unsigned int sge_no, seg_len;
@@ -606,18 +620,15 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
goto out_noctx;
ctxt->rw_nents = sge_no;
- dprintk("svcrdma: reading segment %u@0x%016llx:0x%08x (%u sges)\n",
- len, offset, rkey, sge_no);
-
sg = ctxt->rw_sg_table.sgl;
for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
seg_len = min_t(unsigned int, len,
PAGE_SIZE - info->ri_pageoff);
- head->arg.pages[info->ri_pageno] =
+ head->rc_arg.pages[info->ri_pageno] =
rqstp->rq_pages[info->ri_pageno];
if (!info->ri_pageoff)
- head->count++;
+ head->rc_page_count++;
sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
seg_len, info->ri_pageoff);
@@ -656,8 +667,8 @@ out_overrun:
return -EINVAL;
out_initerr:
+ trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret);
svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);
- pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
return -EIO;
}
@@ -686,6 +697,7 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
if (ret < 0)
break;
+ trace_svcrdma_encode_rseg(rs_handle, rs_length, rs_offset);
info->ri_chunklen += rs_length;
}
@@ -693,9 +705,9 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
}
/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
- * data lands in the page list of head->arg.pages.
+ * data lands in the page list of head->rc_arg.pages.
*
- * Currently NFSD does not look at the head->arg.tail[0] iovec.
+ * Currently NFSD does not look at the head->rc_arg.tail[0] iovec.
* Therefore, XDR round-up of the Read chunk and trailing
* inline content must both be added at the end of the pagelist.
*/
@@ -703,29 +715,27 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info,
__be32 *p)
{
- struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+ struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
int ret;
- dprintk("svcrdma: Reading Read chunk at position %u\n",
- info->ri_position);
-
- info->ri_pageno = head->hdr_count;
- info->ri_pageoff = 0;
-
ret = svc_rdma_build_read_chunk(rqstp, info, p);
if (ret < 0)
goto out;
+ trace_svcrdma_encode_read(info->ri_chunklen, info->ri_position);
+
+ head->rc_hdr_count = 0;
+
/* Split the Receive buffer between the head and tail
* buffers at Read chunk's position. XDR roundup of the
* chunk is not included in either the pagelist or in
* the tail.
*/
- head->arg.tail[0].iov_base =
- head->arg.head[0].iov_base + info->ri_position;
- head->arg.tail[0].iov_len =
- head->arg.head[0].iov_len - info->ri_position;
- head->arg.head[0].iov_len = info->ri_position;
+ head->rc_arg.tail[0].iov_base =
+ head->rc_arg.head[0].iov_base + info->ri_position;
+ head->rc_arg.tail[0].iov_len =
+ head->rc_arg.head[0].iov_len - info->ri_position;
+ head->rc_arg.head[0].iov_len = info->ri_position;
/* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
*
@@ -738,9 +748,9 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
*/
info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2;
- head->arg.page_len = info->ri_chunklen;
- head->arg.len += info->ri_chunklen;
- head->arg.buflen += info->ri_chunklen;
+ head->rc_arg.page_len = info->ri_chunklen;
+ head->rc_arg.len += info->ri_chunklen;
+ head->rc_arg.buflen += info->ri_chunklen;
out:
return ret;
@@ -749,7 +759,7 @@ out:
/* Construct RDMA Reads to pull over a Position Zero Read chunk.
* The start of the data lands in the first page just after
* the Transport header, and the rest lands in the page list of
- * head->arg.pages.
+ * head->rc_arg.pages.
*
* Assumptions:
* - A PZRC has an XDR-aligned length (no implicit round-up).
@@ -761,35 +771,25 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info,
__be32 *p)
{
- struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+ struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
int ret;
- dprintk("svcrdma: Reading Position Zero Read chunk\n");
-
- info->ri_pageno = head->hdr_count - 1;
- info->ri_pageoff = offset_in_page(head->byte_len);
-
ret = svc_rdma_build_read_chunk(rqstp, info, p);
if (ret < 0)
goto out;
- head->arg.len += info->ri_chunklen;
- head->arg.buflen += info->ri_chunklen;
+ trace_svcrdma_encode_pzr(info->ri_chunklen);
- if (head->arg.buflen <= head->sge[0].length) {
- /* Transport header and RPC message fit entirely
- * in page where head iovec resides.
- */
- head->arg.head[0].iov_len = info->ri_chunklen;
- } else {
- /* Transport header and part of RPC message reside
- * in the head iovec's page.
- */
- head->arg.head[0].iov_len =
- head->sge[0].length - head->byte_len;
- head->arg.page_len =
- info->ri_chunklen - head->arg.head[0].iov_len;
- }
+ head->rc_arg.len += info->ri_chunklen;
+ head->rc_arg.buflen += info->ri_chunklen;
+
+ head->rc_hdr_count = 1;
+ head->rc_arg.head[0].iov_base = page_address(head->rc_pages[0]);
+ head->rc_arg.head[0].iov_len = min_t(size_t, PAGE_SIZE,
+ info->ri_chunklen);
+
+ head->rc_arg.page_len = info->ri_chunklen -
+ head->rc_arg.head[0].iov_len;
out:
return ret;
@@ -813,29 +813,30 @@ out:
* - All Read segments in @p have the same Position value.
*/
int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head, __be32 *p)
+ struct svc_rdma_recv_ctxt *head, __be32 *p)
{
struct svc_rdma_read_info *info;
struct page **page;
int ret;
/* The request (with page list) is constructed in
- * head->arg. Pages involved with RDMA Read I/O are
+ * head->rc_arg. Pages involved with RDMA Read I/O are
* transferred there.
*/
- head->hdr_count = head->count;
- head->arg.head[0] = rqstp->rq_arg.head[0];
- head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = head->pages;
- head->arg.page_base = 0;
- head->arg.page_len = 0;
- head->arg.len = rqstp->rq_arg.len;
- head->arg.buflen = rqstp->rq_arg.buflen;
+ head->rc_arg.head[0] = rqstp->rq_arg.head[0];
+ head->rc_arg.tail[0] = rqstp->rq_arg.tail[0];
+ head->rc_arg.pages = head->rc_pages;
+ head->rc_arg.page_base = 0;
+ head->rc_arg.page_len = 0;
+ head->rc_arg.len = rqstp->rq_arg.len;
+ head->rc_arg.buflen = rqstp->rq_arg.buflen;
info = svc_rdma_read_info_alloc(rdma);
if (!info)
return -ENOMEM;
info->ri_readctxt = head;
+ info->ri_pageno = 0;
+ info->ri_pageoff = 0;
info->ri_position = be32_to_cpup(p + 1);
if (info->ri_position)
@@ -856,7 +857,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
out:
/* Read sink pages have been moved from rqstp->rq_pages to
- * head->arg.pages. Force svc_recv to refill those slots
+ * head->rc_arg.pages. Force svc_recv to refill those slots
* in rq_pages.
*/
for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++)