author    Chuck Lever <chuck.lever@oracle.com>              2017-02-09 00:59:54 +0300
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>  2017-03-12 08:41:52 +0300
commit    fab6c2caa48f892c4f3446aea9e253ca8a6187a6 (patch)
tree      528353702b23688bd7d0f3617708c22bcea31219 /net/sunrpc/xprtrdma
parent    ec3bc2c5ed576c27e4c89a4fc1654755a0fe71bb (diff)
xprtrdma: Per-connection pad optimization
commit b5f0afbea4f2ea52c613ac2b06cb6de2ea18cb6d upstream.

Pad optimization is changed by echoing into
/proc/sys/sunrpc/rdma_pad_optimize. This is a global setting,
affecting all RPC-over-RDMA connections to all servers. The
marshaling code picks up that value and uses it for decisions about
how to construct each RPC-over-RDMA frame. Having it change suddenly
in mid-operation can result in unexpected failures. And some servers
a client mounts might need chunk round-up, while others don't.

So instead, copy the pad_optimize setting into each connection's
rpcrdma_ia when the transport is created, and use that copy, which
cannot change during the life of the connection.

This also removes a hack: rpcrdma_convert_iovs was using the
remote-invalidation-expected flag to predict when it could leave out
Write chunk padding. This is because the Linux server handles
implicit XDR padding on Write chunks correctly, and only Linux
servers can set the connection's remote-invalidation-expected flag.
It's more sensible to use the pad optimization setting instead.

Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
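For background: XDR rounds every variable-length data item up to a
multiple of four bytes, which is why an odd-length Write chunk may
need an extra pad segment. Below is a minimal standalone C sketch of
that rule and of the per-connection snapshot approach this patch
takes; the type and function names in the sketch are illustrative
only and do not appear in the kernel sources.

#include <stdbool.h>
#include <stddef.h>

/* XDR pads each data item out to a 4-byte boundary. */
static size_t xdr_pad_len(size_t len)
{
	return (4 - (len & 3)) & 3;	/* 0..3 pad bytes */
}

/* Hypothetical per-connection state: the global tunable is copied
 * once at connection setup, so a later sysctl write cannot change
 * how frames already being marshaled are constructed.
 */
struct conn_settings {
	bool implicit_roundup;	/* snapshot of pad_optimize */
};

static void conn_init(struct conn_settings *cs, bool pad_optimize)
{
	cs->implicit_roundup = pad_optimize;
}

/* A Write chunk whose length is not 4-byte aligned needs an extra
 * pad segment unless this connection's server performs implicit
 * round-up itself.
 */
static bool needs_pad_segment(const struct conn_settings *cs,
			      size_t chunk_len)
{
	return xdr_pad_len(chunk_len) != 0 && !cs->implicit_roundup;
}

With the setting snapshotted this way, a later write to
/proc/sys/sunrpc/rdma_pad_optimize affects only connections
established afterward, never frames already in flight.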
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c  | 28
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c     |  1
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h |  1
3 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e5a8c22173c2..b8f46c186f02 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -186,9 +186,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
  */
 
 static int
-rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
-	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
-	bool reminv_expected)
+rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
+	unsigned int pos, enum rpcrdma_chunktype type,
+	struct rpcrdma_mr_seg *seg)
 {
 	int len, n, p, page_base;
 	struct page **ppages;
@@ -229,14 +229,15 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 	/* When encoding a Read chunk, the tail iovec contains an
 	 * XDR pad and may be omitted.
 	 */
-	if (type == rpcrdma_readch && xprt_rdma_pad_optimize)
+	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
 		return n;
 
-	/* When encoding the Write list, some servers need to see an extra
-	 * segment for odd-length Write chunks. The upper layer provides
-	 * space in the tail iovec for this purpose.
+	/* When encoding a Write chunk, some servers need to see an
+	 * extra segment for non-XDR-aligned Write chunks. The upper
+	 * layer provides space in the tail iovec that may be used
+	 * for this purpose.
 	 */
-	if (type == rpcrdma_writech && reminv_expected)
+	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
 		return n;
 
 	if (xdrbuf->tail[0].iov_len) {
@@ -291,7 +292,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
 	if (rtype == rpcrdma_areadch)
 		pos = 0;
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
+				     rtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -353,10 +355,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
 				     rqst->rq_rcv_buf.head[0].iov_len,
-				     wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+				     wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -421,8 +422,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8da7f6a4dfc3..39f9e31008bc 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -208,6 +208,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
 
 	/* Default settings for RPC-over-RDMA Version One */
 	r_xprt->rx_ia.ri_reminv_expected = false;
+	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
 	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index f6ae1b22da47..07db2d1e072d 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -75,6 +75,7 @@ struct rpcrdma_ia {
 	unsigned int		ri_max_inline_write;
 	unsigned int		ri_max_inline_read;
 	bool			ri_reminv_expected;
+	bool			ri_implicit_roundup;
 	struct ib_qp_attr	ri_qp_attr;
 	struct ib_qp_init_attr	ri_qp_init_attr;
 };
};