diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-02 03:10:30 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-02 03:10:30 +0300 |
commit | 8f03cf50bc9443e92d6e54ac4d599357d6cb7cbb (patch) | |
tree | 0c38aab3a4c24d06fb05376b651157627bc1669d /net/sunrpc/xprtrdma/rpc_rdma.c | |
parent | 25c4e6c3f0c14d1575aa488ff4ca47e045ae51a0 (diff) | |
parent | ed92d8c137b7794c2c2aa14479298b9885967607 (diff) | |
download | linux-8f03cf50bc9443e92d6e54ac4d599357d6cb7cbb.tar.xz |
Merge tag 'nfs-for-4.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
"Highlights include:
Stable bugfixes:
- NFSv4: Fix memory and state leak in _nfs4_open_and_get_state
- xprtrdma: Fix Read chunk padding
- xprtrdma: Per-connection pad optimization
- xprtrdma: Disable pad optimization by default
- xprtrdma: Reduce required number of send SGEs
- nlm: Ensure callback code also checks that the files match
- pNFS/flexfiles: If the layout is invalid, it must be updated before
retrying
- NFSv4: Fix reboot recovery in copy offload
- Revert "NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION
replies to OP_SEQUENCE"
- NFSv4: fix getacl head length estimation
- NFSv4: fix getacl ERANGE for some ACL buffer sizes
Features:
- Add and use dprintk_cont macros
- Various cleanups to NFS v4.x to reduce code duplication and
complexity
- Remove unused cr_magic related code
- Improvements to sunrpc "read from buffer" code
- Clean up sunrpc timeout code and allow changing TCP timeout
parameters
- Remove duplicate mw_list management code in xprtrdma
- Add generic functions for encoding and decoding xdr streams
Bugfixes:
- Clean up nfs_show_mountd_netid
- Make layoutreturn_ops static and use NULL instead of 0 to fix
sparse warnings
- Properly handle -ERESTARTSYS in nfs_rename()
- Check if register_shrinker() failed during rpcauth_init()
- Properly clean up procfs/pipefs entries
- Various NFS over RDMA related fixes
- Silence uninitialized variable warning in sunrpc"
* tag 'nfs-for-4.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (64 commits)
NFSv4: fix getacl ERANGE for some ACL buffer sizes
NFSv4: fix getacl head length estimation
Revert "NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE"
NFSv4: Fix reboot recovery in copy offload
pNFS/flexfiles: If the layout is invalid, it must be updated before retrying
NFSv4: Clean up owner/group attribute decode
SUNRPC: Add a helper function xdr_stream_decode_string_dup()
NFSv4: Remove bogus "struct nfs_client" argument from decode_ace()
NFSv4: Fix the underestimation of delegation XDR space reservation
NFSv4: Replace callback string decode function with a generic
NFSv4: Replace the open coded decode_opaque_inline() with the new generic
NFSv4: Replace ad-hoc xdr encode/decode helpers with xdr_stream_* generics
SUNRPC: Add generic helpers for xdr_stream encode/decode
sunrpc: silence uninitialized variable warning
nlm: Ensure callback code also checks that the files match
sunrpc: Allow xprt->ops->timer method to sleep
xprtrdma: Refactor management of mw_list field
xprtrdma: Handle stale connection rejection
xprtrdma: Properly recover FRWRs with in-flight FASTREG WRs
xprtrdma: Shrink send SGEs array
...
Diffstat (limited to 'net/sunrpc/xprtrdma/rpc_rdma.c')
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 82 |
1 files changed, 51 insertions, 31 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c52e0f2ffe52..a044be2d6ad7 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) /* The client can send a request inline as long as the RPCRDMA header * plus the RPC call fit under the transport's inline limit. If the * combined call message size exceeds that limit, the client must use - * the read chunk list for this operation. + * a Read chunk for this operation. + * + * A Read chunk is also required if sending the RPC call inline would + * exceed this device's max_sge limit. */ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct xdr_buf *xdr = &rqst->rq_snd_buf; + unsigned int count, remaining, offset; + + if (xdr->len > r_xprt->rx_ia.ri_max_inline_write) + return false; + + if (xdr->page_len) { + remaining = xdr->page_len; + offset = xdr->page_base & ~PAGE_MASK; + count = 0; + while (remaining) { + remaining -= min_t(unsigned int, + PAGE_SIZE - offset, remaining); + offset = 0; + if (++count > r_xprt->rx_ia.ri_max_send_sges) + return false; + } + } - return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; + return true; } /* The client can't know how large the actual reply will be. 
Thus it @@ -186,9 +206,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) */ static int -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, - enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, - bool reminv_expected) +rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, + unsigned int pos, enum rpcrdma_chunktype type, + struct rpcrdma_mr_seg *seg) { int len, n, p, page_base; struct page **ppages; @@ -226,22 +246,21 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, if (len && n == RPCRDMA_MAX_SEGS) goto out_overflow; - /* When encoding the read list, the tail is always sent inline */ - if (type == rpcrdma_readch) + /* When encoding a Read chunk, the tail iovec contains an + * XDR pad and may be omitted. + */ + if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup) return n; - /* When encoding the Write list, some servers need to see an extra - * segment for odd-length Write chunks. The upper layer provides - * space in the tail iovec for this purpose. + /* When encoding a Write chunk, some servers need to see an + * extra segment for non-XDR-aligned Write chunks. The upper + * layer provides space in the tail iovec that may be used + * for this purpose. */ - if (type == rpcrdma_writech && reminv_expected) + if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup) return n; if (xdrbuf->tail[0].iov_len) { - /* the rpcrdma protocol allows us to omit any trailing - * xdr pad bytes, saving the server an RDMA operation. 
*/ - if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) - return n; n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n); if (n == RPCRDMA_MAX_SEGS) goto out_overflow; @@ -293,7 +312,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, if (rtype == rpcrdma_areadch) pos = 0; seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos, + rtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -302,7 +322,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, false, &mw); if (n < 0) return ERR_PTR(n); - list_add(&mw->mw_list, &req->rl_registered); + rpcrdma_push_mw(mw, &req->rl_registered); *iptr++ = xdr_one; /* item present */ @@ -355,10 +375,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, rqst->rq_rcv_buf.head[0].iov_len, - wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -371,7 +390,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, true, &mw); if (n < 0) return ERR_PTR(n); - list_add(&mw->mw_list, &req->rl_registered); + rpcrdma_push_mw(mw, &req->rl_registered); iptr = xdr_encode_rdma_segment(iptr, mw); @@ -423,8 +442,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -437,7 +455,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, true, &mw); if (n < 0) return ERR_PTR(n); - list_add(&mw->mw_list, &req->rl_registered); + rpcrdma_push_mw(mw, &req->rl_registered); iptr = xdr_encode_rdma_segment(iptr, mw); @@ -741,13 +759,13 @@ 
rpcrdma_marshal_req(struct rpc_rqst *rqst) iptr = headerp->rm_body.rm_chunks; iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype); if (IS_ERR(iptr)) - goto out_unmap; + goto out_err; iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype); if (IS_ERR(iptr)) - goto out_unmap; + goto out_err; iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype); if (IS_ERR(iptr)) - goto out_unmap; + goto out_err; hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", @@ -758,12 +776,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, &rqst->rq_snd_buf, rtype)) { iptr = ERR_PTR(-EIO); - goto out_unmap; + goto out_err; } return 0; -out_unmap: - r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); +out_err: + pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n", + PTR_ERR(iptr)); + r_xprt->rx_stats.failed_marshal_count++; return PTR_ERR(iptr); } |