diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-02 03:10:30 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-02 03:10:30 +0300 |
commit | 8f03cf50bc9443e92d6e54ac4d599357d6cb7cbb (patch) | |
tree | 0c38aab3a4c24d06fb05376b651157627bc1669d /net/sunrpc/xprtrdma/verbs.c | |
parent | 25c4e6c3f0c14d1575aa488ff4ca47e045ae51a0 (diff) | |
parent | ed92d8c137b7794c2c2aa14479298b9885967607 (diff) | |
download | linux-8f03cf50bc9443e92d6e54ac4d599357d6cb7cbb.tar.xz |
Merge tag 'nfs-for-4.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
"Highlights include:
Stable bugfixes:
- NFSv4: Fix memory and state leak in _nfs4_open_and_get_state
- xprtrdma: Fix Read chunk padding
- xprtrdma: Per-connection pad optimization
- xprtrdma: Disable pad optimization by default
- xprtrdma: Reduce required number of send SGEs
- nlm: Ensure callback code also checks that the files match
- pNFS/flexfiles: If the layout is invalid, it must be updated before
retrying
- NFSv4: Fix reboot recovery in copy offload
- Revert "NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION
replies to OP_SEQUENCE"
- NFSv4: fix getacl head length estimation
- NFSv4: fix getacl ERANGE for sum ACL buffer sizes
Features:
- Add and use dprintk_cont macros
- Various cleanups to NFS v4.x to reduce code duplication and
complexity
- Remove unused cr_magic related code
- Improvements to sunrpc "read from buffer" code
- Clean up sunrpc timeout code and allow changing TCP timeout
parameters
- Remove duplicate mw_list management code in xprtrdma
- Add generic functions for encoding and decoding xdr streams
Bugfixes:
- Clean up nfs_show_mountd_netid
- Make layoutreturn_ops static and use NULL instead of 0 to fix
sparse warnings
- Properly handle -ERESTARTSYS in nfs_rename()
- Check if register_shrinker() failed during rpcauth_init()
- Properly clean up procfs/pipefs entries
- Various NFS over RDMA related fixes
- Silence unititialized variable warning in sunrpc"
* tag 'nfs-for-4.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (64 commits)
NFSv4: fix getacl ERANGE for some ACL buffer sizes
NFSv4: fix getacl head length estimation
Revert "NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE"
NFSv4: Fix reboot recovery in copy offload
pNFS/flexfiles: If the layout is invalid, it must be updated before retrying
NFSv4: Clean up owner/group attribute decode
SUNRPC: Add a helper function xdr_stream_decode_string_dup()
NFSv4: Remove bogus "struct nfs_client" argument from decode_ace()
NFSv4: Fix the underestimation of delegation XDR space reservation
NFSv4: Replace callback string decode function with a generic
NFSv4: Replace the open coded decode_opaque_inline() with the new generic
NFSv4: Replace ad-hoc xdr encode/decode helpers with xdr_stream_* generics
SUNRPC: Add generic helpers for xdr_stream encode/decode
sunrpc: silence uninitialized variable warning
nlm: Ensure callback code also checks that the files match
sunrpc: Allow xprt->ops->timer method to sleep
xprtrdma: Refactor management of mw_list field
xprtrdma: Handle stale connection rejection
xprtrdma: Properly recover FRWRs with in-flight FASTREG WRs
xprtrdma: Shrink send SGEs array
...
Diffstat (limited to 'net/sunrpc/xprtrdma/verbs.c')
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 96 |
1 files changed, 35 insertions, 61 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 11d07748f699..81cd31acf690 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -54,6 +54,7 @@ #include <linux/sunrpc/svc_rdma.h> #include <asm/bitops.h> #include <linux/module.h> /* try_module_get()/module_put() */ +#include <rdma/ib_cm.h> #include "xprt_rdma.h" @@ -208,6 +209,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, /* Default settings for RPC-over-RDMA Version One */ r_xprt->rx_ia.ri_reminv_expected = false; + r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; rsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE; @@ -215,6 +217,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_version == RPCRDMA_CMP_VERSION) { r_xprt->rx_ia.ri_reminv_expected = true; + r_xprt->rx_ia.ri_implicit_roundup = true; rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } @@ -277,7 +280,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) connstate = -ENETDOWN; goto connected; case RDMA_CM_EVENT_REJECTED: +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n", + sap, rpc_get_port(sap), ia->ri_device->name, + rdma_reject_msg(id, event->status)); +#endif connstate = -ECONNREFUSED; + if (event->status == IB_CM_REJ_STALE_CONN) + connstate = -EAGAIN; goto connected; case RDMA_CM_EVENT_DISCONNECTED: connstate = -ECONNABORTED; @@ -486,18 +496,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, - struct rpcrdma_create_data_internal *cdata) + struct rpcrdma_create_data_internal *cdata) { struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; + unsigned int max_qp_wr, max_sge; struct ib_cq *sendcq, *recvcq; - unsigned int max_qp_wr; int rc; - if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) { - dprintk("RPC: %s: insufficient sge's available\n", - __func__); + max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); + if (max_sge < RPCRDMA_MIN_SEND_SGES) { + pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; } + ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", @@ -522,7 +533,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ - ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; + ep->rep_attr.cap.max_send_sge = max_sge; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; @@ -640,20 +651,21 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) int rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { + struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, + rx_ia); struct rdma_cm_id *id, *old; + struct sockaddr *sap; + unsigned int extras; int rc = 0; - int retry_count = 0; if (ep->rep_connected != 0) { - struct rpcrdma_xprt *xprt; retry: dprintk("RPC: %s: reconnecting...\n", __func__); rpcrdma_ep_disconnect(ep, ia); - xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - id = rpcrdma_create_id(xprt, ia, - (struct sockaddr *)&xprt->rx_data.addr); + sap = (struct sockaddr *)&r_xprt->rx_data.addr; + id = rpcrdma_create_id(r_xprt, ia, sap); if (IS_ERR(id)) { rc = -EHOSTUNREACH; goto out; @@ -708,51 +720,18 @@ retry: } wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); - - /* - * Check state. A non-peer reject indicates no listener - * (ECONNREFUSED), which may be a transient state. All - * others indicate a transport condition which has already - * undergone a best-effort. - */ - if (ep->rep_connected == -ECONNREFUSED && - ++retry_count <= RDMA_CONNECT_RETRY_MAX) { - dprintk("RPC: %s: non-peer_reject, retry\n", __func__); - goto retry; - } if (ep->rep_connected <= 0) { - /* Sometimes, the only way to reliably connect to remote - * CMs is to use same nonzero values for ORD and IRD. */ - if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && - (ep->rep_remote_cma.responder_resources == 0 || - ep->rep_remote_cma.initiator_depth != - ep->rep_remote_cma.responder_resources)) { - if (ep->rep_remote_cma.responder_resources == 0) - ep->rep_remote_cma.responder_resources = 1; - ep->rep_remote_cma.initiator_depth = - ep->rep_remote_cma.responder_resources; + if (ep->rep_connected == -EAGAIN) goto retry; - } rc = ep->rep_connected; - } else { - struct rpcrdma_xprt *r_xprt; - unsigned int extras; - - dprintk("RPC: %s: connected\n", __func__); - - r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - extras = r_xprt->rx_buf.rb_bc_srv_max_requests; - - if (extras) { - rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); - if (rc) { - pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", - __func__, rc); - rc = 0; - } - } + goto out; } + dprintk("RPC: %s: connected\n", __func__); + extras = r_xprt->rx_buf.rb_bc_srv_max_requests; + if (extras) + rpcrdma_ep_post_extra_recv(r_xprt, extras); + out: if (rc) ep->rep_connected = rc; @@ -797,9 +776,7 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) spin_lock(&buf->rb_recovery_lock); while (!list_empty(&buf->rb_stale_mrs)) { - mw = list_first_entry(&buf->rb_stale_mrs, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); + mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); spin_unlock(&buf->rb_recovery_lock); dprintk("RPC: %s: recovering MR %p\n", __func__, mw); @@ -817,7 +794,7 @@ rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; spin_lock(&buf->rb_recovery_lock); - list_add(&mw->mw_list, &buf->rb_stale_mrs); + rpcrdma_push_mw(mw, &buf->rb_stale_mrs); spin_unlock(&buf->rb_recovery_lock); schedule_delayed_work(&buf->rb_recovery_worker, 0); @@ -1093,11 +1070,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) struct rpcrdma_mw *mw = NULL; spin_lock(&buf->rb_mwlock); - if (!list_empty(&buf->rb_mws)) { - mw = list_first_entry(&buf->rb_mws, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); - } + if (!list_empty(&buf->rb_mws)) + mw = rpcrdma_pop_mw(&buf->rb_mws); spin_unlock(&buf->rb_mwlock); if (!mw) @@ -1120,7 +1094,7 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; spin_lock(&buf->rb_mwlock); - list_add_tail(&mw->mw_list, &buf->rb_mws); + rpcrdma_push_mw(mw, &buf->rb_mws); spin_unlock(&buf->rb_mwlock); } |