From 9dc6edcf676fe188430e8b119f91280bbf285163 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Aug 2018 14:24:16 -0400 Subject: SUNRPC: Clean up initialisation of the struct rpc_rqst Move the initialisation back into xprt.c. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 336fd1a19cca..3d80524e92d6 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -325,7 +325,6 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); -void xprt_request_init(struct rpc_task *task); void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); -- cgit v1.2.3 From 3021a5bbbf0aa0252f2993b84ee903a0eca0b690 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2018 13:50:21 -0400 Subject: SUNRPC: The transmitted message must lie in the RPCSEC window of validity If a message has been encoded using RPCSEC_GSS, the server is maintaining a window of sequence numbers that it considers valid. The client should normally be tracking that window, and needs to verify that the sequence number used by the message being transmitted still lies inside the window of validity. So far, we've been able to assume this condition would be realised automatically, since the client has been encoding the message only after taking the socket lock. Once we change that condition, we will need the explicit check. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 ++ include/linux/sunrpc/auth_gss.h | 1 + net/sunrpc/auth.c | 10 ++++++++++ net/sunrpc/auth_gss/auth_gss.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/clnt.c | 3 +++ net/sunrpc/xprt.c | 7 +++++++ 6 files changed, 64 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 58a6765c1c5e..2c97a3933ef9 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -157,6 +157,7 @@ struct rpc_credops { int (*crkey_timeout)(struct rpc_cred *); bool (*crkey_to_expire)(struct rpc_cred *); char * (*crstringify_acceptor)(struct rpc_cred *); + bool (*crneed_reencode)(struct rpc_task *); }; extern const struct rpc_authops authunix_ops; @@ -192,6 +193,7 @@ __be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); __be32 * rpcauth_checkverf(struct rpc_task *, __be32 *); int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj); int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj); +bool rpcauth_xmit_need_reencode(struct rpc_task *task); int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 0c9eac351aab..30427b729070 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -70,6 +70,7 @@ struct gss_cl_ctx { refcount_t count; enum rpc_gss_proc gc_proc; u32 gc_seq; + u32 gc_seq_xmit; spinlock_t gc_seq_lock; struct gss_ctx *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 305ecea92170..59df5cdba0ac 100644 --- a/net/sunrpc/auth.c 
+++ b/net/sunrpc/auth.c @@ -817,6 +817,16 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, return rpcauth_unwrap_req_decode(decode, rqstp, data, obj); } +bool +rpcauth_xmit_need_reencode(struct rpc_task *task) +{ + struct rpc_cred *cred = task->tk_rqstp->rq_cred; + + if (!cred || !cred->cr_ops->crneed_reencode) + return false; + return cred->cr_ops->crneed_reencode(task); +} + int rpcauth_refreshcred(struct rpc_task *task) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 21c0aa0a0d1d..c898a7c75e84 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1984,6 +1984,46 @@ gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp, return decode(rqstp, &xdr, obj); } +static bool +gss_seq_is_newer(u32 new, u32 old) +{ + return (s32)(new - old) > 0; +} + +static bool +gss_xmit_need_reencode(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_cred *cred = req->rq_cred; + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + u32 win, seq_xmit; + bool ret = true; + + if (!ctx) + return true; + + if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq))) + goto out; + + seq_xmit = READ_ONCE(ctx->gc_seq_xmit); + while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) { + u32 tmp = seq_xmit; + + seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno); + if (seq_xmit == tmp) { + ret = false; + goto out; + } + } + + win = ctx->gc_win; + if (win > 0) + ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win); +out: + gss_put_ctx(ctx); + return ret; +} + static int gss_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj) @@ -2052,6 +2092,7 @@ static const struct rpc_credops gss_credops = { .crunwrap_resp = gss_unwrap_resp, .crkey_timeout = gss_key_timeout, .crstringify_acceptor = gss_stringify_acceptor, + .crneed_reencode = gss_xmit_need_reencode, }; static const struct rpc_credops gss_nullops = { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4f1ec8013332..d41b5ac1d4e8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2184,6 +2184,9 @@ call_status(struct rpc_task *task) /* shutdown or soft timeout */ rpc_exit(task, status); break; + case -EBADMSG: + task->tk_action = call_transmit; + break; default: if (clnt->cl_chatty) printk("%s: RPC call returned error %d\n", diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6aa09edc9567..3973e10ea2bd 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1014,6 +1014,13 @@ void xprt_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); if (!req->rq_reply_bytes_recvd) { + + /* Verify that our message lies in the RPCSEC_GSS window */ + if (!req->rq_bytes_sent && rpcauth_xmit_need_reencode(task)) { + task->tk_status = -EBADMSG; + return; + } + if (list_empty(&req->rq_list) && rpc_reply_expected(task)) { /* * Add to the list only if we're expecting a reply -- cgit v1.2.3 From 7ebbbc6e7bd023903daa5bd95726edf2d60b559c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Aug 2018 09:00:27 -0400 Subject: SUNRPC: Simplify identification of when the message send/receive is complete Add states to indicate that the message send and receive are not yet complete. 
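To make the new tracking concrete: instead of inferring "already encoded" from rq_snd_buf.len being non-zero, the task now carries explicit RPC_TASK_NEED_XMIT and RPC_TASK_NEED_RECV bits in tk_runstate. A minimal userspace sketch of the lifecycle (illustrative only, not the kernel code: the C11 atomics stand in for the kernel's set_bit()/clear_bit()/test_bit(), and rpc_task_model is a made-up stand-in for struct rpc_task):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define RPC_TASK_NEED_XMIT 3
    #define RPC_TASK_NEED_RECV 4

    struct rpc_task_model {
            atomic_ulong tk_runstate;
    };

    static void set_bit_m(int nr, atomic_ulong *st)
    {
            atomic_fetch_or(st, 1UL << nr);
    }

    static void clear_bit_m(int nr, atomic_ulong *st)
    {
            atomic_fetch_and(st, ~(1UL << nr));
    }

    static bool test_bit_m(int nr, atomic_ulong *st)
    {
            return atomic_load(st) & (1UL << nr);
    }

    /* Mirrors the reworked rpc_task_need_encode(): nothing queued yet. */
    static bool need_encode(struct rpc_task_model *t)
    {
            return !test_bit_m(RPC_TASK_NEED_XMIT, &t->tk_runstate);
    }

    int main(void)
    {
            struct rpc_task_model t = { .tk_runstate = 0 };

            /* rpc_xdr_encode() succeeded: a message is ready to send */
            set_bit_m(RPC_TASK_NEED_XMIT, &t.tk_runstate);
            /* the request expects a reply */
            set_bit_m(RPC_TASK_NEED_RECV, &t.tk_runstate);

            /* xprt_transmit() pushed the last byte to the socket */
            clear_bit_m(RPC_TASK_NEED_XMIT, &t.tk_runstate);
            /* xprt_complete_rqst() copied the reply in */
            clear_bit_m(RPC_TASK_NEED_RECV, &t.tk_runstate);

            printf("needs (re-)encode: %s\n", need_encode(&t) ? "yes" : "no");
            return 0;
    }

With this scheme, forcing a re-encode becomes an explicit request: clear RPC_TASK_NEED_XMIT (as call_status() now does for -EBADMSG) and rpc_task_need_encode() fires again, rather than relying on the side effect of zeroing rq_snd_buf.len.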
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 6 ++++-- net/sunrpc/clnt.c | 19 +++++++------------ net/sunrpc/xprt.c | 17 ++++++++++++++--- 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 592653becd91..9e655df70131 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -140,8 +140,10 @@ struct rpc_task_setup { #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 #define RPC_TASK_ACTIVE 2 -#define RPC_TASK_MSG_RECV 3 -#define RPC_TASK_MSG_RECV_WAIT 4 +#define RPC_TASK_NEED_XMIT 3 +#define RPC_TASK_NEED_RECV 4 +#define RPC_TASK_MSG_RECV 5 +#define RPC_TASK_MSG_RECV_WAIT 6 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d41b5ac1d4e8..e5ac35e803ad 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1156,6 +1156,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) */ xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + xbufp->tail[0].iov_len; + set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); task->tk_action = call_bc_transmit; atomic_inc(&task->tk_count); @@ -1720,17 +1721,10 @@ call_allocate(struct rpc_task *task) rpc_exit(task, -ERESTARTSYS); } -static inline int +static int rpc_task_need_encode(struct rpc_task *task) { - return task->tk_rqstp->rq_snd_buf.len == 0; -} - -static inline void -rpc_task_force_reencode(struct rpc_task *task) -{ - task->tk_rqstp->rq_snd_buf.len = 0; - task->tk_rqstp->rq_bytes_sent = 0; + return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0; } /* @@ -1765,6 +1759,8 @@ rpc_xdr_encode(struct rpc_task *task) task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); + if (task->tk_status == 0) + set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); } /* @@ -1999,7 +1995,6 @@ call_transmit_status(struct rpc_task *task) */ if (task->tk_status == 0) { xprt_end_transmit(task); - rpc_task_force_reencode(task); return; } @@ -2010,7 +2005,6 @@ call_transmit_status(struct rpc_task *task) default: dprint_status(task); xprt_end_transmit(task); - rpc_task_force_reencode(task); break; /* * Special cases: if we've been waiting on the @@ -2038,7 +2032,7 @@ call_transmit_status(struct rpc_task *task) case -EADDRINUSE: case -ENOTCONN: case -EPIPE: - rpc_task_force_reencode(task); + break; } } @@ -2185,6 +2179,7 @@ call_status(struct rpc_task *task) rpc_exit(task, status); break; case -EBADMSG: + clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); task->tk_action = call_transmit; break; default: diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3973e10ea2bd..45d580cd93ac 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -936,10 +936,18 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) /* req->rq_reply_bytes_recvd */ smp_wmb(); req->rq_reply_bytes_recvd = copied; + clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); +static bool +xprt_request_data_received(struct rpc_task *task) +{ + return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) && + task->tk_rqstp->rq_reply_bytes_recvd != 0; +} + static void xprt_timer(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; @@ -1031,12 +1039,13 @@ void xprt_transmit(struct rpc_task *task) /* Add request to the receive list */ spin_lock(&xprt->recv_lock); list_add_tail(&req->rq_list, 
&xprt->recv); + set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); spin_unlock(&xprt->recv_lock); xprt_reset_majortimeo(req); /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); } - } else if (!req->rq_bytes_sent) + } else if (xprt_request_data_received(task) && !req->rq_bytes_sent) return; connect_cookie = xprt->connect_cookie; @@ -1046,9 +1055,11 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = status; return; } + xprt_inject_disconnect(xprt); dprintk("RPC: %5u xmit complete\n", task->tk_pid); + clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); task->tk_flags |= RPC_TASK_SENT; spin_lock_bh(&xprt->transport_lock); @@ -1062,14 +1073,14 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); req->rq_connect_cookie = connect_cookie; - if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) { + if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { /* * Sleep on the pending queue if we're expecting a reply. * The spinlock ensures atomicity between the test of * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on(). */ spin_lock(&xprt->recv_lock); - if (!req->rq_reply_bytes_recvd) { + if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { rpc_sleep_on(&xprt->pending, task, xprt_timer); /* * Send an extra queue wakeup call if the -- cgit v1.2.3 From d1109aa56c71e19fc117e75bff11384fc7279a3b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Aug 2018 15:48:42 -0400 Subject: SUNRPC: Rename TCP receive-specific state variables Since we will want to introduce similar TCP state variables for the transmission of requests, let's rename the existing ones to label that they are for the receive side. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 16 ++-- include/trace/events/sunrpc.h | 10 +-- net/sunrpc/xprtsock.c | 178 ++++++++++++++++++++-------------------- 3 files changed, 103 insertions(+), 101 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index ae0f99b9b965..90d5ca8e65f4 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -30,15 +30,17 @@ struct sock_xprt { /* * State of TCP reply receive */ - __be32 tcp_fraghdr, - tcp_xid, - tcp_calldir; + struct { + __be32 fraghdr, + xid, + calldir; - u32 tcp_offset, - tcp_reclen; + u32 offset, + len; - unsigned long tcp_copied, - tcp_flags; + unsigned long copied, + flags; + } recv; /* * Connection of transports diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index bbb08a3ef5cc..0aa347194e0f 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -525,11 +525,11 @@ TRACE_EVENT(xs_tcp_data_recv, TP_fast_assign( __assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]); __assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]); - __entry->xid = be32_to_cpu(xs->tcp_xid); - __entry->flags = xs->tcp_flags; - __entry->copied = xs->tcp_copied; - __entry->reclen = xs->tcp_reclen; - __entry->offset = xs->tcp_offset; + __entry->xid = be32_to_cpu(xs->recv.xid); + __entry->flags = xs->recv.flags; + __entry->copied = xs->recv.copied; + __entry->reclen = xs->recv.len; + __entry->offset = xs->recv.offset; ), TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu", diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6b7539c0466e..cd7d093721ae 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1169,42 +1169,42 @@ static inline void 
xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea size_t len, used; char *p; - p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset; - len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset; + p = ((char *) &transport->recv.fraghdr) + transport->recv.offset; + len = sizeof(transport->recv.fraghdr) - transport->recv.offset; used = xdr_skb_read_bits(desc, p, len); - transport->tcp_offset += used; + transport->recv.offset += used; if (used != len) return; - transport->tcp_reclen = ntohl(transport->tcp_fraghdr); - if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) - transport->tcp_flags |= TCP_RCV_LAST_FRAG; + transport->recv.len = ntohl(transport->recv.fraghdr); + if (transport->recv.len & RPC_LAST_STREAM_FRAGMENT) + transport->recv.flags |= TCP_RCV_LAST_FRAG; else - transport->tcp_flags &= ~TCP_RCV_LAST_FRAG; - transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; + transport->recv.flags &= ~TCP_RCV_LAST_FRAG; + transport->recv.len &= RPC_FRAGMENT_SIZE_MASK; - transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR; - transport->tcp_offset = 0; + transport->recv.flags &= ~TCP_RCV_COPY_FRAGHDR; + transport->recv.offset = 0; /* Sanity check of the record length */ - if (unlikely(transport->tcp_reclen < 8)) { + if (unlikely(transport->recv.len < 8)) { dprintk("RPC: invalid TCP record fragment length\n"); xs_tcp_force_close(xprt); return; } dprintk("RPC: reading TCP record fragment of length %d\n", - transport->tcp_reclen); + transport->recv.len); } static void xs_tcp_check_fraghdr(struct sock_xprt *transport) { - if (transport->tcp_offset == transport->tcp_reclen) { - transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR; - transport->tcp_offset = 0; - if (transport->tcp_flags & TCP_RCV_LAST_FRAG) { - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; - transport->tcp_flags |= TCP_RCV_COPY_XID; - transport->tcp_copied = 0; + if (transport->recv.offset == transport->recv.len) { + transport->recv.flags |= TCP_RCV_COPY_FRAGHDR; + transport->recv.offset = 0; + if (transport->recv.flags & TCP_RCV_LAST_FRAG) { + transport->recv.flags &= ~TCP_RCV_COPY_DATA; + transport->recv.flags |= TCP_RCV_COPY_XID; + transport->recv.copied = 0; } } } @@ -1214,20 +1214,20 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r size_t len, used; char *p; - len = sizeof(transport->tcp_xid) - transport->tcp_offset; + len = sizeof(transport->recv.xid) - transport->recv.offset; dprintk("RPC: reading XID (%zu bytes)\n", len); - p = ((char *) &transport->tcp_xid) + transport->tcp_offset; + p = ((char *) &transport->recv.xid) + transport->recv.offset; used = xdr_skb_read_bits(desc, p, len); - transport->tcp_offset += used; + transport->recv.offset += used; if (used != len) return; - transport->tcp_flags &= ~TCP_RCV_COPY_XID; - transport->tcp_flags |= TCP_RCV_READ_CALLDIR; - transport->tcp_copied = 4; + transport->recv.flags &= ~TCP_RCV_COPY_XID; + transport->recv.flags |= TCP_RCV_READ_CALLDIR; + transport->recv.copied = 4; dprintk("RPC: reading %s XID %08x\n", - (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for" + (transport->recv.flags & TCP_RPC_REPLY) ? "reply for" : "request with", - ntohl(transport->tcp_xid)); + ntohl(transport->recv.xid)); xs_tcp_check_fraghdr(transport); } @@ -1239,34 +1239,34 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, char *p; /* - * We want transport->tcp_offset to be 8 at the end of this routine + * We want transport->recv.offset to be 8 at the end of this routine * (4 bytes for the xid and 4 bytes for the call/reply flag). 
* When this function is called for the first time, - * transport->tcp_offset is 4 (after having already read the xid). + * transport->recv.offset is 4 (after having already read the xid). */ - offset = transport->tcp_offset - sizeof(transport->tcp_xid); - len = sizeof(transport->tcp_calldir) - offset; + offset = transport->recv.offset - sizeof(transport->recv.xid); + len = sizeof(transport->recv.calldir) - offset; dprintk("RPC: reading CALL/REPLY flag (%zu bytes)\n", len); - p = ((char *) &transport->tcp_calldir) + offset; + p = ((char *) &transport->recv.calldir) + offset; used = xdr_skb_read_bits(desc, p, len); - transport->tcp_offset += used; + transport->recv.offset += used; if (used != len) return; - transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR; + transport->recv.flags &= ~TCP_RCV_READ_CALLDIR; /* * We don't yet have the XDR buffer, so we will write the calldir * out after we get the buffer from the 'struct rpc_rqst' */ - switch (ntohl(transport->tcp_calldir)) { + switch (ntohl(transport->recv.calldir)) { case RPC_REPLY: - transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; - transport->tcp_flags |= TCP_RCV_COPY_DATA; - transport->tcp_flags |= TCP_RPC_REPLY; + transport->recv.flags |= TCP_RCV_COPY_CALLDIR; + transport->recv.flags |= TCP_RCV_COPY_DATA; + transport->recv.flags |= TCP_RPC_REPLY; break; case RPC_CALL: - transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; - transport->tcp_flags |= TCP_RCV_COPY_DATA; - transport->tcp_flags &= ~TCP_RPC_REPLY; + transport->recv.flags |= TCP_RCV_COPY_CALLDIR; + transport->recv.flags |= TCP_RCV_COPY_DATA; + transport->recv.flags &= ~TCP_RPC_REPLY; break; default: dprintk("RPC: invalid request message type\n"); @@ -1287,21 +1287,21 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt, rcvbuf = &req->rq_private_buf; - if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { + if (transport->recv.flags & TCP_RCV_COPY_CALLDIR) { /* * Save the RPC direction in the XDR buffer */ - memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied, - &transport->tcp_calldir, - sizeof(transport->tcp_calldir)); - transport->tcp_copied += sizeof(transport->tcp_calldir); - transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; + memcpy(rcvbuf->head[0].iov_base + transport->recv.copied, + &transport->recv.calldir, + sizeof(transport->recv.calldir)); + transport->recv.copied += sizeof(transport->recv.calldir); + transport->recv.flags &= ~TCP_RCV_COPY_CALLDIR; } len = desc->count; - if (len > transport->tcp_reclen - transport->tcp_offset) - desc->count = transport->tcp_reclen - transport->tcp_offset; - r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, + if (len > transport->recv.len - transport->recv.offset) + desc->count = transport->recv.len - transport->recv.offset; + r = xdr_partial_copy_from_skb(rcvbuf, transport->recv.copied, desc, xdr_skb_read_bits); if (desc->count) { @@ -1314,31 +1314,31 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt, * Any remaining data from this record will * be discarded. 
*/ - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + transport->recv.flags &= ~TCP_RCV_COPY_DATA; dprintk("RPC: XID %08x truncated request\n", - ntohl(transport->tcp_xid)); - dprintk("RPC: xprt = %p, tcp_copied = %lu, " - "tcp_offset = %u, tcp_reclen = %u\n", - xprt, transport->tcp_copied, - transport->tcp_offset, transport->tcp_reclen); + ntohl(transport->recv.xid)); + dprintk("RPC: xprt = %p, recv.copied = %lu, " + "recv.offset = %u, recv.len = %u\n", + xprt, transport->recv.copied, + transport->recv.offset, transport->recv.len); return; } - transport->tcp_copied += r; - transport->tcp_offset += r; + transport->recv.copied += r; + transport->recv.offset += r; desc->count = len - r; dprintk("RPC: XID %08x read %zd bytes\n", - ntohl(transport->tcp_xid), r); - dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, " - "tcp_reclen = %u\n", xprt, transport->tcp_copied, - transport->tcp_offset, transport->tcp_reclen); - - if (transport->tcp_copied == req->rq_private_buf.buflen) - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; - else if (transport->tcp_offset == transport->tcp_reclen) { - if (transport->tcp_flags & TCP_RCV_LAST_FRAG) - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + ntohl(transport->recv.xid), r); + dprintk("RPC: xprt = %p, recv.copied = %lu, recv.offset = %u, " + "recv.len = %u\n", xprt, transport->recv.copied, + transport->recv.offset, transport->recv.len); + + if (transport->recv.copied == req->rq_private_buf.buflen) + transport->recv.flags &= ~TCP_RCV_COPY_DATA; + else if (transport->recv.offset == transport->recv.len) { + if (transport->recv.flags & TCP_RCV_LAST_FRAG) + transport->recv.flags &= ~TCP_RCV_COPY_DATA; } } @@ -1353,14 +1353,14 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, container_of(xprt, struct sock_xprt, xprt); struct rpc_rqst *req; - dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); + dprintk("RPC: read reply XID %08x\n", ntohl(transport->recv.xid)); /* Find and lock the request corresponding to this xid */ spin_lock(&xprt->recv_lock); - req = xprt_lookup_rqst(xprt, transport->tcp_xid); + req = xprt_lookup_rqst(xprt, transport->recv.xid); if (!req) { dprintk("RPC: XID %08x request not found!\n", - ntohl(transport->tcp_xid)); + ntohl(transport->recv.xid)); spin_unlock(&xprt->recv_lock); return -1; } @@ -1370,8 +1370,8 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, xs_tcp_read_common(xprt, desc, req); spin_lock(&xprt->recv_lock); - if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) - xprt_complete_rqst(req->rq_task, transport->tcp_copied); + if (!(transport->recv.flags & TCP_RCV_COPY_DATA)) + xprt_complete_rqst(req->rq_task, transport->recv.copied); xprt_unpin_rqst(req); spin_unlock(&xprt->recv_lock); return 0; @@ -1393,7 +1393,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, struct rpc_rqst *req; /* Look up the request corresponding to the given XID */ - req = xprt_lookup_bc_request(xprt, transport->tcp_xid); + req = xprt_lookup_bc_request(xprt, transport->recv.xid); if (req == NULL) { printk(KERN_WARNING "Callback slot table overflowed\n"); xprt_force_disconnect(xprt); @@ -1403,8 +1403,8 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); xs_tcp_read_common(xprt, desc, req); - if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) - xprt_complete_bc_request(req, transport->tcp_copied); + if (!(transport->recv.flags & TCP_RCV_COPY_DATA)) + xprt_complete_bc_request(req, transport->recv.copied); return 0; } @@ -1415,7 +1415,7 @@ static inline 
int _xs_tcp_read_data(struct rpc_xprt *xprt, struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - return (transport->tcp_flags & TCP_RPC_REPLY) ? + return (transport->recv.flags & TCP_RPC_REPLY) ? xs_tcp_read_reply(xprt, desc) : xs_tcp_read_callback(xprt, desc); } @@ -1458,9 +1458,9 @@ static void xs_tcp_read_data(struct rpc_xprt *xprt, else { /* * The transport_lock protects the request handling. - * There's no need to hold it to update the tcp_flags. + * There's no need to hold it to update the recv.flags. */ - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + transport->recv.flags &= ~TCP_RCV_COPY_DATA; } } @@ -1468,12 +1468,12 @@ static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_s { size_t len; - len = transport->tcp_reclen - transport->tcp_offset; + len = transport->recv.len - transport->recv.offset; if (len > desc->count) len = desc->count; desc->count -= len; desc->offset += len; - transport->tcp_offset += len; + transport->recv.offset += len; dprintk("RPC: discarded %zu bytes\n", len); xs_tcp_check_fraghdr(transport); } @@ -1494,22 +1494,22 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns trace_xs_tcp_data_recv(transport); /* Read in a new fragment marker if necessary */ /* Can we ever really expect to get completely empty fragments? */ - if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) { + if (transport->recv.flags & TCP_RCV_COPY_FRAGHDR) { xs_tcp_read_fraghdr(xprt, &desc); continue; } /* Read in the xid if necessary */ - if (transport->tcp_flags & TCP_RCV_COPY_XID) { + if (transport->recv.flags & TCP_RCV_COPY_XID) { xs_tcp_read_xid(transport, &desc); continue; } /* Read in the call/reply flag */ - if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) { + if (transport->recv.flags & TCP_RCV_READ_CALLDIR) { xs_tcp_read_calldir(transport, &desc); continue; } /* Read in the request data */ - if (transport->tcp_flags & TCP_RCV_COPY_DATA) { + if (transport->recv.flags & TCP_RCV_COPY_DATA) { xs_tcp_read_data(xprt, &desc); continue; } @@ -1602,10 +1602,10 @@ static void xs_tcp_state_change(struct sock *sk) if (!xprt_test_and_set_connected(xprt)) { /* Reset TCP record info */ - transport->tcp_offset = 0; - transport->tcp_reclen = 0; - transport->tcp_copied = 0; - transport->tcp_flags = + transport->recv.offset = 0; + transport->recv.len = 0; + transport->recv.copied = 0; + transport->recv.flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; xprt->connect_cookie++; clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); -- cgit v1.2.3 From 6c7a64e5a44dbc6d073b83a56a48d0a4099f1dd2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Aug 2018 16:54:57 -0400 Subject: SUNRPC: Add socket transmit queue offset tracking Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 7 +++++++ net/sunrpc/xprtsock.c | 40 ++++++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 90d5ca8e65f4..005cfb6e7238 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -42,6 +42,13 @@ struct sock_xprt { flags; } recv; + /* + * State of TCP transmit queue + */ + struct { + u32 offset; + } xmit; + /* * Connection of transports */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ec1e3f93e707..629cc45e1e6c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -461,7 +461,7 @@ static int xs_nospace(struct rpc_task *task) int ret = 
-EAGAIN; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", - task->tk_pid, req->rq_slen - req->rq_bytes_sent, + task->tk_pid, req->rq_slen - transport->xmit.offset, req->rq_slen); /* Protect against races with write_space */ @@ -528,19 +528,22 @@ static int xs_local_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); req->rq_xtime = ktime_get(); - status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent, + status = xs_sendpages(transport->sock, NULL, 0, xdr, + transport->xmit.offset, true, &sent); dprintk("RPC: %s(%u) = %d\n", - __func__, xdr->len - req->rq_bytes_sent, status); + __func__, xdr->len - transport->xmit.offset, status); if (status == -EAGAIN && sock_writeable(transport->inet)) status = -ENOBUFS; if (likely(sent > 0) || status == 0) { - req->rq_bytes_sent += sent; - req->rq_xmit_bytes_sent += sent; + transport->xmit.offset += sent; + req->rq_bytes_sent = transport->xmit.offset; if (likely(req->rq_bytes_sent >= req->rq_slen)) { + req->rq_xmit_bytes_sent += transport->xmit.offset; req->rq_bytes_sent = 0; + transport->xmit.offset = 0; return 0; } status = -EAGAIN; @@ -592,10 +595,10 @@ static int xs_udp_send_request(struct rpc_task *task) return -ENOTCONN; req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, - xdr, req->rq_bytes_sent, true, &sent); + xdr, 0, true, &sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", - xdr->len - req->rq_bytes_sent, status); + xdr->len, status); /* firewall is blocking us, don't return -EAGAIN or we end up looping */ if (status == -EPERM) @@ -684,17 +687,20 @@ static int xs_tcp_send_request(struct rpc_task *task) while (1) { sent = 0; status = xs_sendpages(transport->sock, NULL, 0, xdr, - req->rq_bytes_sent, zerocopy, &sent); + transport->xmit.offset, + zerocopy, &sent); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", - xdr->len - req->rq_bytes_sent, status); + xdr->len - transport->xmit.offset, status); /* If we've sent the entire packet, immediately * reset the count of bytes sent. */ - req->rq_bytes_sent += sent; - req->rq_xmit_bytes_sent += sent; + transport->xmit.offset += sent; + req->rq_bytes_sent = transport->xmit.offset; if (likely(req->rq_bytes_sent >= req->rq_slen)) { + req->rq_xmit_bytes_sent += transport->xmit.offset; req->rq_bytes_sent = 0; + transport->xmit.offset = 0; return 0; } @@ -760,18 +766,13 @@ static int xs_tcp_send_request(struct rpc_task *task) */ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_rqst *req; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); if (task != xprt->snd_task) return; if (task == NULL) goto out_release; - req = task->tk_rqstp; - if (req == NULL) - goto out_release; - if (req->rq_bytes_sent == 0) - goto out_release; - if (req->rq_bytes_sent == req->rq_snd_buf.len) + if (transport->xmit.offset == 0 || !xprt_connected(xprt)) goto out_release; set_bit(XPRT_CLOSE_WAIT, &xprt->state); out_release: @@ -2021,6 +2022,8 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, write_unlock_bh(&sk->sk_callback_lock); } + transport->xmit.offset = 0; + /* Tell the socket layer to start connecting... 
*/ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; @@ -2384,6 +2387,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) transport->recv.len = 0; transport->recv.copied = 0; transport->recv.flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; + transport->xmit.offset = 0; /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; -- cgit v1.2.3 From cf9946cd6144410ced00d52586ff5a2cb4868fc5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Aug 2018 12:55:34 -0400 Subject: SUNRPC: Refactor the transport request pinning We are going to need to pin for both send and receive. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 +-- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 43 +++++++++++++++++++++++-------------------- 3 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 9e655df70131..8062ce6b18e5 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -142,8 +142,7 @@ struct rpc_task_setup { #define RPC_TASK_ACTIVE 2 #define RPC_TASK_NEED_XMIT 3 #define RPC_TASK_NEED_RECV 4 -#define RPC_TASK_MSG_RECV 5 -#define RPC_TASK_MSG_RECV_WAIT 6 +#define RPC_TASK_MSG_PIN_WAIT 5 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3d80524e92d6..bd743c51a865 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -103,6 +103,7 @@ struct rpc_rqst { /* A cookie used to track the state of the transport connection */ + atomic_t rq_pin; /* * Partial send handling diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 45d580cd93ac..649a40cfae6d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -847,16 +847,22 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) } EXPORT_SYMBOL_GPL(xprt_lookup_rqst); +static bool +xprt_is_pinned_rqst(struct rpc_rqst *req) +{ + return atomic_read(&req->rq_pin) != 0; +} + /** * xprt_pin_rqst - Pin a request on the transport receive list * @req: Request to pin * * Caller must ensure this is atomic with the call to xprt_lookup_rqst() - * so should be holding the xprt transport lock. + * so should be holding the xprt receive lock. */ void xprt_pin_rqst(struct rpc_rqst *req) { - set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate); + atomic_inc(&req->rq_pin); } EXPORT_SYMBOL_GPL(xprt_pin_rqst); @@ -864,31 +870,22 @@ EXPORT_SYMBOL_GPL(xprt_pin_rqst); * xprt_unpin_rqst - Unpin a request on the transport receive list * @req: Request to pin * - * Caller should be holding the xprt transport lock. + * Caller should be holding the xprt receive lock. 
*/ void xprt_unpin_rqst(struct rpc_rqst *req) { - struct rpc_task *task = req->rq_task; - - clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate); - if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate)) - wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV); + if (!test_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate)) { + atomic_dec(&req->rq_pin); + return; + } + if (atomic_dec_and_test(&req->rq_pin)) + wake_up_var(&req->rq_pin); } EXPORT_SYMBOL_GPL(xprt_unpin_rqst); static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req) -__must_hold(&req->rq_xprt->recv_lock) { - struct rpc_task *task = req->rq_task; - - if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) { - spin_unlock(&req->rq_xprt->recv_lock); - set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); - wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV, - TASK_UNINTERRUPTIBLE); - clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); - spin_lock(&req->rq_xprt->recv_lock); - } + wait_var_event(&req->rq_pin, !xprt_is_pinned_rqst(req)); } /** @@ -1388,7 +1385,13 @@ void xprt_release(struct rpc_task *task) spin_lock(&xprt->recv_lock); if (!list_empty(&req->rq_list)) { list_del_init(&req->rq_list); - xprt_wait_on_pinned_rqst(req); + if (xprt_is_pinned_rqst(req)) { + set_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate); + spin_unlock(&xprt->recv_lock); + xprt_wait_on_pinned_rqst(req); + spin_lock(&xprt->recv_lock); + clear_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate); + } } spin_unlock(&xprt->recv_lock); spin_lock_bh(&xprt->transport_lock); -- cgit v1.2.3 From 359c48c04af25397ecefec1ccf200ddd199617ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 29 Aug 2018 09:22:28 -0400 Subject: SUNRPC: Add a helper to wake up a sleeping rpc_task and set its status Add a helper that will wake up a task that is sleeping on a specific queue, and will set the value of task->tk_status. This is mainly intended for use by the transport layer to notify the task of an error condition. 
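Internally this works by generalising the locked wake-up path to take an optional "action" callback: the task is woken only if the callback agrees, and the status-setting helper supplies a callback that records the error just before the task is made runnable (rpc_wake_up_first_on_wq() is then rebuilt on the same primitive). A compressed userspace model of the pattern (names here are illustrative stand-ins, not the kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    struct task_model {
            int  tk_status;
            bool queued;
    };

    /* Locked wake-up path: wake only if the (optional) action says so. */
    static struct task_model *
    wake_task_action_locked(struct task_model *t,
                            bool (*action)(struct task_model *, void *),
                            void *data)
    {
            if (!t->queued)
                    return NULL;
            if (action && !action(t, data))
                    return NULL;
            t->queued = false;      /* make the task runnable */
            return t;
    }

    /* Action used by the new helper: record the status before waking. */
    static bool set_status_action(struct task_model *t, void *status)
    {
            t->tk_status = *(int *)status;
            return true;
    }

    int main(void)
    {
            struct task_model t = { .tk_status = 0, .queued = true };
            int err = -107;         /* e.g. -ENOTCONN (107 on Linux) */

            wake_task_action_locked(&t, set_status_action, &err);
            printf("woken with tk_status=%d\n", t.tk_status);
            return 0;
    }

Setting tk_status under the queue lock, in the same step as the wake-up, is what keeps the notification atomic from the sleeping task's point of view.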
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 ++ net/sunrpc/sched.c | 65 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8062ce6b18e5..8840a420cf4c 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -235,6 +235,9 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq, struct rpc_task *task); void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); +void rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *, + struct rpc_task *, + int); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 3fe5d60ab0e2..dec01bd1b71c 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -440,14 +440,28 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, /* * Wake up a queued task while the queue lock is being held */ -static void rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq, - struct rpc_wait_queue *queue, struct rpc_task *task) +static struct rpc_task * +rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq, + struct rpc_wait_queue *queue, struct rpc_task *task, + bool (*action)(struct rpc_task *, void *), void *data) { if (RPC_IS_QUEUED(task)) { smp_rmb(); - if (task->tk_waitqueue == queue) - __rpc_do_wake_up_task_on_wq(wq, queue, task); + if (task->tk_waitqueue == queue) { + if (action == NULL || action(task, data)) { + __rpc_do_wake_up_task_on_wq(wq, queue, task); + return task; + } + } } + return NULL; +} + +static void +rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq, + struct rpc_wait_queue *queue, struct rpc_task *task) +{ + rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, task, NULL, NULL); } /* @@ -481,6 +495,40 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task } EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); +static bool rpc_task_action_set_status(struct rpc_task *task, void *status) +{ + task->tk_status = *(int *)status; + return true; +} + +static void +rpc_wake_up_task_queue_set_status_locked(struct rpc_wait_queue *queue, + struct rpc_task *task, int status) +{ + rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue, + task, rpc_task_action_set_status, &status); +} + +/** + * rpc_wake_up_queued_task_set_status - wake up a task and set task->tk_status + * @queue: pointer to rpc_wait_queue + * @task: pointer to rpc_task + * @status: integer error value + * + * If @task is queued on @queue, then it is woken up, and @task->tk_status is + * set to the value of @status. + */ +void +rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue, + struct rpc_task *task, int status) +{ + if (!RPC_IS_QUEUED(task)) + return; + spin_lock_bh(&queue->lock); + rpc_wake_up_task_queue_set_status_locked(queue, task, status); + spin_unlock_bh(&queue->lock); +} + /* * Wake up the next task on a priority queue. 
*/ @@ -553,12 +601,9 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, queue, rpc_qname(queue)); spin_lock_bh(&queue->lock); task = __rpc_find_next_queued(queue); - if (task != NULL) { - if (func(task, data)) - rpc_wake_up_task_on_wq_queue_locked(wq, queue, task); - else - task = NULL; - } + if (task != NULL) + task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, + task, func, data); spin_unlock_bh(&queue->lock); return task; -- cgit v1.2.3 From 75c84151a9dc7a755c607e6761d8f14a1690dbf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 31 Aug 2018 10:21:00 -0400 Subject: SUNRPC: Rename xprt->recv_lock to xprt->queue_lock We will use the same lock to protect both the transmit and receive queues. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- net/sunrpc/svcsock.c | 6 +++--- net/sunrpc/xprt.c | 24 ++++++++++++------------ net/sunrpc/xprtrdma/rpc_rdma.c | 10 +++++----- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 4 ++-- net/sunrpc/xprtsock.c | 30 +++++++++++++++--------------- 6 files changed, 38 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bd743c51a865..c25d0a5fda69 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -235,7 +235,7 @@ struct rpc_xprt { */ spinlock_t transport_lock; /* lock transport info */ spinlock_t reserve_lock; /* lock slot table */ - spinlock_t recv_lock; /* lock receive list */ + spinlock_t queue_lock; /* send/receive queue lock */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5445145e639c..db8bb6b3a2b0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1004,7 +1004,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) if (!bc_xprt) return -EAGAIN; - spin_lock(&bc_xprt->recv_lock); + spin_lock(&bc_xprt->queue_lock); req = xprt_lookup_rqst(bc_xprt, xid); if (!req) goto unlock_notfound; @@ -1022,7 +1022,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) memcpy(dst->iov_base, src->iov_base, src->iov_len); xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); rqstp->rq_arg.len = 0; - spin_unlock(&bc_xprt->recv_lock); + spin_unlock(&bc_xprt->queue_lock); return 0; unlock_notfound: printk(KERN_NOTICE @@ -1031,7 +1031,7 @@ unlock_notfound: __func__, ntohl(calldir), bc_xprt, ntohl(xid)); unlock_eagain: - spin_unlock(&bc_xprt->recv_lock); + spin_unlock(&bc_xprt->queue_lock); return -EAGAIN; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3a3b3445a7c0..6e3d4b4ee79e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -826,7 +826,7 @@ static void xprt_connect_status(struct rpc_task *task) * @xprt: transport on which the original request was transmitted * @xid: RPC XID of incoming reply * - * Caller holds xprt->recv_lock. + * Caller holds xprt->queue_lock. */ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) { @@ -892,7 +892,7 @@ static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req) * xprt_update_rtt - Update RPC RTT statistics * @task: RPC request that recently completed * - * Caller holds xprt->recv_lock. + * Caller holds xprt->queue_lock. 
*/ void xprt_update_rtt(struct rpc_task *task) { @@ -914,7 +914,7 @@ EXPORT_SYMBOL_GPL(xprt_update_rtt); * @task: RPC request that recently completed * @copied: actual number of bytes received from the transport * - * Caller holds xprt->recv_lock. + * Caller holds xprt->queue_lock. */ void xprt_complete_rqst(struct rpc_task *task, int copied) { @@ -1034,10 +1034,10 @@ void xprt_transmit(struct rpc_task *task) memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); /* Add request to the receive list */ - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); list_add_tail(&req->rq_list, &xprt->recv); set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); xprt_reset_majortimeo(req); /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); @@ -1076,7 +1076,7 @@ void xprt_transmit(struct rpc_task *task) * The spinlock ensures atomicity between the test of * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on(). */ - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { rpc_sleep_on(&xprt->pending, task, xprt_timer); /* Wake up immediately if the connection was dropped */ @@ -1084,7 +1084,7 @@ void xprt_transmit(struct rpc_task *task) rpc_wake_up_queued_task_set_status(&xprt->pending, task, -ENOTCONN); } - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); } } @@ -1379,18 +1379,18 @@ void xprt_release(struct rpc_task *task) task->tk_ops->rpc_count_stats(task, task->tk_calldata); else if (task->tk_client) rpc_count_iostats(task, task->tk_client->cl_metrics); - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); if (!list_empty(&req->rq_list)) { list_del_init(&req->rq_list); if (xprt_is_pinned_rqst(req)) { set_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate); - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); xprt_wait_on_pinned_rqst(req); - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); clear_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate); } } - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); if (xprt->ops->release_request) @@ -1420,7 +1420,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); - spin_lock_init(&xprt->recv_lock); + spin_lock_init(&xprt->queue_lock); INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c8ae983c6cc0..0020dc401215 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1238,7 +1238,7 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep) goto out_badheader; out: - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); cwnd = xprt->cwnd; xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT; if (xprt->cwnd > cwnd) @@ -1246,7 +1246,7 @@ out: xprt_complete_rqst(rqst->rq_task, status); xprt_unpin_rqst(rqst); - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); return; /* If the incoming reply terminated a pending RPC, the next @@ -1345,7 +1345,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) /* Match incoming rpcrdma_rep to an rpcrdma_req to * get context for handling any incoming chunks. 
*/ - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); rqst = xprt_lookup_rqst(xprt, rep->rr_xid); if (!rqst) goto out_norqst; @@ -1357,7 +1357,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) credits = buf->rb_max_requests; buf->rb_credits = credits; - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); req = rpcr_to_rdmar(rqst); req->rl_reply = rep; @@ -1378,7 +1378,7 @@ out_badversion: * is corrupt. */ out_norqst: - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); trace_xprtrdma_reply_rqst(rep); goto repost; diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index a68180090554..09b12b7568fe 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -56,7 +56,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, if (src->iov_len < 24) goto out_shortreply; - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); req = xprt_lookup_rqst(xprt, xid); if (!req) goto out_notfound; @@ -86,7 +86,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, rcvbuf->len = 0; out_unlock: - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); out: return ret; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3fbccebd0b10..8d6404259ff9 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -966,12 +966,12 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt, return; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; xprt_pin_rqst(rovr); - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); task = rovr->rq_task; copied = rovr->rq_private_buf.buflen; @@ -980,16 +980,16 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt, if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { dprintk("RPC: sk_buff copy failed\n"); - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); goto out_unpin; } - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); xprt_complete_rqst(task, copied); out_unpin: xprt_unpin_rqst(rovr); out_unlock: - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); } static void xs_local_data_receive(struct sock_xprt *transport) @@ -1058,13 +1058,13 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, return; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; xprt_pin_rqst(rovr); xprt_update_rtt(rovr->rq_task); - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); task = rovr->rq_task; if ((copied = rovr->rq_private_buf.buflen) > repsize) @@ -1072,7 +1072,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, /* Suck it into the iovec, verify checksum if not done by hw. 
*/ if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); goto out_unpin; } @@ -1081,13 +1081,13 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, spin_lock_bh(&xprt->transport_lock); xprt_adjust_cwnd(xprt, task, copied); spin_unlock_bh(&xprt->transport_lock); - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); xprt_complete_rqst(task, copied); __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); out_unpin: xprt_unpin_rqst(rovr); out_unlock: - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); } static void xs_udp_data_receive(struct sock_xprt *transport) @@ -1356,24 +1356,24 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, dprintk("RPC: read reply XID %08x\n", ntohl(transport->recv.xid)); /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); req = xprt_lookup_rqst(xprt, transport->recv.xid); if (!req) { dprintk("RPC: XID %08x request not found!\n", ntohl(transport->recv.xid)); - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); return -1; } xprt_pin_rqst(req); - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); xs_tcp_read_common(xprt, desc, req); - spin_lock(&xprt->recv_lock); + spin_lock(&xprt->queue_lock); if (!(transport->recv.flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->recv.copied); xprt_unpin_rqst(req); - spin_unlock(&xprt->recv_lock); + spin_unlock(&xprt->queue_lock); return 0; } -- cgit v1.2.3 From edc81dcd5b7f699c4049042b35c904396642032e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Aug 2018 17:55:46 -0400 Subject: SUNRPC: Refactor xprt_transmit() to remove the reply queue code Separate out the action of adding a request to the reply queue so that the backchannel code can simply skip calling it altogether. 
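The shape of the change, as a small userspace sketch (illustrative names only, not the kernel implementation): the receive-queue bookkeeping that used to live inside xprt_transmit() becomes a step the caller performs beforehand, so call_transmit() enqueues only when a reply is expected while the backchannel send path never touches the receive queue:

    #include <stdbool.h>
    #include <stdio.h>

    struct req_model {
            bool on_recv_queue;     /* models RPC_TASK_NEED_RECV + xprt->recv */
            bool is_backchannel;
    };

    /* Was inlined in xprt_transmit(); now a separate step the caller owns. */
    static void request_enqueue_receive(struct req_model *r)
    {
            r->on_recv_queue = true;
    }

    static void transmit(struct req_model *r)
    {
            /* send the bytes; no longer touches the receive queue at all */
            printf("sent (%s, queued-for-reply=%d)\n",
                   r->is_backchannel ? "backchannel" : "forward",
                   r->on_recv_queue);
    }

    /* Forward-channel path in call_transmit(): enqueue first, then send. */
    static void call_transmit_model(struct req_model *r, bool reply_expected)
    {
            if (reply_expected)
                    request_enqueue_receive(r);
            transmit(r);
    }

    int main(void)
    {
            struct req_model fwd = { .is_backchannel = false };
            struct req_model bc  = { .is_backchannel = true };

            call_transmit_model(&fwd, true);    /* normal RPC */
            transmit(&bc);                      /* bc reply: just send */
            return 0;
    }

Enqueueing before the bytes hit the wire also closes the race the clnt.c hunk notes: a fast reply can no longer arrive before the request is on the receive list.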
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/backchannel_rqst.c | 1 - net/sunrpc/clnt.c | 5 ++ net/sunrpc/xprt.c | 127 +++++++++++++++++++++++++------------- net/sunrpc/xprtrdma/backchannel.c | 1 - 5 files changed, 89 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c25d0a5fda69..0250294c904a 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -334,6 +334,7 @@ void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); +void xprt_request_enqueue_receive(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 3c15a99b9700..fa5ba6ed3197 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -91,7 +91,6 @@ struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags) return NULL; req->rq_xprt = xprt; - INIT_LIST_HEAD(&req->rq_list); INIT_LIST_HEAD(&req->rq_bc_list); /* Preallocate one XDR receive buffer */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a858366cd15d..414966273a3f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1962,6 +1962,11 @@ call_transmit(struct rpc_task *task) return; } } + + /* Add task to reply queue before transmission to avoid races */ + if (rpc_reply_expected(task)) + xprt_request_enqueue_receive(task); + if (!xprt_prepare_transmit(task)) return; task->tk_action = call_transmit_status; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6e3d4b4ee79e..2ae0a4c47d59 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -888,6 +888,62 @@ static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req) wait_var_event(&req->rq_pin, !xprt_is_pinned_rqst(req)); } +static bool +xprt_request_data_received(struct rpc_task *task) +{ + return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) && + READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) != 0; +} + +static bool +xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req) +{ + return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) && + READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) == 0; +} + +/** + * xprt_request_enqueue_receive - Add an request to the receive queue + * @task: RPC task + * + */ +void +xprt_request_enqueue_receive(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + + if (!xprt_request_need_enqueue_receive(task, req)) + return; + spin_lock(&xprt->queue_lock); + + /* Update the softirq receive buffer */ + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, + sizeof(req->rq_private_buf)); + + /* Add request to the receive list */ + list_add_tail(&req->rq_list, &xprt->recv); + set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); + spin_unlock(&xprt->queue_lock); + + xprt_reset_majortimeo(req); + /* Turn off autodisconnect */ + del_singleshot_timer_sync(&xprt->timer); +} + +/** + * xprt_request_dequeue_receive_locked - Remove a request from the receive queue + * @task: RPC task + * + * Caller must hold xprt->queue_lock. 
+ */ +static void +xprt_request_dequeue_receive_locked(struct rpc_task *task) +{ + if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) + list_del(&task->tk_rqstp->rq_list); +} + /** * xprt_update_rtt - Update RPC RTT statistics * @task: RPC request that recently completed @@ -927,24 +983,16 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) xprt->stat.recvs++; - list_del_init(&req->rq_list); req->rq_private_buf.len = copied; /* Ensure all writes are done before we update */ /* req->rq_reply_bytes_recvd */ smp_wmb(); req->rq_reply_bytes_recvd = copied; - clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); + xprt_request_dequeue_receive_locked(task); rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); -static bool -xprt_request_data_received(struct rpc_task *task) -{ - return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) && - task->tk_rqstp->rq_reply_bytes_recvd != 0; -} - static void xprt_timer(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; @@ -1018,32 +1066,15 @@ void xprt_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); - if (!req->rq_reply_bytes_recvd) { - + if (!req->rq_bytes_sent) { + if (xprt_request_data_received(task)) + return; /* Verify that our message lies in the RPCSEC_GSS window */ - if (!req->rq_bytes_sent && rpcauth_xmit_need_reencode(task)) { + if (rpcauth_xmit_need_reencode(task)) { task->tk_status = -EBADMSG; return; } - - if (list_empty(&req->rq_list) && rpc_reply_expected(task)) { - /* - * Add to the list only if we're expecting a reply - */ - /* Update the softirq receive buffer */ - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, - sizeof(req->rq_private_buf)); - /* Add request to the receive list */ - spin_lock(&xprt->queue_lock); - list_add_tail(&req->rq_list, &xprt->recv); - set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); - spin_unlock(&xprt->queue_lock); - xprt_reset_majortimeo(req); - /* Turn off autodisconnect */ - del_singleshot_timer_sync(&xprt->timer); - } - } else if (xprt_request_data_received(task) && !req->rq_bytes_sent) - return; + } connect_cookie = xprt->connect_cookie; status = xprt->ops->send_request(task); @@ -1285,7 +1316,6 @@ xprt_request_init(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; - INIT_LIST_HEAD(&req->rq_list); req->rq_timeout = task->tk_client->cl_timeout->to_initval; req->rq_task = task; req->rq_xprt = xprt; @@ -1355,6 +1385,26 @@ void xprt_retry_reserve(struct rpc_task *task) xprt_do_reserve(xprt, task); } +static void +xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + + if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) || + xprt_is_pinned_rqst(req)) { + spin_lock(&xprt->queue_lock); + xprt_request_dequeue_receive_locked(task); + while (xprt_is_pinned_rqst(req)) { + set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate); + spin_unlock(&xprt->queue_lock); + xprt_wait_on_pinned_rqst(req); + spin_lock(&xprt->queue_lock); + clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate); + } + spin_unlock(&xprt->queue_lock); + } +} + /** * xprt_release - release an RPC request slot * @task: task which is finished with the slot @@ -1379,18 +1429,7 @@ void xprt_release(struct rpc_task *task) task->tk_ops->rpc_count_stats(task, task->tk_calldata); else if (task->tk_client) rpc_count_iostats(task, task->tk_client->cl_metrics); - spin_lock(&xprt->queue_lock); - if (!list_empty(&req->rq_list)) { - 
list_del_init(&req->rq_list); - if (xprt_is_pinned_rqst(req)) { - set_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate); - spin_unlock(&xprt->queue_lock); - xprt_wait_on_pinned_rqst(req); - spin_lock(&xprt->queue_lock); - clear_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate); - } - } - spin_unlock(&xprt->queue_lock); + xprt_request_dequeue_all(task, req); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); if (xprt->ops->release_request) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 90adeff4c06b..ed58761e6b23 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -51,7 +51,6 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, rqst = &req->rl_slot; rqst->rq_xprt = xprt; - INIT_LIST_HEAD(&rqst->rq_list); INIT_LIST_HEAD(&rqst->rq_bc_list); __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); spin_lock_bh(&xprt->bc_pa_lock); -- cgit v1.2.3 From 7f3a1d1e1806a0eb9b200e3aed2a04431f2bcc6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 23 Aug 2018 00:03:43 -0400 Subject: SUNRPC: Refactor xprt_transmit() to remove wait for reply code Allow the caller in clnt.c to call into the code to wait for a reply after calling xprt_transmit(). Again, the reason is that the backchannel code does not need this functionality. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 10 +----- net/sunrpc/xprt.c | 74 +++++++++++++++++++++++++++++++-------------- 3 files changed, 54 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0250294c904a..4fa2af087cff 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -335,6 +335,7 @@ void xprt_free_slot(struct rpc_xprt *xprt, void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); +void xprt_request_wait_receive(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 414966273a3f..775d6e80b6e8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1975,15 +1975,6 @@ call_transmit(struct rpc_task *task) return; if (is_retrans) task->tk_client->cl_stats->rpcretrans++; - /* - * On success, ensure that we call xprt_end_transmit() before sleeping - * in order to allow access to the socket to other RPC requests. 
- */ - call_transmit_status(task); - if (rpc_reply_expected(task)) - return; - task->tk_action = rpc_exit_task; - rpc_wake_up_queued_task(&task->tk_rqstp->rq_xprt->pending, task); } /* @@ -2000,6 +1991,7 @@ call_transmit_status(struct rpc_task *task) */ if (task->tk_status == 0) { xprt_end_transmit(task); + xprt_request_wait_receive(task); return; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2ae0a4c47d59..a6a33c178870 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -654,6 +654,22 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) } EXPORT_SYMBOL_GPL(xprt_force_disconnect); +static unsigned int +xprt_connect_cookie(struct rpc_xprt *xprt) +{ + return READ_ONCE(xprt->connect_cookie); +} + +static bool +xprt_request_retransmit_after_disconnect(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + + return req->rq_connect_cookie != xprt_connect_cookie(xprt) || + !xprt_connected(xprt); +} + /** * xprt_conditional_disconnect - force a transport to disconnect * @xprt: transport to disconnect @@ -1009,6 +1025,39 @@ static void xprt_timer(struct rpc_task *task) task->tk_status = 0; } +/** + * xprt_request_wait_receive - wait for the reply to an RPC request + * @task: RPC task about to send a request + * + */ +void xprt_request_wait_receive(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + + if (!test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) + return; + /* + * Sleep on the pending queue if we're expecting a reply. + * The spinlock ensures atomicity between the test of + * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on(). + */ + spin_lock(&xprt->queue_lock); + if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { + xprt->ops->set_retrans_timeout(task); + rpc_sleep_on(&xprt->pending, task, xprt_timer); + /* + * Send an extra queue wakeup call if the + * connection was dropped in case the call to + * rpc_sleep_on() raced. + */ + if (xprt_request_retransmit_after_disconnect(task)) + rpc_wake_up_queued_task_set_status(&xprt->pending, + task, -ENOTCONN); + } + spin_unlock(&xprt->queue_lock); +} + /** * xprt_prepare_transmit - reserve the transport before sending a request * @task: RPC task about to send a request @@ -1028,9 +1077,8 @@ bool xprt_prepare_transmit(struct rpc_task *task) task->tk_status = req->rq_reply_bytes_recvd; goto out_unlock; } - if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) - && xprt_connected(xprt) - && req->rq_connect_cookie == xprt->connect_cookie) { + if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) && + !xprt_request_retransmit_after_disconnect(task)) { xprt->ops->set_retrans_timeout(task); rpc_sleep_on(&xprt->pending, task, xprt_timer); goto out_unlock; @@ -1091,8 +1139,6 @@ void xprt_transmit(struct rpc_task *task) task->tk_flags |= RPC_TASK_SENT; spin_lock_bh(&xprt->transport_lock); - xprt->ops->set_retrans_timeout(task); - xprt->stat.sends++; xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; xprt->stat.bklog_u += xprt->backlog.qlen; @@ -1101,22 +1147,6 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); req->rq_connect_cookie = connect_cookie; - if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { - /* - * Sleep on the pending queue if we're expecting a reply. - * The spinlock ensures atomicity between the test of - * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on(). 
- */ - spin_lock(&xprt->queue_lock); - if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { - rpc_sleep_on(&xprt->pending, task, xprt_timer); - /* Wake up immediately if the connection was dropped */ - if (!xprt_connected(xprt)) - rpc_wake_up_queued_task_set_status(&xprt->pending, - task, -ENOTCONN); - } - spin_unlock(&xprt->queue_lock); - } } static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) @@ -1321,7 +1351,7 @@ xprt_request_init(struct rpc_task *task) req->rq_xprt = xprt; req->rq_buffer = NULL; req->rq_xid = xprt_alloc_xid(xprt); - req->rq_connect_cookie = xprt->connect_cookie - 1; + req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1; req->rq_bytes_sent = 0; req->rq_snd_buf.len = 0; req->rq_snd_buf.buflen = 0; -- cgit v1.2.3 From ef3f54347f690d06649c0d7a1f63d3410b3d08d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Aug 2018 09:23:32 -0400 Subject: SUNRPC: Distinguish between the slot allocation list and receive queue When storing a struct rpc_rqst on the slot allocation list, we currently use the same field 'rq_list' as we use to store the request on the receive queue. Since the structure is never on both lists at the same time, this is OK. However, for clarity, let's make that a union with different names for the different lists so that we can more easily distinguish between the two states. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 9 +++++++-- net/sunrpc/xprt.c | 12 ++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 4fa2af087cff..9cec2d0811f2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -82,7 +82,11 @@ struct rpc_rqst { struct page **rq_enc_pages; /* scratch pages for use by gss privacy code */ void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ - struct list_head rq_list; + + union { + struct list_head rq_list; /* Slot allocation list */ + struct list_head rq_recv; /* Receive queue */ + }; void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; @@ -249,7 +253,8 @@ struct rpc_xprt { struct list_head bc_pa_list; /* List of preallocated * backchannel rpc_rqst's */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ - struct list_head recv; + + struct list_head recv_queue; /* Receive queue */ struct { unsigned long bind_count, /* total number of binds */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a6a33c178870..d527dc08540e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -708,7 +708,7 @@ static void xprt_schedule_autodisconnect(struct rpc_xprt *xprt) __must_hold(&xprt->transport_lock) { - if (list_empty(&xprt->recv) && xprt_has_timer(xprt)) + if (list_empty(&xprt->recv_queue) && xprt_has_timer(xprt)) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); } @@ -718,7 +718,7 @@ xprt_init_autodisconnect(struct timer_list *t) struct rpc_xprt *xprt = from_timer(xprt, t, timer); spin_lock(&xprt->transport_lock); - if (!list_empty(&xprt->recv)) + if (!list_empty(&xprt->recv_queue)) goto out_abort; /* Reset xprt->last_used to avoid connect/autodisconnect cycling */ xprt->last_used = jiffies; @@ -848,7 +848,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) { struct rpc_rqst *entry; - list_for_each_entry(entry, &xprt->recv, rq_list) + list_for_each_entry(entry, &xprt->recv_queue, rq_recv) if (entry->rq_xid == xid) { trace_xprt_lookup_rqst(xprt, xid, 0); entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime); @@ 
-938,7 +938,7 @@ xprt_request_enqueue_receive(struct rpc_task *task) sizeof(req->rq_private_buf)); /* Add request to the receive list */ - list_add_tail(&req->rq_list, &xprt->recv); + list_add_tail(&req->rq_recv, &xprt->recv_queue); set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); spin_unlock(&xprt->queue_lock); @@ -957,7 +957,7 @@ static void xprt_request_dequeue_receive_locked(struct rpc_task *task) { if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) - list_del(&task->tk_rqstp->rq_list); + list_del(&task->tk_rqstp->rq_recv); } /** @@ -1492,7 +1492,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) spin_lock_init(&xprt->queue_lock); INIT_LIST_HEAD(&xprt->free); - INIT_LIST_HEAD(&xprt->recv); + INIT_LIST_HEAD(&xprt->recv_queue); #if defined(CONFIG_SUNRPC_BACKCHANNEL) spin_lock_init(&xprt->bc_pa_lock); INIT_LIST_HEAD(&xprt->bc_pa_list); -- cgit v1.2.3 From 944b042921a17d1a4e51bb05f8edf2b93d26e36f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 9 Aug 2018 23:33:21 -0400 Subject: SUNRPC: Add a transmission queue for RPC requests Add the queue that will enforce the ordering of RPC task transmission. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 6 ++++ net/sunrpc/clnt.c | 6 ++-- net/sunrpc/xprt.c | 84 ++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 83 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9cec2d0811f2..81a6c2c8dfc7 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -88,6 +88,8 @@ struct rpc_rqst { struct list_head rq_recv; /* Receive queue */ }; + struct list_head rq_xmit; /* Send queue */ + void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; void *rq_rbuffer; /* Reply XDR decode buffer */ @@ -242,6 +244,9 @@ struct rpc_xprt { spinlock_t queue_lock; /* send/receive queue lock */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ + + struct list_head xmit_queue; /* Send queue */ + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ @@ -339,6 +344,7 @@ void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); +void xprt_request_enqueue_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); void xprt_request_wait_receive(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index be0f06a8156b..c1a19a3e1356 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1156,11 +1156,11 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) */ xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + xbufp->tail[0].iov_len; - set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); task->tk_action = call_bc_transmit; atomic_inc(&task->tk_count); WARN_ON_ONCE(atomic_read(&task->tk_count) != 2); + xprt_request_enqueue_transmit(task); rpc_execute(task); dprintk("RPC: rpc_run_bc_task: task= %p\n", task); @@ -1759,8 +1759,6 @@ rpc_xdr_encode(struct rpc_task *task) task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - if (task->tk_status == 0) - set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); } /* @@ -1964,6 +1962,7 @@ call_transmit(struct rpc_task *task) /* Add task to reply queue before transmission to avoid 
races */ if (rpc_reply_expected(task)) xprt_request_enqueue_receive(task); + xprt_request_enqueue_transmit(task); if (!xprt_prepare_transmit(task)) return; @@ -1998,7 +1997,6 @@ call_transmit_status(struct rpc_task *task) xprt_end_transmit(task); break; case -EBADMSG: - clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); task->tk_action = call_transmit; task->tk_status = 0; xprt_end_transmit(task); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d527dc08540e..1f69d9f219af 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1058,6 +1058,72 @@ void xprt_request_wait_receive(struct rpc_task *task) spin_unlock(&xprt->queue_lock); } +static bool +xprt_request_need_transmit(struct rpc_task *task) +{ + return !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) || + xprt_request_retransmit_after_disconnect(task); +} + +static bool +xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req) +{ + return xprt_request_need_transmit(task) && + !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); +} + +/** + * xprt_request_enqueue_transmit - queue a task for transmission + * @task: pointer to rpc_task + * + * Add a task to the transmission queue. + */ +void +xprt_request_enqueue_transmit(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + + if (xprt_request_need_enqueue_transmit(task, req)) { + spin_lock(&xprt->queue_lock); + list_add_tail(&req->rq_xmit, &xprt->xmit_queue); + set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); + spin_unlock(&xprt->queue_lock); + } +} + +/** + * xprt_request_dequeue_transmit_locked - remove a task from the transmission queue + * @task: pointer to rpc_task + * + * Remove a task from the transmission queue + * Caller must hold xprt->queue_lock + */ +static void +xprt_request_dequeue_transmit_locked(struct rpc_task *task) +{ + xprt_task_clear_bytes_sent(task); + if (test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + list_del(&task->tk_rqstp->rq_xmit); +} + +/** + * xprt_request_dequeue_transmit - remove a task from the transmission queue + * @task: pointer to rpc_task + * + * Remove a task from the transmission queue + */ +static void +xprt_request_dequeue_transmit(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + + spin_lock(&xprt->queue_lock); + xprt_request_dequeue_transmit_locked(task); + spin_unlock(&xprt->queue_lock); +} + /** * xprt_prepare_transmit - reserve the transport before sending a request * @task: RPC task about to send a request @@ -1077,12 +1143,8 @@ bool xprt_prepare_transmit(struct rpc_task *task) task->tk_status = req->rq_reply_bytes_recvd; goto out_unlock; } - if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) && - !xprt_request_retransmit_after_disconnect(task)) { - xprt->ops->set_retrans_timeout(task); - rpc_sleep_on(&xprt->pending, task, xprt_timer); + if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) goto out_unlock; - } } if (!xprt->ops->reserve_xprt(xprt, task)) { task->tk_status = -EAGAIN; @@ -1116,11 +1178,11 @@ void xprt_transmit(struct rpc_task *task) if (!req->rq_bytes_sent) { if (xprt_request_data_received(task)) - return; + goto out_dequeue; /* Verify that our message lies in the RPCSEC_GSS window */ if (rpcauth_xmit_need_reencode(task)) { task->tk_status = -EBADMSG; - return; + goto out_dequeue; } } @@ -1135,7 +1197,6 @@ void xprt_transmit(struct rpc_task *task) xprt_inject_disconnect(xprt); dprintk("RPC: %5u xmit complete\n", task->tk_pid); - clear_bit(RPC_TASK_NEED_XMIT, 
&task->tk_runstate); task->tk_flags |= RPC_TASK_SENT; spin_lock_bh(&xprt->transport_lock); @@ -1147,6 +1208,8 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); req->rq_connect_cookie = connect_cookie; +out_dequeue: + xprt_request_dequeue_transmit(task); } static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) @@ -1420,9 +1483,11 @@ xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; - if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) || + if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) || + test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) || xprt_is_pinned_rqst(req)) { spin_lock(&xprt->queue_lock); + xprt_request_dequeue_transmit_locked(task); xprt_request_dequeue_receive_locked(task); while (xprt_is_pinned_rqst(req)) { set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate); @@ -1493,6 +1558,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv_queue); + INIT_LIST_HEAD(&xprt->xmit_queue); #if defined(CONFIG_SUNRPC_BACKCHANNEL) spin_lock_init(&xprt->bc_pa_lock); INIT_LIST_HEAD(&xprt->bc_pa_list); -- cgit v1.2.3 From 762e4e67b356ab7b8fbfc39bc07dc6110121505e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Aug 2018 16:28:28 -0400 Subject: SUNRPC: Refactor RPC call encoding Move the call encoding so that it occurs before the transport connection etc. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 81 +++++++++++++++++++++++++++------------------ net/sunrpc/xprt.c | 22 +++++++----- 3 files changed, 63 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 81a6c2c8dfc7..b8a7de161f67 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -347,6 +347,7 @@ bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); void xprt_request_wait_receive(struct rpc_task *task); +bool xprt_request_need_retransmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c1a19a3e1356..64159716be30 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -61,6 +61,7 @@ static void call_start(struct rpc_task *task); static void call_reserve(struct rpc_task *task); static void call_reserveresult(struct rpc_task *task); static void call_allocate(struct rpc_task *task); +static void call_encode(struct rpc_task *task); static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); @@ -1140,7 +1141,8 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) struct xdr_buf *xbufp = &req->rq_snd_buf; struct rpc_task_setup task_setup_data = { .callback_ops = &rpc_default_ops, - .flags = RPC_TASK_SOFTCONN, + .flags = RPC_TASK_SOFTCONN | + RPC_TASK_NO_RETRANS_TIMEOUT, }; dprintk("RPC: rpc_run_bc_task req= %p\n", req); @@ -1160,7 +1162,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) task->tk_action = call_bc_transmit; atomic_inc(&task->tk_count); WARN_ON_ONCE(atomic_read(&task->tk_count) != 2); - xprt_request_enqueue_transmit(task); rpc_execute(task); dprintk("RPC: rpc_run_bc_task: task= %p\n", task); @@ -1680,7 +1681,7 @@ 
call_allocate(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - task->tk_action = call_bind; + task->tk_action = call_encode; if (req->rq_buffer) return; @@ -1724,12 +1725,12 @@ call_allocate(struct rpc_task *task) static int rpc_task_need_encode(struct rpc_task *task) { - return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0; + return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0 && + (!(task->tk_flags & RPC_TASK_SENT) || + !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) || + xprt_request_need_retransmit(task)); } -/* - * 3. Encode arguments of an RPC call - */ static void rpc_xdr_encode(struct rpc_task *task) { @@ -1745,6 +1746,7 @@ rpc_xdr_encode(struct rpc_task *task) xdr_buf_init(&req->rq_rcv_buf, req->rq_rbuffer, req->rq_rcvsize); + req->rq_bytes_sent = 0; p = rpc_encode_header(task); if (p == NULL) { @@ -1761,6 +1763,34 @@ rpc_xdr_encode(struct rpc_task *task) task->tk_msg.rpc_argp); } +/* + * 3. Encode arguments of an RPC call + */ +static void +call_encode(struct rpc_task *task) +{ + if (!rpc_task_need_encode(task)) + goto out; + /* Encode here so that rpcsec_gss can use correct sequence number. */ + rpc_xdr_encode(task); + /* Did the encode result in an error condition? */ + if (task->tk_status != 0) { + /* Was the error nonfatal? */ + if (task->tk_status == -EAGAIN) + rpc_delay(task, HZ >> 4); + else + rpc_exit(task, task->tk_status); + return; + } + + /* Add task to reply queue before transmission to avoid races */ + if (rpc_reply_expected(task)) + xprt_request_enqueue_receive(task); + xprt_request_enqueue_transmit(task); +out: + task->tk_action = call_bind; +} + /* * 4. Get the server port number if not yet set */ @@ -1945,24 +1975,8 @@ call_transmit(struct rpc_task *task) dprint_status(task); task->tk_action = call_transmit_status; - /* Encode here so that rpcsec_gss can use correct sequence number. */ - if (rpc_task_need_encode(task)) { - rpc_xdr_encode(task); - /* Did the encode result in an error condition? */ - if (task->tk_status != 0) { - /* Was the error nonfatal? 
*/ - if (task->tk_status == -EAGAIN) - rpc_delay(task, HZ >> 4); - else - rpc_exit(task, task->tk_status); - return; - } - } - - /* Add task to reply queue before transmission to avoid races */ - if (rpc_reply_expected(task)) - xprt_request_enqueue_receive(task); - xprt_request_enqueue_transmit(task); + if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + return; if (!xprt_prepare_transmit(task)) return; @@ -1997,9 +2011,9 @@ call_transmit_status(struct rpc_task *task) xprt_end_transmit(task); break; case -EBADMSG: - task->tk_action = call_transmit; - task->tk_status = 0; xprt_end_transmit(task); + task->tk_status = 0; + task->tk_action = call_encode; break; /* * Special cases: if we've been waiting on the @@ -2048,6 +2062,9 @@ call_bc_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; + if (rpc_task_need_encode(task)) + xprt_request_enqueue_transmit(task); + if (!xprt_prepare_transmit(task)) goto out_retry; @@ -2169,7 +2186,7 @@ call_status(struct rpc_task *task) case -EPIPE: case -ENOTCONN: case -EAGAIN: - task->tk_action = call_bind; + task->tk_action = call_encode; break; case -EIO: /* shutdown or soft timeout */ @@ -2234,7 +2251,7 @@ call_timeout(struct rpc_task *task) rpcauth_invalcred(task); retry: - task->tk_action = call_bind; + task->tk_action = call_encode; task->tk_status = 0; } @@ -2278,7 +2295,7 @@ call_decode(struct rpc_task *task) if (req->rq_rcv_buf.len < 12) { if (!RPC_IS_SOFT(task)) { - task->tk_action = call_bind; + task->tk_action = call_encode; goto out_retry; } dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", @@ -2409,7 +2426,7 @@ rpc_verify_header(struct rpc_task *task) task->tk_garb_retry--; dprintk("RPC: %5u %s: retry garbled creds\n", task->tk_pid, __func__); - task->tk_action = call_bind; + task->tk_action = call_encode; goto out_retry; case RPC_AUTH_TOOWEAK: printk(KERN_NOTICE "RPC: server %s requires stronger " @@ -2478,7 +2495,7 @@ out_garbage: task->tk_garb_retry--; dprintk("RPC: %5u %s: retrying\n", task->tk_pid, __func__); - task->tk_action = call_bind; + task->tk_action = call_encode; out_retry: return ERR_PTR(-EAGAIN); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1f69d9f219af..613f558a3791 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1058,18 +1058,10 @@ void xprt_request_wait_receive(struct rpc_task *task) spin_unlock(&xprt->queue_lock); } -static bool -xprt_request_need_transmit(struct rpc_task *task) -{ - return !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) || - xprt_request_retransmit_after_disconnect(task); -} - static bool xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req) { - return xprt_request_need_transmit(task) && - !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); + return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); } /** @@ -1124,6 +1116,18 @@ xprt_request_dequeue_transmit(struct rpc_task *task) spin_unlock(&xprt->queue_lock); } +/** + * xprt_request_need_retransmit - Test if a task needs retransmission + * @task: pointer to rpc_task + * + * Test for whether a connection breakage requires the task to retransmit + */ +bool +xprt_request_need_retransmit(struct rpc_task *task) +{ + return xprt_request_retransmit_after_disconnect(task); +} + /** * xprt_prepare_transmit - reserve the transport before sending a request * @task: RPC task about to send a request -- cgit v1.2.3 From 902c58872e1e9a2c146a55b0701c0b26cc5a4b24 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Sep 2018 17:21:01 -0400 Subject: SUNRPC: Fix up the back channel 
transmit Fix up the back channel code to recognise that the request has already been transmitted, and so does not need to be sent again. Also ensure that we set req->rq_task. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 1 + net/sunrpc/clnt.c | 19 +++++-------------- net/sunrpc/xprt.c | 27 ++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 4397a4824c81..28721cf73ec3 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef CONFIG_SUNRPC_BACKCHANNEL struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); +void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 64159716be30..dcefbf406482 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1138,7 +1138,6 @@ EXPORT_SYMBOL_GPL(rpc_call_async); struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) { struct rpc_task *task; - struct xdr_buf *xbufp = &req->rq_snd_buf; struct rpc_task_setup task_setup_data = { .callback_ops = &rpc_default_ops, .flags = RPC_TASK_SOFTCONN | @@ -1150,14 +1149,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) * Create an rpc_task to send the data */ task = rpc_new_task(&task_setup_data); - task->tk_rqstp = req; - - /* - * Set up the xdr_buf length. - * This also indicates that the buffer is XDR encoded already.
- */ - xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + - xbufp->tail[0].iov_len; + xprt_init_bc_request(req, task); task->tk_action = call_bc_transmit; atomic_inc(&task->tk_count); @@ -2064,6 +2056,8 @@ call_bc_transmit(struct rpc_task *task) if (rpc_task_need_encode(task)) xprt_request_enqueue_transmit(task); + if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + goto out_wakeup; if (!xprt_prepare_transmit(task)) goto out_retry; @@ -2073,13 +2067,11 @@ call_bc_transmit(struct rpc_task *task) "error: %d\n", task->tk_status); goto out_done; } - if (req->rq_connect_cookie != req->rq_xprt->connect_cookie) - req->rq_bytes_sent = 0; xprt_transmit(task); if (task->tk_status == -EAGAIN) - goto out_nospace; + goto out_retry; xprt_end_transmit(task); dprint_status(task); @@ -2119,12 +2111,11 @@ call_bc_transmit(struct rpc_task *task) "error: %d\n", task->tk_status); break; } +out_wakeup: rpc_wake_up_queued_task(&req->rq_xprt->pending, task); out_done: task->tk_action = rpc_exit_task; return; -out_nospace: - req->rq_connect_cookie = req->rq_xprt->connect_cookie; out_retry: task->tk_status = 0; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 613f558a3791..f5be739492d4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1390,6 +1390,12 @@ void xprt_free(struct rpc_xprt *xprt) } EXPORT_SYMBOL_GPL(xprt_free); +static void +xprt_init_connect_cookie(struct rpc_rqst *req, struct rpc_xprt *xprt) +{ + req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1; +} + static __be32 xprt_alloc_xid(struct rpc_xprt *xprt) { @@ -1418,7 +1424,7 @@ xprt_request_init(struct rpc_task *task) req->rq_xprt = xprt; req->rq_buffer = NULL; req->rq_xid = xprt_alloc_xid(xprt); - req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1; + xprt_init_connect_cookie(req, xprt); req->rq_bytes_sent = 0; req->rq_snd_buf.len = 0; req->rq_snd_buf.buflen = 0; @@ -1552,6 +1558,25 @@ void xprt_release(struct rpc_task *task) xprt_free_bc_request(req); } +#ifdef CONFIG_SUNRPC_BACKCHANNEL +void +xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task) +{ + struct xdr_buf *xbufp = &req->rq_snd_buf; + + task->tk_rqstp = req; + req->rq_task = task; + xprt_init_connect_cookie(req, req->rq_xprt); + /* + * Set up the xdr_buf length. + * This also indicates that the buffer is XDR encoded already. + */ + xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + + xbufp->tail[0].iov_len; + req->rq_bytes_sent = 0; +} +#endif + static void xprt_init(struct rpc_xprt *xprt, struct net *net) { kref_init(&xprt->kref); -- cgit v1.2.3 From 50f484e298218b7271fad8a23bd44c82fb3110e1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 30 Aug 2018 13:27:29 -0400 Subject: SUNRPC: Treat the task and request as separate in the xprt_ops->send_request() When we shift to using the transmit queue, the task that holds the write lock will not necessarily be the same as the task whose request is being transmitted.
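As an illustration of that calling convention, here is a stand-alone userspace sketch; it is not part of the patch, and the *_m structures and demo_send_request are invented stand-ins for struct rpc_rqst, struct rpc_task and the transport callback:

#include <stdio.h>

struct rpc_rqst_m { int xid; };                       /* models struct rpc_rqst */
struct rpc_task_m { int pid; struct rpc_rqst_m *tk_rqstp; };

struct rpc_xprt_ops_m {
        /* old shape: int (*send_request)(struct rpc_task_m *task); */
        int (*send_request)(struct rpc_rqst_m *req, struct rpc_task_m *task);
};

static int demo_send_request(struct rpc_rqst_m *req, struct rpc_task_m *task)
{
        /* The request determines what goes on the wire; the task only
         * identifies who currently holds the transport write lock. */
        printf("task %d transmits xid 0x%x\n", task->pid, req->xid);
        return 0;
}

int main(void)
{
        struct rpc_rqst_m queued = { .xid = 0x42 };   /* owned by another task */
        struct rpc_task_m lock_holder = { .pid = 7, .tk_rqstp = NULL };
        struct rpc_xprt_ops_m ops = { .send_request = demo_send_request };

        /* The lock holder can drain the queue, not just its own request,
         * so req can no longer be derived from task->tk_rqstp. */
        return ops.send_request(&queued, &lock_holder);
}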
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 3 +-- net/sunrpc/xprtrdma/transport.c | 5 ++--- net/sunrpc/xprtsock.c | 27 +++++++++++++-------------- 5 files changed, 18 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b8a7de161f67..8c2bb078f00c 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -140,7 +140,7 @@ struct rpc_xprt_ops { void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - int (*send_request)(struct rpc_task *task); + int (*send_request)(struct rpc_rqst *req, struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f5be739492d4..6e735dd1fde0 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1191,7 +1191,7 @@ void xprt_transmit(struct rpc_task *task) } connect_cookie = xprt->connect_cookie; - status = xprt->ops->send_request(task); + status = xprt->ops->send_request(req, task); trace_xprt_transmit(xprt, req->rq_xid, status); if (status != 0) { task->tk_status = status; diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 09b12b7568fe..d1618c70edb4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -215,9 +215,8 @@ drop_connection: * connection. */ static int -xprt_rdma_bc_send_request(struct rpc_task *task) +xprt_rdma_bc_send_request(struct rpc_rqst *rqst, struct rpc_task *task) { - struct rpc_rqst *rqst = task->tk_rqstp; struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; struct svcxprt_rdma *rdma; int ret; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 143ce2579ba9..fa684bf4d090 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -706,9 +706,8 @@ xprt_rdma_free(struct rpc_task *task) * sent. Do not try to send this message again. */ static int -xprt_rdma_send_request(struct rpc_task *task) +xprt_rdma_send_request(struct rpc_rqst *rqst, struct rpc_task *task) { - struct rpc_rqst *rqst = task->tk_rqstp; struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); @@ -741,7 +740,7 @@ xprt_rdma_send_request(struct rpc_task *task) /* An RPC with no reply will throw off credit accounting, * so drop the connection to reset the credit grant. 
*/ - if (!rpc_reply_expected(task)) + if (!rpc_reply_expected(rqst->rq_task)) goto drop_connection; return 0; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8d6404259ff9..b8143eded4af 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -449,12 +449,12 @@ static void xs_nospace_callback(struct rpc_task *task) /** * xs_nospace - place task on wait queue if transmit was incomplete + * @req: pointer to RPC request * @task: task to put to sleep * */ -static int xs_nospace(struct rpc_task *task) +static int xs_nospace(struct rpc_rqst *req, struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct sock *sk = transport->inet; @@ -513,6 +513,7 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) /** * xs_local_send_request - write an RPC request to an AF_LOCAL socket + * @req: pointer to RPC request * @task: RPC task that manages the state of an RPC request * * Return values: @@ -522,9 +523,8 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) * ENOTCONN: Caller needs to invoke connect logic then call again * other: Some other error occured, the request was not sent */ -static int xs_local_send_request(struct rpc_task *task) +static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -569,7 +569,7 @@ static int xs_local_send_request(struct rpc_task *task) case -ENOBUFS: break; case -EAGAIN: - status = xs_nospace(task); + status = xs_nospace(req, task); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -585,6 +585,7 @@ static int xs_local_send_request(struct rpc_task *task) /** * xs_udp_send_request - write an RPC request to a UDP socket + * @req: pointer to RPC request * @task: address of RPC task that manages the state of an RPC request * * Return values: @@ -594,9 +595,8 @@ static int xs_local_send_request(struct rpc_task *task) * ENOTCONN: Caller needs to invoke connect logic then call again * other: Some other error occurred, the request was not sent */ -static int xs_udp_send_request(struct rpc_task *task) +static int xs_udp_send_request(struct rpc_rqst *req, struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; @@ -638,7 +638,7 @@ process_status: /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(task); + status = xs_nospace(req, task); break; case -ENETUNREACH: case -ENOBUFS: @@ -658,6 +658,7 @@ process_status: /** * xs_tcp_send_request - write an RPC request to a TCP socket + * @req: pointer to RPC request * @task: address of RPC task that manages the state of an RPC request * * Return values: @@ -670,9 +671,8 @@ process_status: * XXX: In the case of soft timeouts, should we eventually give up * if sendmsg is not able to make progress? 
*/ -static int xs_tcp_send_request(struct rpc_task *task) +static int xs_tcp_send_request(struct rpc_rqst *req, struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; @@ -697,7 +697,7 @@ static int xs_tcp_send_request(struct rpc_task *task) * completes while the socket holds a reference to the pages, * then we may end up resending corrupted data. */ - if (task->tk_flags & RPC_TASK_SENT) + if (req->rq_task->tk_flags & RPC_TASK_SENT) zerocopy = false; if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) @@ -761,7 +761,7 @@ static int xs_tcp_send_request(struct rpc_task *task) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(task); + status = xs_nospace(req, task); break; case -ECONNRESET: case -ECONNREFUSED: @@ -2706,9 +2706,8 @@ static int bc_sendto(struct rpc_rqst *req) /* * The send routine. Borrows from svc_send */ -static int bc_send_request(struct rpc_task *task) +static int bc_send_request(struct rpc_rqst *req, struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; struct svc_xprt *xprt; int len; -- cgit v1.2.3 From 918f3c1fe83c5baa4892b943d3f5ac7191d8fb74 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Sep 2018 11:37:22 -0400 Subject: SUNRPC: Improve latency for interactive tasks One of the intentions with the priority queues was to ensure that no single process can hog the transport. The field task->tk_owner therefore identifies the RPC call's origin, and is intended to allow the RPC layer to organise queues for fairness. This commit therefore modifies the transmit queue to group requests by task->tk_owner, and ensures that we round robin among those groups. 
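The rotation described above can be modelled outside the kernel. The following stand-alone C sketch is an invented model rather than the patch's code (rqst, next_group and next_same stand in for struct rpc_rqst, rq_xmit and rq_xmit2): the first request from each owner sits on the primary queue, later requests from the same owner chain behind it, and when a head is dequeued the same owner's next request re-enters at the tail, so owners take turns.

#include <stdio.h>
#include <string.h>

struct rqst {
        int owner;                /* models task->tk_owner */
        int xid;
        struct rqst *next_group;  /* models rq_xmit: one head per owner */
        struct rqst *next_same;   /* models rq_xmit2: same-owner chain */
};

static struct rqst *xmit_queue;   /* models xprt->xmit_queue */

static void enqueue_transmit(struct rqst *req)
{
        struct rqst *pos;

        for (pos = xmit_queue; pos; pos = pos->next_group) {
                if (pos->owner != req->owner)
                        continue;
                while (pos->next_same)    /* owner already queued: chain behind */
                        pos = pos->next_same;
                pos->next_same = req;
                return;
        }
        if (!xmit_queue) {                /* new owner: append as a group head */
                xmit_queue = req;
                return;
        }
        for (pos = xmit_queue; pos->next_group; pos = pos->next_group)
                ;
        pos->next_group = req;
}

static struct rqst *dequeue_transmit(void)
{
        struct rqst *req = xmit_queue, *next;

        if (!req)
                return NULL;
        xmit_queue = req->next_group;
        next = req->next_same;            /* round robin: the same owner's */
        req->next_same = NULL;            /* next request re-enters at the tail */
        req->next_group = NULL;
        if (next)
                enqueue_transmit(next);
        return req;
}

int main(void)
{
        static const int owners[] = { 1, 1, 1, 2, 3, 2 };
        struct rqst reqs[6];
        struct rqst *r;
        int i;

        memset(reqs, 0, sizeof(reqs));
        for (i = 0; i < 6; i++) {
                reqs[i].owner = owners[i];
                reqs[i].xid = i;
                enqueue_transmit(&reqs[i]);
        }
        while ((r = dequeue_transmit()) != NULL)
                printf("xid %d (owner %d)\n", r->xid, r->owner);
        return 0;
}

Running it prints the requests in owner order 1 2 3 1 2 1, even though owner 1 enqueued three requests back to back: no single owner hogs the transport.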
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 27 ++++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8c2bb078f00c..e377620b9744 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -89,6 +89,7 @@ struct rpc_rqst { }; struct list_head rq_xmit; /* Send queue */ + struct list_head rq_xmit2; /* Send queue */ void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9c5a8514d264..44d0eeaddaac 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1053,12 +1053,21 @@ xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req) void xprt_request_enqueue_transmit(struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; + struct rpc_rqst *pos, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; if (xprt_request_need_enqueue_transmit(task, req)) { spin_lock(&xprt->queue_lock); + list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { + if (pos->rq_task->tk_owner != task->tk_owner) + continue; + list_add_tail(&req->rq_xmit2, &pos->rq_xmit2); + INIT_LIST_HEAD(&req->rq_xmit); + goto out; + } list_add_tail(&req->rq_xmit, &xprt->xmit_queue); + INIT_LIST_HEAD(&req->rq_xmit2); +out: set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); spin_unlock(&xprt->queue_lock); } @@ -1074,8 +1083,20 @@ xprt_request_enqueue_transmit(struct rpc_task *task) static void xprt_request_dequeue_transmit_locked(struct rpc_task *task) { - if (test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - list_del(&task->tk_rqstp->rq_xmit); + struct rpc_rqst *req = task->tk_rqstp; + + if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + return; + if (!list_empty(&req->rq_xmit)) { + list_del(&req->rq_xmit); + if (!list_empty(&req->rq_xmit2)) { + struct rpc_rqst *next = list_first_entry(&req->rq_xmit2, + struct rpc_rqst, rq_xmit2); + list_del(&req->rq_xmit2); + list_add_tail(&next->rq_xmit, &next->rq_xprt->xmit_queue); + } + } else + list_del(&req->rq_xmit2); } /** -- cgit v1.2.3 From 75891f502f5fc70f52a01af5b924384ed4866907 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 17:37:36 -0400 Subject: SUNRPC: Support for congestion control when queuing is enabled Both RDMA and UDP transports require the request to get a "congestion control" credit before they can be transmitted. Right now, this is done when the request locks the socket. We'd like it to happen when a request attempts to be transmitted for the first time. In order to support retransmission of requests that already hold such credits, we also want to ensure that they get queued first, so that we don't deadlock with requests that have yet to obtain a credit. 
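A minimal userspace model of those two rules follows, assuming invented cong_* names and a plain integer credit counter rather than the kernel's scaled cwnd arithmetic (a sketch, not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct cong_xprt {
        unsigned int in_flight;   /* credits handed out (models xprt->cong) */
        unsigned int cwnd;        /* credits available (models xprt->cwnd) */
        bool cwnd_wait;           /* models the new XPRT_CWND_WAIT bit */
};

struct cong_rqst {
        bool rq_cong;             /* true once this request holds a credit */
};

/* Grant a credit at first-transmission time; a retransmission that
 * already holds one is always allowed through. */
static bool request_get_cong(struct cong_xprt *x, struct cong_rqst *r)
{
        if (r->rq_cong)
                return true;
        if (x->in_flight >= x->cwnd) {
                x->cwnd_wait = true;  /* block new senders until a credit frees */
                return false;
        }
        r->rq_cong = true;
        x->in_flight++;
        return true;
}

int main(void)
{
        struct cong_xprt x = { .in_flight = 0, .cwnd = 2, .cwnd_wait = false };
        struct cong_rqst a = { false }, b = { false }, c = { false };
        bool got_a, got_b, got_c, retry_a;

        got_a = request_get_cong(&x, &a);
        got_b = request_get_cong(&x, &b);
        got_c = request_get_cong(&x, &c);   /* third sender: window exhausted */
        retry_a = request_get_cong(&x, &a); /* retransmission keeps its credit */
        printf("a=%d b=%d c=%d retry_a=%d cwnd_wait=%d\n",
               got_a, got_b, got_c, retry_a, x.cwnd_wait);
        return 0;
}

This prints a=1 b=1 c=0 retry_a=1 cwnd_wait=1: the third new sender is refused and the transport marks itself blocked, while the retransmission sails through on the credit it already holds. The enqueue path in the diff below additionally places already-credited requests at the head of the transmit queue, which is what prevents the deadlock described above.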
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 + net/sunrpc/clnt.c | 5 ++ net/sunrpc/xprt.c | 128 +++++++++++++++++++++++++++----------- net/sunrpc/xprtrdma/backchannel.c | 3 + net/sunrpc/xprtrdma/transport.c | 3 + net/sunrpc/xprtsock.c | 4 ++ 6 files changed, 109 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e377620b9744..0d0cc127615e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -397,6 +397,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_pin_rqst(struct rpc_rqst *req); void xprt_unpin_rqst(struct rpc_rqst *req); void xprt_release_rqst_cong(struct rpc_task *task); +bool xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); @@ -415,6 +416,7 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_BINDING (5) #define XPRT_CLOSING (6) #define XPRT_CONGESTED (9) +#define XPRT_CWND_WAIT (10) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8dc3d33827c4..f03911f84953 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1996,6 +1996,11 @@ call_transmit_status(struct rpc_task *task) dprint_status(task); xprt_end_transmit(task); break; + case -EBADSLT: + xprt_end_transmit(task); + task->tk_action = call_transmit; + task->tk_status = 0; + break; case -EBADMSG: xprt_end_transmit(task); task->tk_status = 0; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 44d0eeaddaac..b03355ae7b16 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -68,8 +68,6 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); static void xprt_connect_status(struct rpc_task *task); -static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); -static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *); static void xprt_destroy(struct rpc_xprt *xprt); static DEFINE_SPINLOCK(xprt_list_lock); @@ -221,6 +219,31 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) queue_work(xprtiod_workqueue, &xprt->task_cleanup); } +static bool +xprt_need_congestion_window_wait(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_CWND_WAIT, &xprt->state); +} + +static void +xprt_set_congestion_window_wait(struct rpc_xprt *xprt) +{ + if (!list_empty(&xprt->xmit_queue)) { + /* Peek at head of queue to see if it can make progress */ + if (list_first_entry(&xprt->xmit_queue, struct rpc_rqst, + rq_xmit)->rq_cong) + return; + } + set_bit(XPRT_CWND_WAIT, &xprt->state); +} + +static void +xprt_test_and_clear_congestion_window_wait(struct rpc_xprt *xprt) +{ + if (!RPCXPRT_CONGESTED(xprt)) + clear_bit(XPRT_CWND_WAIT, &xprt->state); +} + /* * xprt_reserve_xprt_cong - serialize write access to transports * @task: task that is requesting access to the transport @@ -228,6 +251,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) * Same as xprt_reserve_xprt, but Van Jacobson congestion control is * integrated into the decision of whether a request is allowed to be * woken up and given access to the transport. + * Note that the lock is only granted if we know there are free slots. 
*/ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -243,14 +267,12 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) xprt->snd_task = task; return 1; } - if (__xprt_get_cong(xprt, task)) { + if (!xprt_need_congestion_window_wait(xprt)) { xprt->snd_task = task; return 1; } xprt_clear_locked(xprt); out_sleep: - if (req) - __xprt_put_cong(xprt, req); dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; @@ -294,32 +316,14 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) xprt_clear_locked(xprt); } -static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data) -{ - struct rpc_xprt *xprt = data; - struct rpc_rqst *req; - - req = task->tk_rqstp; - if (req == NULL) { - xprt->snd_task = task; - return true; - } - if (__xprt_get_cong(xprt, task)) { - xprt->snd_task = task; - req->rq_ntrans++; - return true; - } - return false; -} - static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) { if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; - if (RPCXPRT_CONGESTED(xprt)) + if (xprt_need_congestion_window_wait(xprt)) goto out_unlock; if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending, - __xprt_lock_write_cong_func, xprt)) + __xprt_lock_write_func, xprt)) return; out_unlock: xprt_clear_locked(xprt); @@ -370,16 +374,16 @@ static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *ta * overflowed. Put the task to sleep if this is the case. */ static int -__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task) +__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) { - struct rpc_rqst *req = task->tk_rqstp; - if (req->rq_cong) return 1; dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n", - task->tk_pid, xprt->cong, xprt->cwnd); - if (RPCXPRT_CONGESTED(xprt)) + req->rq_task->tk_pid, xprt->cong, xprt->cwnd); + if (RPCXPRT_CONGESTED(xprt)) { + xprt_set_congestion_window_wait(xprt); return 0; + } req->rq_cong = 1; xprt->cong += RPC_CWNDSCALE; return 1; @@ -396,9 +400,31 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) return; req->rq_cong = 0; xprt->cong -= RPC_CWNDSCALE; + xprt_test_and_clear_congestion_window_wait(xprt); __xprt_lock_write_next_cong(xprt); } +/** + * xprt_request_get_cong - Request congestion control credits + * @xprt: pointer to transport + * @req: pointer to RPC request + * + * Useful for transports that require congestion control. 
+ */ +bool +xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) +{ + bool ret = false; + + if (req->rq_cong) + return true; + spin_lock_bh(&xprt->transport_lock); + ret = __xprt_get_cong(xprt, req) != 0; + spin_unlock_bh(&xprt->transport_lock); + return ret; +} +EXPORT_SYMBOL_GPL(xprt_request_get_cong); + /** * xprt_release_rqst_cong - housekeeping when request is complete * @task: RPC request that recently completed @@ -413,6 +439,20 @@ void xprt_release_rqst_cong(struct rpc_task *task) } EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); +/* + * Clear the congestion window wait flag and wake up the next + * entry on xprt->sending + */ +static void +xprt_clear_congestion_window_wait(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) { + spin_lock_bh(&xprt->transport_lock); + __xprt_lock_write_next_cong(xprt); + spin_unlock_bh(&xprt->transport_lock); + } +} + /** * xprt_adjust_cwnd - adjust transport congestion window * @xprt: pointer to xprt @@ -1058,12 +1098,28 @@ xprt_request_enqueue_transmit(struct rpc_task *task) if (xprt_request_need_enqueue_transmit(task, req)) { spin_lock(&xprt->queue_lock); - list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { - if (pos->rq_task->tk_owner != task->tk_owner) - continue; - list_add_tail(&req->rq_xmit2, &pos->rq_xmit2); - INIT_LIST_HEAD(&req->rq_xmit); - goto out; + /* + * Requests that carry congestion control credits are added + * to the head of the list to avoid starvation issues. + */ + if (req->rq_cong) { + xprt_clear_congestion_window_wait(xprt); + list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { + if (pos->rq_cong) + continue; + /* Note: req is added _before_ pos */ + list_add_tail(&req->rq_xmit, &pos->rq_xmit); + INIT_LIST_HEAD(&req->rq_xmit2); + goto out; + } + } else { + list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { + if (pos->rq_task->tk_owner != task->tk_owner) + continue; + list_add_tail(&req->rq_xmit2, &pos->rq_xmit2); + INIT_LIST_HEAD(&req->rq_xmit); + goto out; + } } list_add_tail(&req->rq_xmit, &xprt->xmit_queue); INIT_LIST_HEAD(&req->rq_xmit2); diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index ed58761e6b23..e7c445cee16f 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -200,6 +200,9 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) if (!xprt_connected(rqst->rq_xprt)) goto drop_connection; + if (!xprt_request_get_cong(rqst->rq_xprt, rqst)) + return -EBADSLT; + rc = rpcrdma_bc_marshal_reply(rqst); if (rc < 0) goto failed_marshal; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index fa684bf4d090..9ff322e53f37 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -721,6 +721,9 @@ xprt_rdma_send_request(struct rpc_rqst *rqst, struct rpc_task *task) if (!xprt_connected(xprt)) goto drop_connection; + if (!xprt_request_get_cong(xprt, rqst)) + return -EBADSLT; + rc = rpcrdma_marshal_req(r_xprt, rqst); if (rc < 0) goto failed_marshal; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b8143eded4af..8831e84a058a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -609,6 +609,10 @@ static int xs_udp_send_request(struct rpc_rqst *req, struct rpc_task *task) if (!xprt_bound(xprt)) return -ENOTCONN; + + if (!xprt_request_get_cong(xprt, req)) + return -EBADSLT; + req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, 0, true, &sent); -- cgit v1.2.3 From 
36bd7de949f41d586ef7794169af75462b67acbc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 18:41:32 -0400 Subject: SUNRPC: Turn off throttling of RPC slots for TCP sockets The theory was that we would need to grab the socket lock anyway, so we might as well use it to gate the allocation of RPC slots for a TCP socket. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/xprt.c | 14 -------------- net/sunrpc/xprtsock.c | 2 +- 3 files changed, 1 insertion(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0d0cc127615e..14c9b4d49fb4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -343,7 +343,6 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); -void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 533df198a0e9..849e102e3c5a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1429,20 +1429,6 @@ out_init_req: } EXPORT_SYMBOL_GPL(xprt_alloc_slot); -void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) -{ - /* Note: grabbing the xprt_lock_write() ensures that we throttle - * new slot allocation if the transport is congested (i.e. when - * reconnecting a stream transport or when out of socket write - * buffer space). - */ - if (xprt_lock_write(xprt, task)) { - xprt_alloc_slot(xprt, task); - xprt_release_write(xprt, task); - } -} -EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot); - void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { spin_lock(&xprt->reserve_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8831e84a058a..f54e8110f4c6 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2809,7 +2809,7 @@ static const struct rpc_xprt_ops xs_udp_ops = { static const struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, - .alloc_slot = xprt_lock_and_alloc_slot, + .alloc_slot = xprt_alloc_slot, .free_slot = xprt_free_slot, .rpcbind = rpcb_getport_async, .set_port = xs_set_port, -- cgit v1.2.3 From c544577daddb618c7dd5fa7fb98d6a41782f020e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 23:39:27 -0400 Subject: SUNRPC: Clean up transport write space handling Treat socket write space handling in the same way we now treat transport congestion: by denying the XPRT_LOCK until the transport signals that it has free buffer space. 
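The shape of that scheme can be sketched under invented names (the xprt_m model below stands in for the real flag bits; it is not the kernel code): a send that runs out of socket buffer space marks the transport, the reserve path refuses to grant the write lock while the mark is set, and the socket's write-space callback clears the mark so the next sender gets through. This mirrors the new XPRT_WRITE_SPACE bit in the diff below.

#include <stdbool.h>
#include <stdio.h>

struct xprt_m {
        bool locked;       /* models the XPRT_LOCKED bit */
        bool write_space;  /* models the new XPRT_WRITE_SPACE bit */
};

/* Modelled on xprt_reserve_xprt(): deny the lock while we are known to be
 * out of socket buffer space, instead of parking the task on a wait queue. */
static bool reserve_xprt(struct xprt_m *x)
{
        if (x->locked || x->write_space)
                return false;
        x->locked = true;
        return true;
}

/* Called when a send returns -EAGAIN (models xprt_wait_for_buffer_space()). */
static void wait_for_buffer_space(struct xprt_m *x)
{
        x->write_space = true;
}

/* Socket write-space callback (models xprt_write_space()). */
static bool write_space(struct xprt_m *x)
{
        if (!x->write_space)
                return false;
        x->write_space = false;   /* room again: re-admit senders */
        return true;
}

int main(void)
{
        struct xprt_m x = { .locked = false, .write_space = false };

        wait_for_buffer_space(&x);
        printf("reserve while blocked: %d\n", reserve_xprt(&x)); /* prints 0 */
        write_space(&x);
        printf("reserve after wakeup: %d\n", reserve_xprt(&x));  /* prints 1 */
        return 0;
}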
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svc_xprt.h | 1 - include/linux/sunrpc/xprt.h | 5 +- net/sunrpc/clnt.c | 28 ++++------- net/sunrpc/svc_xprt.c | 2 - net/sunrpc/xprt.c | 77 ++++++++++++++++++------------ net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 7 +-- net/sunrpc/xprtsock.c | 33 +++++-------- 8 files changed, 73 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index c3d72066d4b1..6b7a86c4d6e6 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -84,7 +84,6 @@ struct svc_xprt { struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ char xpt_remotebuf[INET6_ADDRSTRLEN + 10]; - struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 14c9b4d49fb4..5600242ccbf9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -387,8 +387,8 @@ int xprt_load_transport(const char *); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); -void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action); -void xprt_write_space(struct rpc_xprt *xprt); +void xprt_wait_for_buffer_space(struct rpc_xprt *xprt); +bool xprt_write_space(struct rpc_xprt *xprt); void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); void xprt_update_rtt(struct rpc_task *task); @@ -416,6 +416,7 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_CLOSING (6) #define XPRT_CONGESTED (9) #define XPRT_CWND_WAIT (10) +#define XPRT_WRITE_SPACE (11) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f03911f84953..0c4b2e7d791f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1964,13 +1964,14 @@ call_transmit(struct rpc_task *task) { dprint_status(task); + task->tk_status = 0; + if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) { + if (!xprt_prepare_transmit(task)) + return; + xprt_transmit(task); + } task->tk_action = call_transmit_status; - if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - return; - - if (!xprt_prepare_transmit(task)) - return; - xprt_transmit(task); + xprt_end_transmit(task); } /* @@ -1986,7 +1987,6 @@ call_transmit_status(struct rpc_task *task) * test first. 
*/ if (task->tk_status == 0) { - xprt_end_transmit(task); xprt_request_wait_receive(task); return; } @@ -1994,15 +1994,8 @@ call_transmit_status(struct rpc_task *task) switch (task->tk_status) { default: dprint_status(task); - xprt_end_transmit(task); - break; - case -EBADSLT: - xprt_end_transmit(task); - task->tk_action = call_transmit; - task->tk_status = 0; break; case -EBADMSG: - xprt_end_transmit(task); task->tk_status = 0; task->tk_action = call_encode; break; @@ -2015,6 +2008,7 @@ call_transmit_status(struct rpc_task *task) case -ENOBUFS: rpc_delay(task, HZ>>2); /* fall through */ + case -EBADSLT: case -EAGAIN: task->tk_action = call_transmit; task->tk_status = 0; @@ -2026,7 +2020,6 @@ call_transmit_status(struct rpc_task *task) case -ENETUNREACH: case -EPERM: if (RPC_IS_SOFTCONN(task)) { - xprt_end_transmit(task); if (!task->tk_msg.rpc_proc->p_proc) trace_xprt_ping(task->tk_xprt, task->tk_status); @@ -2069,9 +2062,6 @@ call_bc_transmit(struct rpc_task *task) xprt_transmit(task); - if (task->tk_status == -EAGAIN) - goto out_retry; - xprt_end_transmit(task); dprint_status(task); switch (task->tk_status) { @@ -2087,6 +2077,8 @@ call_bc_transmit(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -EAGAIN: + goto out_retry; case -ETIMEDOUT: /* * Problem reaching the server. Disconnect and let the diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5185efb9027b..87533fbb96cf 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -171,7 +171,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, mutex_init(&xprt->xpt_mutex); spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); - rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); xprt->xpt_net = get_net(net); strcpy(xprt->xpt_remotebuf, "uninitialized"); } @@ -895,7 +894,6 @@ int svc_send(struct svc_rqst *rqstp) else len = xprt->xpt_ops->xpo_sendto(rqstp); mutex_unlock(&xprt->xpt_mutex); - rpc_wake_up(&xprt->xpt_bc_pending); trace_svc_send(rqstp, len); svc_xprt_release(rqstp); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 849e102e3c5a..55dc5c7069b9 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -169,6 +169,17 @@ out: } EXPORT_SYMBOL_GPL(xprt_load_transport); +static void xprt_clear_locked(struct rpc_xprt *xprt) +{ + xprt->snd_task = NULL; + if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { + smp_mb__before_atomic(); + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_atomic(); + } else + queue_work(xprtiod_workqueue, &xprt->task_cleanup); +} + /** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport @@ -188,10 +199,14 @@ int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) return 1; goto out_sleep; } + if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) + goto out_unlock; xprt->snd_task = task; return 1; +out_unlock: + xprt_clear_locked(xprt); out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); @@ -208,17 +223,6 @@ out_sleep: } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); -static void xprt_clear_locked(struct rpc_xprt *xprt) -{ - xprt->snd_task = NULL; - if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { - smp_mb__before_atomic(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_atomic(); - } else - queue_work(xprtiod_workqueue, &xprt->task_cleanup); -} - static bool xprt_need_congestion_window_wait(struct rpc_xprt *xprt) { @@ -267,10 +271,13 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) xprt->snd_task = task; 
return 1; } + if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) + goto out_unlock; if (!xprt_need_congestion_window_wait(xprt)) { xprt->snd_task = task; return 1; } +out_unlock: xprt_clear_locked(xprt); out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); @@ -309,10 +316,12 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) { if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; - + if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) + goto out_unlock; if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending, __xprt_lock_write_func, xprt)) return; +out_unlock: xprt_clear_locked(xprt); } @@ -320,6 +329,8 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) { if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; + if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) + goto out_unlock; if (xprt_need_congestion_window_wait(xprt)) goto out_unlock; if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending, @@ -510,39 +521,46 @@ EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); /** * xprt_wait_for_buffer_space - wait for transport output buffer to clear - * @task: task to be put to sleep - * @action: function pointer to be executed after wait + * @xprt: transport * * Note that we only set the timer for the case of RPC_IS_SOFT(), since * we don't in general want to force a socket disconnection due to * an incomplete RPC call transmission. */ -void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action) +void xprt_wait_for_buffer_space(struct rpc_xprt *xprt) { - struct rpc_rqst *req = task->tk_rqstp; - struct rpc_xprt *xprt = req->rq_xprt; - - task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0; - rpc_sleep_on(&xprt->pending, task, action); + set_bit(XPRT_WRITE_SPACE, &xprt->state); } EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); +static bool +xprt_clear_write_space_locked(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_WRITE_SPACE, &xprt->state)) { + __xprt_lock_write_next(xprt); + dprintk("RPC: write space: waking waiting task on " + "xprt %p\n", xprt); + return true; + } + return false; +} + /** * xprt_write_space - wake the task waiting for transport output buffer space * @xprt: transport with waiting tasks * * Can be called in a soft IRQ context, so xprt_write_space never sleeps. 
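 *
 * Returns %true if the XPRT_WRITE_SPACE flag was cleared and the next
 * queued sender woken, so that a caller such as xs_write_space() can
 * decrement sk_write_pending accordingly.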
*/ -void xprt_write_space(struct rpc_xprt *xprt) +bool xprt_write_space(struct rpc_xprt *xprt) { + bool ret; + + if (!test_bit(XPRT_WRITE_SPACE, &xprt->state)) + return false; spin_lock_bh(&xprt->transport_lock); - if (xprt->snd_task) { - dprintk("RPC: write space: waking waiting task on " - "xprt %p\n", xprt); - rpc_wake_up_queued_task_on_wq(xprtiod_workqueue, - &xprt->pending, xprt->snd_task); - } + ret = xprt_clear_write_space_locked(xprt); spin_unlock_bh(&xprt->transport_lock); + return ret; } EXPORT_SYMBOL_GPL(xprt_write_space); @@ -653,6 +671,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt) dprintk("RPC: disconnected transport %p\n", xprt); spin_lock_bh(&xprt->transport_lock); xprt_clear_connected(xprt); + xprt_clear_write_space_locked(xprt); xprt_wake_pending_tasks(xprt, -EAGAIN); spin_unlock_bh(&xprt->transport_lock); } @@ -1326,9 +1345,7 @@ xprt_transmit(struct rpc_task *task) if (!xprt_request_data_received(task) || test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) continue; - } else if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - rpc_wake_up_queued_task(&xprt->pending, task); - else + } else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) task->tk_status = status; break; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 0020dc401215..53fa95d60015 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -866,7 +866,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) out_err: switch (ret) { case -EAGAIN: - xprt_wait_for_buffer_space(rqst->rq_task, NULL); + xprt_wait_for_buffer_space(rqst->rq_xprt); break; case -ENOBUFS: break; diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index d1618c70edb4..35a8c3aab302 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -224,12 +224,7 @@ xprt_rdma_bc_send_request(struct rpc_rqst *rqst, struct rpc_task *task) dprintk("svcrdma: sending bc call with xid: %08x\n", be32_to_cpu(rqst->rq_xid)); - if (!mutex_trylock(&sxprt->xpt_mutex)) { - rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL); - if (!mutex_trylock(&sxprt->xpt_mutex)) - return -EAGAIN; - rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task); - } + mutex_lock(&sxprt->xpt_mutex); ret = -ENOTCONN; rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f54e8110f4c6..ef8d0e81cbda 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -440,20 +440,12 @@ out: return err; } -static void xs_nospace_callback(struct rpc_task *task) -{ - struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); - - transport->inet->sk_write_pending--; -} - /** - * xs_nospace - place task on wait queue if transmit was incomplete + * xs_nospace - handle transmit was incomplete * @req: pointer to RPC request - * @task: task to put to sleep * */ -static int xs_nospace(struct rpc_rqst *req, struct rpc_task *task) +static int xs_nospace(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -461,7 +453,8 @@ static int xs_nospace(struct rpc_rqst *req, struct rpc_task *task) int ret = -EAGAIN; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", - task->tk_pid, req->rq_slen - transport->xmit.offset, + req->rq_task->tk_pid, + req->rq_slen - transport->xmit.offset, req->rq_slen); /* Protect against races with write_space 
*/ @@ -471,7 +464,7 @@ if (xprt_connected(xprt)) { /* wait for more buffer space */ sk->sk_write_pending++; - xprt_wait_for_buffer_space(task, xs_nospace_callback); + xprt_wait_for_buffer_space(xprt); } else ret = -ENOTCONN; @@ -569,7 +562,7 @@ static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task) case -ENOBUFS: break; case -EAGAIN: - status = xs_nospace(req, task); + status = xs_nospace(req); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -642,7 +635,7 @@ process_status: /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req, task); + status = xs_nospace(req); break; case -ENETUNREACH: case -ENOBUFS: @@ -765,7 +758,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req, struct rpc_task *task) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req, task); + status = xs_nospace(req); break; case -ECONNRESET: case -ECONNREFUSED: @@ -1672,7 +1665,8 @@ static void xs_write_space(struct sock *sk) if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0) goto out; - xprt_write_space(xprt); + if (xprt_write_space(xprt)) + sk->sk_write_pending--; out: rcu_read_unlock(); } @@ -2725,12 +2719,7 @@ static int bc_send_request(struct rpc_rqst *req, struct rpc_task *task) * Grab the mutex to serialize data as the connection is shared * with the fore channel */ - if (!mutex_trylock(&xprt->xpt_mutex)) { - rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL); - if (!mutex_trylock(&xprt->xpt_mutex)) - return -EAGAIN; - rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task); - } + mutex_lock(&xprt->xpt_mutex); if (test_bit(XPT_DEAD, &xprt->xpt_flags)) len = -ENOTCONN; else -- cgit v1.2.3 From adfa71446dd0943ba376eff3e05c7c89582f8038 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 23:58:59 -0400 Subject: SUNRPC: Cleanup: remove the unused 'task' argument from ->send_request() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +- net/sunrpc/xprtrdma/transport.c | 4 ++-- net/sunrpc/xprtsock.c | 11 ++++------- 5 files changed, 9 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 5600242ccbf9..823860cce0bc 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -141,7 +141,7 @@ struct rpc_xprt_ops { void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - int (*send_request)(struct rpc_rqst *req, struct rpc_task *task); + int (*send_request)(struct rpc_rqst *req); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 55dc5c7069b9..c86a5df6c338 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1283,7 +1283,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) req->rq_ntrans++; connect_cookie = xprt->connect_cookie; - status = xprt->ops->send_request(req, snd_task); + status = xprt->ops->send_request(req); trace_xprt_transmit(xprt, req->rq_xid, status); if (status != 0) { req->rq_ntrans--; diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 35a8c3aab302..992312504cfd 100644 ---
a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -215,7 +215,7 @@ drop_connection: * connection. */ static int -xprt_rdma_bc_send_request(struct rpc_rqst *rqst, struct rpc_task *task) +xprt_rdma_bc_send_request(struct rpc_rqst *rqst) { struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; struct svcxprt_rdma *rdma; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 9ff322e53f37..a5a6a4a353f2 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -693,7 +693,7 @@ xprt_rdma_free(struct rpc_task *task) /** * xprt_rdma_send_request - marshal and send an RPC request - * @task: RPC task with an RPC message in rq_snd_buf + * @rqst: RPC message in rq_snd_buf * * Caller holds the transport's write lock. * @@ -706,7 +706,7 @@ xprt_rdma_free(struct rpc_task *task) * sent. Do not try to send this message again. */ static int -xprt_rdma_send_request(struct rpc_rqst *rqst, struct rpc_task *task) +xprt_rdma_send_request(struct rpc_rqst *rqst) { struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ef8d0e81cbda..f16406228ead 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -507,7 +507,6 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) /** * xs_local_send_request - write an RPC request to an AF_LOCAL socket * @req: pointer to RPC request - * @task: RPC task that manages the state of an RPC request * * Return values: * 0: The request has been sent @@ -516,7 +515,7 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) * ENOTCONN: Caller needs to invoke connect logic then call again * other: Some other error occured, the request was not sent */ -static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task) +static int xs_local_send_request(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = @@ -579,7 +578,6 @@ static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task) /** * xs_udp_send_request - write an RPC request to a UDP socket * @req: pointer to RPC request - * @task: address of RPC task that manages the state of an RPC request * * Return values: * 0: The request has been sent @@ -588,7 +586,7 @@ static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task) * ENOTCONN: Caller needs to invoke connect logic then call again * other: Some other error occurred, the request was not sent */ -static int xs_udp_send_request(struct rpc_rqst *req, struct rpc_task *task) +static int xs_udp_send_request(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -656,7 +654,6 @@ process_status: /** * xs_tcp_send_request - write an RPC request to a TCP socket * @req: pointer to RPC request - * @task: address of RPC task that manages the state of an RPC request * * Return values: * 0: The request has been sent @@ -668,7 +665,7 @@ process_status: * XXX: In the case of soft timeouts, should we eventually give up * if sendmsg is not able to make progress? */ -static int xs_tcp_send_request(struct rpc_rqst *req, struct rpc_task *task) +static int xs_tcp_send_request(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -2704,7 +2701,7 @@ static int bc_sendto(struct rpc_rqst *req) /* * The send routine. 
Borrows from svc_send */ -static int bc_send_request(struct rpc_rqst *req, struct rpc_task *task) +static int bc_send_request(struct rpc_rqst *req) { struct svc_xprt *xprt; int len; -- cgit v1.2.3 From 95f7691daa57bbd68caac2bdad79e0b08f4d46c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 7 Sep 2018 08:35:22 -0400 Subject: SUNRPC: Convert xprt receive queue to use an rbtree If the server is slow, we can find ourselves with quite a lot of entries on the receive queue. Converting the search from an O(n) to O(log(n)) can make a significant difference, particularly since we have to hold a number of locks while searching. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 4 +- net/sunrpc/xprt.c | 93 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 84 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 823860cce0bc..9be399020dab 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -85,7 +85,7 @@ struct rpc_rqst { union { struct list_head rq_list; /* Slot allocation list */ - struct list_head rq_recv; /* Receive queue */ + struct rb_node rq_recv; /* Receive queue */ }; struct list_head rq_xmit; /* Send queue */ @@ -260,7 +260,7 @@ struct rpc_xprt { * backchannel rpc_rqst's */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ - struct list_head recv_queue; /* Receive queue */ + struct rb_root recv_queue; /* Receive queue */ struct { unsigned long bind_count, /* total number of binds */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 11133ba716b9..480461ad0c86 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -753,7 +753,7 @@ static void xprt_schedule_autodisconnect(struct rpc_xprt *xprt) __must_hold(&xprt->transport_lock) { - if (list_empty(&xprt->recv_queue) && xprt_has_timer(xprt)) + if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt)) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); } @@ -763,7 +763,7 @@ xprt_init_autodisconnect(struct timer_list *t) struct rpc_xprt *xprt = from_timer(xprt, t, timer); spin_lock(&xprt->transport_lock); - if (!list_empty(&xprt->recv_queue)) + if (!RB_EMPTY_ROOT(&xprt->recv_queue)) goto out_abort; /* Reset xprt->last_used to avoid connect/autodisconnect cycling */ xprt->last_used = jiffies; @@ -880,6 +880,75 @@ static void xprt_connect_status(struct rpc_task *task) } } +enum xprt_xid_rb_cmp { + XID_RB_EQUAL, + XID_RB_LEFT, + XID_RB_RIGHT, +}; +static enum xprt_xid_rb_cmp +xprt_xid_cmp(__be32 xid1, __be32 xid2) +{ + if (xid1 == xid2) + return XID_RB_EQUAL; + if ((__force u32)xid1 < (__force u32)xid2) + return XID_RB_LEFT; + return XID_RB_RIGHT; +} + +static struct rpc_rqst * +xprt_request_rb_find(struct rpc_xprt *xprt, __be32 xid) +{ + struct rb_node *n = xprt->recv_queue.rb_node; + struct rpc_rqst *req; + + while (n != NULL) { + req = rb_entry(n, struct rpc_rqst, rq_recv); + switch (xprt_xid_cmp(xid, req->rq_xid)) { + case XID_RB_LEFT: + n = n->rb_left; + break; + case XID_RB_RIGHT: + n = n->rb_right; + break; + case XID_RB_EQUAL: + return req; + } + } + return NULL; +} + +static void +xprt_request_rb_insert(struct rpc_xprt *xprt, struct rpc_rqst *new) +{ + struct rb_node **p = &xprt->recv_queue.rb_node; + struct rb_node *n = NULL; + struct rpc_rqst *req; + + while (*p != NULL) { + n = *p; + req = rb_entry(n, struct rpc_rqst, rq_recv); + switch(xprt_xid_cmp(new->rq_xid, req->rq_xid)) { + case XID_RB_LEFT: + p = &n->rb_left; + break; + case XID_RB_RIGHT: + p = &n->rb_right; + break; + case 
XID_RB_EQUAL: + WARN_ON_ONCE(new != req); + return; + } + } + rb_link_node(&new->rq_recv, n, p); + rb_insert_color(&new->rq_recv, &xprt->recv_queue); +} + +static void +xprt_request_rb_remove(struct rpc_xprt *xprt, struct rpc_rqst *req) +{ + rb_erase(&req->rq_recv, &xprt->recv_queue); +} + /** * xprt_lookup_rqst - find an RPC request corresponding to an XID * @xprt: transport on which the original request was transmitted @@ -891,12 +960,12 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) { struct rpc_rqst *entry; - list_for_each_entry(entry, &xprt->recv_queue, rq_recv) - if (entry->rq_xid == xid) { - trace_xprt_lookup_rqst(xprt, xid, 0); - entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime); - return entry; - } + entry = xprt_request_rb_find(xprt, xid); + if (entry != NULL) { + trace_xprt_lookup_rqst(xprt, xid, 0); + entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime); + return entry; + } dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", ntohl(xid)); @@ -981,7 +1050,7 @@ xprt_request_enqueue_receive(struct rpc_task *task) sizeof(req->rq_private_buf)); /* Add request to the receive list */ - list_add_tail(&req->rq_recv, &xprt->recv_queue); + xprt_request_rb_insert(xprt, req); set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); spin_unlock(&xprt->queue_lock); @@ -999,8 +1068,10 @@ xprt_request_enqueue_receive(struct rpc_task *task) static void xprt_request_dequeue_receive_locked(struct rpc_task *task) { + struct rpc_rqst *req = task->tk_rqstp; + if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) - list_del(&task->tk_rqstp->rq_recv); + xprt_request_rb_remove(req->rq_xprt, req); } /** @@ -1711,7 +1782,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) spin_lock_init(&xprt->queue_lock); INIT_LIST_HEAD(&xprt->free); - INIT_LIST_HEAD(&xprt->recv_queue); + xprt->recv_queue = RB_ROOT; INIT_LIST_HEAD(&xprt->xmit_queue); #if defined(CONFIG_SUNRPC_BACKCHANNEL) spin_lock_init(&xprt->bc_pa_lock); -- cgit v1.2.3 From f42f7c283078ce3c1e8368b140e270755b1ae313 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 8 Sep 2018 22:09:48 -0400 Subject: SUNRPC: Fix priority queue fairness Fix up the priority queue to not batch by owner, but by queue, so that we allow '1 << priority' elements to be dequeued before switching to the next priority queue. The owner field is still used to wake up requests in round robin order by owner to avoid single processes hogging the RPC layer by loading the queues. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 2 - net/sunrpc/sched.c | 109 +++++++++++++++++++++---------------------- 2 files changed, 54 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8840a420cf4c..7b540c066594 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -189,7 +189,6 @@ struct rpc_timer { struct rpc_wait_queue { spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ - pid_t owner; /* process id of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ @@ -205,7 +204,6 @@ struct rpc_wait_queue { * from a single cookie. The aim is to improve * performance of NFS operations such as read/write. 
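 * (After the change below, up to '1 << priority' tasks are dequeued
 * from a priority level, e.g. one at the lowest level and eight at
 * the highest, before the scan rotates to the next non-empty queue.)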
*/ -#define RPC_BATCH_COUNT 16 #define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0) /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9a8ec012b449..57ca5bead1cb 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -99,64 +99,78 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } -static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue) -{ - struct list_head *q = &queue->tasks[queue->priority]; - struct rpc_task *task; - - if (!list_empty(q)) { - task = list_first_entry(q, struct rpc_task, u.tk_wait.list); - if (task->tk_owner == queue->owner) - list_move_tail(&task->u.tk_wait.list, q); - } -} - static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) { if (queue->priority != priority) { - /* Fairness: rotate the list when changing priority */ - rpc_rotate_queue_owner(queue); queue->priority = priority; + queue->nr = 1U << priority; } } -static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) -{ - queue->owner = pid; - queue->nr = RPC_BATCH_COUNT; -} - static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) { rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_owner(queue, 0); } /* - * Add new request to a priority queue. + * Add a request to a queue list */ -static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, - struct rpc_task *task, - unsigned char queue_priority) +static void +__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task) { - struct list_head *q; struct rpc_task *t; - INIT_LIST_HEAD(&task->u.tk_wait.links); - if (unlikely(queue_priority > queue->maxpriority)) - queue_priority = queue->maxpriority; - if (queue_priority > queue->priority) - rpc_set_waitqueue_priority(queue, queue_priority); - q = &queue->tasks[queue_priority]; list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_owner == task->tk_owner) { - list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); + list_add_tail(&task->u.tk_wait.links, + &t->u.tk_wait.links); + /* Cache the queue head in task->u.tk_wait.list */ + task->u.tk_wait.list.next = q; + task->u.tk_wait.list.prev = NULL; return; } } + INIT_LIST_HEAD(&task->u.tk_wait.links); list_add_tail(&task->u.tk_wait.list, q); } +/* + * Remove request from a queue list + */ +static void +__rpc_list_dequeue_task(struct rpc_task *task) +{ + struct list_head *q; + struct rpc_task *t; + + if (task->u.tk_wait.list.prev == NULL) { + list_del(&task->u.tk_wait.links); + return; + } + if (!list_empty(&task->u.tk_wait.links)) { + t = list_first_entry(&task->u.tk_wait.links, + struct rpc_task, + u.tk_wait.links); + /* Assume __rpc_list_enqueue_task() cached the queue head */ + q = t->u.tk_wait.list.next; + list_add_tail(&t->u.tk_wait.list, q); + list_del(&task->u.tk_wait.links); + } + list_del(&task->u.tk_wait.list); +} + +/* + * Add new request to a priority queue. + */ +static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned char queue_priority) +{ + if (unlikely(queue_priority > queue->maxpriority)) + queue_priority = queue->maxpriority; + __rpc_list_enqueue_task(&queue->tasks[queue_priority], task); +} + /* * Add new request to wait queue. 
* @@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, */ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) { - struct rpc_task *t; - - if (!list_empty(&task->u.tk_wait.links)) { - t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list); - list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); - list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); - } + __rpc_list_dequeue_task(task); } /* @@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas __rpc_disable_timer(queue, task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); - list_del(&task->u.tk_wait.list); + else + list_del(&task->u.tk_wait.list); queue->qlen--; dprintk("RPC: %5u removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); @@ -545,17 +554,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q * Service a batch of tasks from a single owner. */ q = &queue->tasks[queue->priority]; - if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, u.tk_wait.list); - if (queue->owner == task->tk_owner) { - if (--queue->nr) - goto out; - list_move_tail(&task->u.tk_wait.list, q); - } - /* - * Check if we need to switch queues. - */ - goto new_owner; + if (!list_empty(q) && --queue->nr) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + goto out; } /* @@ -567,7 +568,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q else q = q - 1; if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, u.tk_wait.list); + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); goto new_queue; } } while (q != &queue->tasks[queue->priority]); @@ -577,8 +578,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q new_queue: rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0])); -new_owner: - rpc_set_waitqueue_owner(queue, task->tk_owner); out: return task; } -- cgit v1.2.3 From 431f6eb3570f286036bc8718a908a283f5d99473 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Sep 2018 00:08:20 -0400 Subject: SUNRPC: Add a label for RPC calls that require allocation on receive If the RPC call relies on the receive call allocating pages as buffers, then let's label it so that we a) Don't leak memory by allocating pages for requests that do not expect this behaviour b) Can optimise for the common case where calls do not require allocation. 
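To illustrate the opt-in, a minimal sketch; the helper name is hypothetical, while the real call sites are the nfs3xdr.c and gss_rpc_xdr.c hunks below:

/* Sketch: mark a request's receive buffer as sparse, so the transport
 * may fill in missing pages at receive time instead of requiring the
 * caller to preallocate all of them.
 */
static void mark_reply_buf_sparse(struct rpc_rqst *req)
{
	req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
}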
Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 4 +++- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/auth_gss/gss_rpc_xdr.c | 1 + net/sunrpc/socklib.c | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 64e4fa33d89f..d8c4c10b15f7 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1364,10 +1364,12 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, encode_nfs_fh3(xdr, args->fh); encode_uint32(xdr, args->mask); - if (args->mask & (NFS_ACL | NFS_DFACL)) + if (args->mask & (NFS_ACL | NFS_DFACL)) { prepare_reply_buffer(req, args->pages, 0, NFSACL_MAXPAGES << PAGE_SHIFT, ACL3_getaclres_sz); + req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; + } } static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2bd68177a442..431829233392 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -58,6 +58,7 @@ struct xdr_buf { flags; /* Flags for data disposition */ #define XDRBUF_READ 0x01 /* target of file read */ #define XDRBUF_WRITE 0x02 /* source of file write */ +#define XDRBUF_SPARSE_PAGES 0x04 /* Page array is sparse */ unsigned int buflen, /* Total length of storage buffer */ len; /* Length of XDR encoded message */ diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 444380f968f1..006062ad5f58 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -784,6 +784,7 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req, xdr_inline_pages(&req->rq_rcv_buf, PAGE_SIZE/2 /* pretty arbitrary */, arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE); + req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; done: if (err) dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err); diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index f217c348b341..08f00a98151f 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -104,7 +104,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct /* ACL likes to be lazy in allocating pages - ACLs * are small by default but can get huge. */ - if (unlikely(*ppage == NULL)) { + if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) { *ppage = alloc_page(GFP_ATOMIC); if (unlikely(*ppage == NULL)) { if (copied == 0) -- cgit v1.2.3 From 9d96acbc7f376dc1ffcedca0c349dd3389187a38 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Sep 2018 12:22:04 -0400 Subject: SUNRPC: Add a bvec array to struct xdr_buf for use with iov_iter() Add a bvec array to struct xdr_buf, and have the client allocate it when we need to receive data into pages.
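For illustration only, a sketch of how the new array is meant to be consumed; the helper is hypothetical, and the READ | ITER_BVEC convention matches the iterator patches later in this series:

/* Sketch: expose the receive pages of an rpc_rqst as an iov_iter. */
static int rqst_prepare_rcv_iter(struct rpc_rqst *req, struct iov_iter *iter)
{
	struct xdr_buf *buf = &req->rq_rcv_buf;
	int err;

	/* One bio_vec per receive page; a no-op if already allocated. */
	err = xdr_alloc_bvec(buf, GFP_NOIO);
	if (err < 0)
		return err;
	/* Page data only; the head/tail kvecs are handled separately. */
	iov_iter_bvec(iter, READ | ITER_BVEC, buf->bvec,
		      xdr_buf_pagecount(buf), buf->page_len);
	return 0;
}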
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 7 +++++++ include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/clnt.c | 4 +++- net/sunrpc/xdr.c | 34 ++++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 17 +++++++++++++++++ 5 files changed, 63 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 431829233392..745587132a87 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -18,6 +18,7 @@ #include #include +struct bio_vec; struct rpc_rqst; /* @@ -52,6 +53,7 @@ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ + struct bio_vec *bvec; struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ @@ -70,6 +72,8 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_base = start; buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; + buf->bvec = NULL; + buf->pages = NULL; buf->page_len = 0; buf->flags = 0; buf->len = 0; @@ -116,6 +120,9 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); void xdr_terminate_string(struct xdr_buf *, const u32); +size_t xdr_buf_pagecount(struct xdr_buf *buf); +int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); +void xdr_free_bvec(struct xdr_buf *buf); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9be399020dab..a4ab4f8d9140 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -141,6 +141,7 @@ struct rpc_xprt_ops { void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); + void (*prepare_request)(struct rpc_rqst *req); int (*send_request)(struct rpc_rqst *req); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -343,6 +344,7 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); +void xprt_request_prepare(struct rpc_rqst *req); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0c4b2e7d791f..ae3b8145da35 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1753,6 +1753,8 @@ rpc_xdr_encode(struct rpc_task *task) task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); + if (task->tk_status == 0) + xprt_request_prepare(req); } /* @@ -1768,7 +1770,7 @@ call_encode(struct rpc_task *task) /* Did the encode result in an error condition? */ if (task->tk_status != 0) { /* Was the error nonfatal? 
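 * -EAGAIN, and with the change below also -ENOMEM (e.g. from a failed
 * bvec allocation in the new prepare_request step), are retried after
 * a short delay; any other error terminates the call.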
*/ - if (task->tk_status == -EAGAIN) + if (task->tk_status == -EAGAIN || task->tk_status == -ENOMEM) rpc_delay(task, HZ >> 4); else rpc_exit(task, task->tk_status); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 30afbd236656..2bbb8d38d2bf 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * XDR functions for basic NFS types @@ -128,6 +129,39 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len) } EXPORT_SYMBOL_GPL(xdr_terminate_string); +size_t +xdr_buf_pagecount(struct xdr_buf *buf) +{ + if (!buf->page_len) + return 0; + return (buf->page_base + buf->page_len + PAGE_SIZE - 1) >> PAGE_SHIFT; +} + +int +xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp) +{ + size_t i, n = xdr_buf_pagecount(buf); + + if (n != 0 && buf->bvec == NULL) { + buf->bvec = kmalloc_array(n, sizeof(buf->bvec[0]), gfp); + if (!buf->bvec) + return -ENOMEM; + for (i = 0; i < n; i++) { + buf->bvec[i].bv_page = buf->pages[i]; + buf->bvec[i].bv_len = PAGE_SIZE; + buf->bvec[i].bv_offset = 0; + } + } + return 0; +} + +void +xdr_free_bvec(struct xdr_buf *buf) +{ + kfree(buf->bvec); + buf->bvec = NULL; +} + void xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, struct page **pages, unsigned int base, unsigned int len) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 7333874c6595..7ee9f1e996db 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1263,6 +1263,22 @@ xprt_request_dequeue_transmit(struct rpc_task *task) spin_unlock(&xprt->queue_lock); } +/** + * xprt_request_prepare - prepare an encoded request for transport + * @req: pointer to rpc_rqst + * + * Calls into the transport layer to do whatever is needed to prepare + * the request for transmission or receive. + */ +void +xprt_request_prepare(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + + if (xprt->ops->prepare_request) + xprt->ops->prepare_request(req); +} + /** * xprt_request_need_retransmit - Test if a task needs retransmission * @task: pointer to rpc_task @@ -1727,6 +1743,7 @@ void xprt_release(struct rpc_task *task) if (req->rq_buffer) xprt->ops->buf_free(task); xprt_inject_disconnect(xprt); + xdr_free_bvec(&req->rq_rcv_buf); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); task->tk_rqstp = NULL; -- cgit v1.2.3 From 277e4ab7d530bf287e02b65cfcd3ea8f489784f6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Sep 2018 09:49:06 -0400 Subject: SUNRPC: Simplify TCP receive code by switching to using iterators Most of this code should also be reusable with other socket types. 
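The core receive pattern is sketched below; this is a simplified form of the xs_read_kvec() helper that the patch adds, assuming a connected struct socket:

/* Sketch: read a fixed-size record marker straight into a kvec,
 * replacing the old skb-copy state machine driven by tcp_read_sock().
 */
static ssize_t read_frag_header(struct socket *sock, __be32 *fraghdr)
{
	struct msghdr msg = { 0 };
	struct kvec kvec = {
		.iov_base = fraghdr,
		.iov_len = sizeof(*fraghdr),
	};

	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &kvec, 1,
		      sizeof(*fraghdr));
	/* Nonblocking; a short read just means "come back later". */
	return sock_recvmsg(sock, &msg, MSG_DONTWAIT);
}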
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 19 +- include/trace/events/sunrpc.h | 15 +- net/sunrpc/xprtsock.c | 697 +++++++++++++++++++--------------------- 3 files changed, 338 insertions(+), 393 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 005cfb6e7238..458bfe0137f5 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -31,15 +31,16 @@ struct sock_xprt { * State of TCP reply receive */ struct { - __be32 fraghdr, + struct { + __be32 fraghdr, xid, calldir; + } __attribute__((packed)); u32 offset, len; - unsigned long copied, - flags; + unsigned long copied; } recv; /* @@ -76,21 +77,9 @@ struct sock_xprt { void (*old_error_report)(struct sock *); }; -/* - * TCP receive state flags - */ -#define TCP_RCV_LAST_FRAG (1UL << 0) -#define TCP_RCV_COPY_FRAGHDR (1UL << 1) -#define TCP_RCV_COPY_XID (1UL << 2) -#define TCP_RCV_COPY_DATA (1UL << 3) -#define TCP_RCV_READ_CALLDIR (1UL << 4) -#define TCP_RCV_COPY_CALLDIR (1UL << 5) - /* * TCP RPC flags */ -#define TCP_RPC_REPLY (1UL << 6) - #define XPRT_SOCK_CONNECTING 1U #define XPRT_SOCK_DATA_READY (2) #define XPRT_SOCK_UPD_TIMEOUT (3) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 0aa347194e0f..19e08d12696c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -497,16 +497,6 @@ TRACE_EVENT(xs_tcp_data_ready, __get_str(port), __entry->err, __entry->total) ); -#define rpc_show_sock_xprt_flags(flags) \ - __print_flags(flags, "|", \ - { TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \ - { TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \ - { TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \ - { TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \ - { TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \ - { TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \ - { TCP_RPC_REPLY, "TCP_RPC_REPLY" }) - TRACE_EVENT(xs_tcp_data_recv, TP_PROTO(struct sock_xprt *xs), @@ -516,7 +506,6 @@ TRACE_EVENT(xs_tcp_data_recv, __string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]) __string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]) __field(u32, xid) - __field(unsigned long, flags) __field(unsigned long, copied) __field(unsigned int, reclen) __field(unsigned long, offset) @@ -526,15 +515,13 @@ TRACE_EVENT(xs_tcp_data_recv, __assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]); __assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]); __entry->xid = be32_to_cpu(xs->recv.xid); - __entry->flags = xs->recv.flags; __entry->copied = xs->recv.copied; __entry->reclen = xs->recv.len; __entry->offset = xs->recv.offset; ), - TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu", + TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u offset=%lu", __get_str(addr), __get_str(port), __entry->xid, - rpc_show_sock_xprt_flags(__entry->flags), __entry->copied, __entry->reclen, __entry->offset) ); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f16406228ead..06aa75008708 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -47,13 +47,13 @@ #include #include #include +#include +#include #include #include "sunrpc.h" -#define RPC_TCP_READ_CHUNK_SZ (3*512*1024) - static void xs_close(struct rpc_xprt *xprt); static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct socket *sock); @@ -325,6 +325,323 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt) } } +static size_t +xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) +{ 
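+	/* Fill any NULL slots in buf->pages[] needed to cover 'want'
+	 * bytes; on allocation failure, trim buf->page_len to the
+	 * range actually backed by pages and return that length.
+	 */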
+ size_t i,n; + + if (!(buf->flags & XDRBUF_SPARSE_PAGES)) + return want; + if (want > buf->page_len) + want = buf->page_len; + n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < n; i++) { + if (buf->pages[i]) + continue; + buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp); + if (!buf->pages[i]) { + buf->page_len = (i * PAGE_SIZE) - buf->page_base; + return buf->page_len; + } + } + return want; +} + +static ssize_t +xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) +{ + ssize_t ret; + if (seek != 0) + iov_iter_advance(&msg->msg_iter, seek); + ret = sock_recvmsg(sock, msg, flags); + return ret > 0 ? ret + seek : ret; +} + +static ssize_t +xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags, + struct kvec *kvec, size_t count, size_t seek) +{ + iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count); + return xs_sock_recvmsg(sock, msg, flags, seek); +} + +static ssize_t +xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags, + struct bio_vec *bvec, unsigned long nr, size_t count, + size_t seek) +{ + iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count); + return xs_sock_recvmsg(sock, msg, flags, seek); +} + +static ssize_t +xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, + size_t count) +{ + struct kvec kvec = { 0 }; + return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0); +} + +static ssize_t +xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, + struct xdr_buf *buf, size_t count, size_t seek, size_t *read) +{ + size_t want, seek_init = seek, offset = 0; + ssize_t ret; + + if (seek < buf->head[0].iov_len) { + want = min_t(size_t, count, buf->head[0].iov_len); + ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek); + if (ret <= 0) + goto sock_err; + offset += ret; + if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + goto out; + if (ret != want) + goto eagain; + seek = 0; + } else { + seek -= buf->head[0].iov_len; + offset += buf->head[0].iov_len; + } + if (seek < buf->page_len) { + want = xs_alloc_sparse_pages(buf, + min_t(size_t, count - offset, buf->page_len), + GFP_NOWAIT); + ret = xs_read_bvec(sock, msg, flags, buf->bvec, + xdr_buf_pagecount(buf), + want + buf->page_base, + seek + buf->page_base); + if (ret <= 0) + goto sock_err; + offset += ret - buf->page_base; + if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + goto out; + if (ret != want) + goto eagain; + seek = 0; + } else { + seek -= buf->page_len; + offset += buf->page_len; + } + if (seek < buf->tail[0].iov_len) { + want = min_t(size_t, count - offset, buf->tail[0].iov_len); + ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek); + if (ret <= 0) + goto sock_err; + offset += ret; + if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + goto out; + if (ret != want) + goto eagain; + } else + offset += buf->tail[0].iov_len; + ret = -EMSGSIZE; + msg->msg_flags |= MSG_TRUNC; +out: + *read = offset - seek_init; + return ret; +eagain: + ret = -EAGAIN; + goto out; +sock_err: + offset += seek; + goto out; +} + +static void +xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf) +{ + if (!transport->recv.copied) { + if (buf->head[0].iov_len >= transport->recv.offset) + memcpy(buf->head[0].iov_base, + &transport->recv.xid, + transport->recv.offset); + transport->recv.copied = transport->recv.offset; + } +} + +static bool +xs_read_stream_request_done(struct sock_xprt *transport) +{ + return transport->recv.fraghdr & 
cpu_to_be32(RPC_LAST_STREAM_FRAGMENT); +} + +static ssize_t +xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, + int flags, struct rpc_rqst *req) +{ + struct xdr_buf *buf = &req->rq_private_buf; + size_t want, read; + ssize_t ret; + + xs_read_header(transport, buf); + + want = transport->recv.len - transport->recv.offset; + ret = xs_read_xdr_buf(transport->sock, msg, flags, buf, + transport->recv.copied + want, transport->recv.copied, + &read); + transport->recv.offset += read; + transport->recv.copied += read; + if (transport->recv.offset == transport->recv.len) { + if (xs_read_stream_request_done(transport)) + msg->msg_flags |= MSG_EOR; + return transport->recv.copied; + } + + switch (ret) { + case -EMSGSIZE: + return transport->recv.copied; + case 0: + return -ESHUTDOWN; + default: + if (ret < 0) + return ret; + } + return -EAGAIN; +} + +static size_t +xs_read_stream_headersize(bool isfrag) +{ + if (isfrag) + return sizeof(__be32); + return 3 * sizeof(__be32); +} + +static ssize_t +xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg, + int flags, size_t want, size_t seek) +{ + struct kvec kvec = { + .iov_base = &transport->recv.fraghdr, + .iov_len = want, + }; + return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek); +} + +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static ssize_t +xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) +{ + struct rpc_xprt *xprt = &transport->xprt; + struct rpc_rqst *req; + ssize_t ret; + + /* Look up and lock the request corresponding to the given XID */ + req = xprt_lookup_bc_request(xprt, transport->recv.xid); + if (!req) { + printk(KERN_WARNING "Callback slot table overflowed\n"); + return -ESHUTDOWN; + } + + ret = xs_read_stream_request(transport, msg, flags, req); + if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + xprt_complete_bc_request(req, ret); + + return ret; +} +#else /* CONFIG_SUNRPC_BACKCHANNEL */ +static ssize_t +xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) +{ + return -ESHUTDOWN; +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + +static ssize_t +xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags) +{ + struct rpc_xprt *xprt = &transport->xprt; + struct rpc_rqst *req; + ssize_t ret = 0; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->queue_lock); + req = xprt_lookup_rqst(xprt, transport->recv.xid); + if (!req) { + msg->msg_flags |= MSG_TRUNC; + goto out; + } + xprt_pin_rqst(req); + spin_unlock(&xprt->queue_lock); + + ret = xs_read_stream_request(transport, msg, flags, req); + + spin_lock(&xprt->queue_lock); + if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + xprt_complete_rqst(req->rq_task, ret); + xprt_unpin_rqst(req); +out: + spin_unlock(&xprt->queue_lock); + return ret; +} + +static ssize_t +xs_read_stream(struct sock_xprt *transport, int flags) +{ + struct msghdr msg = { 0 }; + size_t want, read = 0; + ssize_t ret = 0; + + if (transport->recv.len == 0) { + want = xs_read_stream_headersize(transport->recv.copied != 0); + ret = xs_read_stream_header(transport, &msg, flags, want, + transport->recv.offset); + if (ret <= 0) + goto out_err; + transport->recv.offset = ret; + if (ret != want) { + ret = -EAGAIN; + goto out_err; + } + transport->recv.len = be32_to_cpu(transport->recv.fraghdr) & + RPC_FRAGMENT_SIZE_MASK; + transport->recv.offset -= sizeof(transport->recv.fraghdr); + read = ret; + } + + switch (be32_to_cpu(transport->recv.calldir)) { + case RPC_CALL: + ret = 
xs_read_stream_call(transport, &msg, flags); + break; + case RPC_REPLY: + ret = xs_read_stream_reply(transport, &msg, flags); + } + if (msg.msg_flags & MSG_TRUNC) { + transport->recv.calldir = cpu_to_be32(-1); + transport->recv.copied = -1; + } + if (ret < 0) + goto out_err; + read += ret; + if (transport->recv.offset < transport->recv.len) { + ret = xs_read_discard(transport->sock, &msg, flags, + transport->recv.len - transport->recv.offset); + if (ret <= 0) + goto out_err; + transport->recv.offset += ret; + read += ret; + if (transport->recv.offset != transport->recv.len) + return -EAGAIN; + } + if (xs_read_stream_request_done(transport)) { + trace_xs_tcp_data_recv(transport); + transport->recv.copied = 0; + } + transport->recv.offset = 0; + transport->recv.len = 0; + return read; +out_err: + switch (ret) { + case 0: + case -ESHUTDOWN: + xprt_force_disconnect(&transport->xprt); + return -ESHUTDOWN; + } + return ret; +} + #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) @@ -484,6 +801,12 @@ static int xs_nospace(struct rpc_rqst *req) return ret; } +static void +xs_stream_prepare_request(struct rpc_rqst *req) +{ + req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO); +} + /* * Determine if the previous message in the stream was aborted before it * could complete transmission. @@ -1157,263 +1480,7 @@ static void xs_tcp_force_close(struct rpc_xprt *xprt) xprt_force_disconnect(xprt); } -static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - size_t len, used; - char *p; - - p = ((char *) &transport->recv.fraghdr) + transport->recv.offset; - len = sizeof(transport->recv.fraghdr) - transport->recv.offset; - used = xdr_skb_read_bits(desc, p, len); - transport->recv.offset += used; - if (used != len) - return; - - transport->recv.len = ntohl(transport->recv.fraghdr); - if (transport->recv.len & RPC_LAST_STREAM_FRAGMENT) - transport->recv.flags |= TCP_RCV_LAST_FRAG; - else - transport->recv.flags &= ~TCP_RCV_LAST_FRAG; - transport->recv.len &= RPC_FRAGMENT_SIZE_MASK; - - transport->recv.flags &= ~TCP_RCV_COPY_FRAGHDR; - transport->recv.offset = 0; - - /* Sanity check of the record length */ - if (unlikely(transport->recv.len < 8)) { - dprintk("RPC: invalid TCP record fragment length\n"); - xs_tcp_force_close(xprt); - return; - } - dprintk("RPC: reading TCP record fragment of length %d\n", - transport->recv.len); -} - -static void xs_tcp_check_fraghdr(struct sock_xprt *transport) -{ - if (transport->recv.offset == transport->recv.len) { - transport->recv.flags |= TCP_RCV_COPY_FRAGHDR; - transport->recv.offset = 0; - if (transport->recv.flags & TCP_RCV_LAST_FRAG) { - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - transport->recv.flags |= TCP_RCV_COPY_XID; - transport->recv.copied = 0; - } - } -} - -static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc) -{ - size_t len, used; - char *p; - - len = sizeof(transport->recv.xid) - transport->recv.offset; - dprintk("RPC: reading XID (%zu bytes)\n", len); - p = ((char *) &transport->recv.xid) + transport->recv.offset; - used = xdr_skb_read_bits(desc, p, len); - transport->recv.offset += used; - if (used != len) - return; - transport->recv.flags &= ~TCP_RCV_COPY_XID; - transport->recv.flags |= TCP_RCV_READ_CALLDIR; - transport->recv.copied = 4; - dprintk("RPC: 
reading %s XID %08x\n", - (transport->recv.flags & TCP_RPC_REPLY) ? "reply for" - : "request with", - ntohl(transport->recv.xid)); - xs_tcp_check_fraghdr(transport); -} - -static inline void xs_tcp_read_calldir(struct sock_xprt *transport, - struct xdr_skb_reader *desc) -{ - size_t len, used; - u32 offset; - char *p; - - /* - * We want transport->recv.offset to be 8 at the end of this routine - * (4 bytes for the xid and 4 bytes for the call/reply flag). - * When this function is called for the first time, - * transport->recv.offset is 4 (after having already read the xid). - */ - offset = transport->recv.offset - sizeof(transport->recv.xid); - len = sizeof(transport->recv.calldir) - offset; - dprintk("RPC: reading CALL/REPLY flag (%zu bytes)\n", len); - p = ((char *) &transport->recv.calldir) + offset; - used = xdr_skb_read_bits(desc, p, len); - transport->recv.offset += used; - if (used != len) - return; - transport->recv.flags &= ~TCP_RCV_READ_CALLDIR; - /* - * We don't yet have the XDR buffer, so we will write the calldir - * out after we get the buffer from the 'struct rpc_rqst' - */ - switch (ntohl(transport->recv.calldir)) { - case RPC_REPLY: - transport->recv.flags |= TCP_RCV_COPY_CALLDIR; - transport->recv.flags |= TCP_RCV_COPY_DATA; - transport->recv.flags |= TCP_RPC_REPLY; - break; - case RPC_CALL: - transport->recv.flags |= TCP_RCV_COPY_CALLDIR; - transport->recv.flags |= TCP_RCV_COPY_DATA; - transport->recv.flags &= ~TCP_RPC_REPLY; - break; - default: - dprintk("RPC: invalid request message type\n"); - xs_tcp_force_close(&transport->xprt); - } - xs_tcp_check_fraghdr(transport); -} - -static inline void xs_tcp_read_common(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc, - struct rpc_rqst *req) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - struct xdr_buf *rcvbuf; - size_t len; - ssize_t r; - - rcvbuf = &req->rq_private_buf; - - if (transport->recv.flags & TCP_RCV_COPY_CALLDIR) { - /* - * Save the RPC direction in the XDR buffer - */ - memcpy(rcvbuf->head[0].iov_base + transport->recv.copied, - &transport->recv.calldir, - sizeof(transport->recv.calldir)); - transport->recv.copied += sizeof(transport->recv.calldir); - transport->recv.flags &= ~TCP_RCV_COPY_CALLDIR; - } - - len = desc->count; - if (len > transport->recv.len - transport->recv.offset) - desc->count = transport->recv.len - transport->recv.offset; - r = xdr_partial_copy_from_skb(rcvbuf, transport->recv.copied, - desc, xdr_skb_read_bits); - - if (desc->count) { - /* Error when copying to the receive buffer, - * usually because we weren't able to allocate - * additional buffer pages. All we can do now - * is turn off TCP_RCV_COPY_DATA, so the request - * will not receive any additional updates, - * and time out. - * Any remaining data from this record will - * be discarded. 
- */ - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - dprintk("RPC: XID %08x truncated request\n", - ntohl(transport->recv.xid)); - dprintk("RPC: xprt = %p, recv.copied = %lu, " - "recv.offset = %u, recv.len = %u\n", - xprt, transport->recv.copied, - transport->recv.offset, transport->recv.len); - return; - } - - transport->recv.copied += r; - transport->recv.offset += r; - desc->count = len - r; - - dprintk("RPC: XID %08x read %zd bytes\n", - ntohl(transport->recv.xid), r); - dprintk("RPC: xprt = %p, recv.copied = %lu, recv.offset = %u, " - "recv.len = %u\n", xprt, transport->recv.copied, - transport->recv.offset, transport->recv.len); - - if (transport->recv.copied == req->rq_private_buf.buflen) - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - else if (transport->recv.offset == transport->recv.len) { - if (transport->recv.flags & TCP_RCV_LAST_FRAG) - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - } -} - -/* - * Finds the request corresponding to the RPC xid and invokes the common - * tcp read code to read the data. - */ -static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - struct rpc_rqst *req; - - dprintk("RPC: read reply XID %08x\n", ntohl(transport->recv.xid)); - - /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->queue_lock); - req = xprt_lookup_rqst(xprt, transport->recv.xid); - if (!req) { - dprintk("RPC: XID %08x request not found!\n", - ntohl(transport->recv.xid)); - spin_unlock(&xprt->queue_lock); - return -1; - } - xprt_pin_rqst(req); - spin_unlock(&xprt->queue_lock); - - xs_tcp_read_common(xprt, desc, req); - - spin_lock(&xprt->queue_lock); - if (!(transport->recv.flags & TCP_RCV_COPY_DATA)) - xprt_complete_rqst(req->rq_task, transport->recv.copied); - xprt_unpin_rqst(req); - spin_unlock(&xprt->queue_lock); - return 0; -} - #if defined(CONFIG_SUNRPC_BACKCHANNEL) -/* - * Obtains an rpc_rqst previously allocated and invokes the common - * tcp read code to read the data. The result is placed in the callback - * queue. - * If we're unable to obtain the rpc_rqst we schedule the closing of the - * connection and return -1. - */ -static int xs_tcp_read_callback(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - struct rpc_rqst *req; - - /* Look up the request corresponding to the given XID */ - req = xprt_lookup_bc_request(xprt, transport->recv.xid); - if (req == NULL) { - printk(KERN_WARNING "Callback slot table overflowed\n"); - xprt_force_disconnect(xprt); - return -1; - } - - dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); - xs_tcp_read_common(xprt, desc, req); - - if (!(transport->recv.flags & TCP_RCV_COPY_DATA)) - xprt_complete_bc_request(req, transport->recv.copied); - - return 0; -} - -static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - - return (transport->recv.flags & TCP_RPC_REPLY) ? 
- xs_tcp_read_reply(xprt, desc) : - xs_tcp_read_callback(xprt, desc); -} - static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net) { int ret; @@ -1429,106 +1496,14 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt) { return PAGE_SIZE; } -#else -static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - return xs_tcp_read_reply(xprt, desc); -} #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -/* - * Read data off the transport. This can be either an RPC_CALL or an - * RPC_REPLY. Relay the processing to helper functions. - */ -static void xs_tcp_read_data(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - - if (_xs_tcp_read_data(xprt, desc) == 0) - xs_tcp_check_fraghdr(transport); - else { - /* - * The transport_lock protects the request handling. - * There's no need to hold it to update the recv.flags. - */ - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - } -} - -static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc) -{ - size_t len; - - len = transport->recv.len - transport->recv.offset; - if (len > desc->count) - len = desc->count; - desc->count -= len; - desc->offset += len; - transport->recv.offset += len; - dprintk("RPC: discarded %zu bytes\n", len); - xs_tcp_check_fraghdr(transport); -} - -static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) -{ - struct rpc_xprt *xprt = rd_desc->arg.data; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct xdr_skb_reader desc = { - .skb = skb, - .offset = offset, - .count = len, - }; - size_t ret; - - dprintk("RPC: xs_tcp_data_recv started\n"); - do { - trace_xs_tcp_data_recv(transport); - /* Read in a new fragment marker if necessary */ - /* Can we ever really expect to get completely empty fragments? 
*/ - if (transport->recv.flags & TCP_RCV_COPY_FRAGHDR) { - xs_tcp_read_fraghdr(xprt, &desc); - continue; - } - /* Read in the xid if necessary */ - if (transport->recv.flags & TCP_RCV_COPY_XID) { - xs_tcp_read_xid(transport, &desc); - continue; - } - /* Read in the call/reply flag */ - if (transport->recv.flags & TCP_RCV_READ_CALLDIR) { - xs_tcp_read_calldir(transport, &desc); - continue; - } - /* Read in the request data */ - if (transport->recv.flags & TCP_RCV_COPY_DATA) { - xs_tcp_read_data(xprt, &desc); - continue; - } - /* Skip over any trailing bytes on short reads */ - xs_tcp_read_discard(transport, &desc); - } while (desc.count); - ret = len - desc.count; - if (ret < rd_desc->count) - rd_desc->count -= ret; - else - rd_desc->count = 0; - trace_xs_tcp_data_recv(transport); - dprintk("RPC: xs_tcp_data_recv done\n"); - return ret; -} - static void xs_tcp_data_receive(struct sock_xprt *transport) { struct rpc_xprt *xprt = &transport->xprt; struct sock *sk; - read_descriptor_t rd_desc = { - .arg.data = xprt, - }; - unsigned long total = 0; - int read = 0; + size_t read = 0; + ssize_t ret = 0; restart: mutex_lock(&transport->recv_mutex); @@ -1536,18 +1511,12 @@ restart: if (sk == NULL) goto out; - /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ for (;;) { - rd_desc.count = RPC_TCP_READ_CHUNK_SZ; - lock_sock(sk); - read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); - if (rd_desc.count != 0 || read < 0) { - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); - release_sock(sk); + clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); + ret = xs_read_stream(transport, MSG_DONTWAIT | MSG_NOSIGNAL); + if (ret < 0) break; - } - release_sock(sk); - total += read; + read += ret; if (need_resched()) { mutex_unlock(&transport->recv_mutex); cond_resched(); @@ -1558,7 +1527,7 @@ restart: queue_work(xprtiod_workqueue, &transport->recv_worker); out: mutex_unlock(&transport->recv_mutex); - trace_xs_tcp_data_ready(xprt, read, total); + trace_xs_tcp_data_ready(xprt, ret, read); } static void xs_tcp_data_receive_workfn(struct work_struct *work) @@ -2380,7 +2349,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) transport->recv.offset = 0; transport->recv.len = 0; transport->recv.copied = 0; - transport->recv.flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; transport->xmit.offset = 0; /* Tell the socket layer to start connecting... */ @@ -2802,6 +2770,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = { .connect = xs_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, + .prepare_request = xs_stream_prepare_request, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_tcp_shutdown, -- cgit v1.2.3 From c50b8ee02f1cb9506ac06d22e8414e9fef7d6890 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Sep 2018 14:26:28 -0400 Subject: SUNRPC: Clean up - rename xs_tcp_data_receive() to xs_stream_data_receive() In preparation for sharing with AF_LOCAL. 
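A sketch of the intended reuse; the AF_LOCAL wiring shown here is hypothetical and lands in a follow-up patch:

/* Sketch: any stream-based sock_xprt can share the generic receive
 * work function instead of carrying a TCP-only copy.
 */
static void xs_setup_stream_recv(struct sock_xprt *transport)
{
	INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
}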
Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 16 +++++----- net/sunrpc/xprtsock.c | 71 ++++++++++++++++++------------------------- 2 files changed, 38 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 19e08d12696c..28e384186c35 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -470,14 +470,14 @@ TRACE_EVENT(xprt_ping, __get_str(addr), __get_str(port), __entry->status) ); -TRACE_EVENT(xs_tcp_data_ready, - TP_PROTO(struct rpc_xprt *xprt, int err, unsigned int total), +TRACE_EVENT(xs_stream_read_data, + TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total), TP_ARGS(xprt, err, total), TP_STRUCT__entry( - __field(int, err) - __field(unsigned int, total) + __field(ssize_t, err) + __field(size_t, total) __string(addr, xprt ? xprt->address_strings[RPC_DISPLAY_ADDR] : "(null)") __string(port, xprt ? xprt->address_strings[RPC_DISPLAY_PORT] : @@ -493,11 +493,11 @@ TRACE_EVENT(xs_tcp_data_ready, xprt->address_strings[RPC_DISPLAY_PORT] : "(null)"); ), - TP_printk("peer=[%s]:%s err=%d total=%u", __get_str(addr), + TP_printk("peer=[%s]:%s err=%zd total=%zu", __get_str(addr), __get_str(port), __entry->err, __entry->total) ); -TRACE_EVENT(xs_tcp_data_recv, +TRACE_EVENT(xs_stream_read_request, TP_PROTO(struct sock_xprt *xs), TP_ARGS(xs), @@ -508,7 +508,7 @@ TRACE_EVENT(xs_tcp_data_recv, __field(u32, xid) __field(unsigned long, copied) __field(unsigned int, reclen) - __field(unsigned long, offset) + __field(unsigned int, offset) ), TP_fast_assign( @@ -520,7 +520,7 @@ TRACE_EVENT(xs_tcp_data_recv, __entry->offset = xs->recv.offset; ), - TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u offset=%lu", + TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u offset=%u", __get_str(addr), __get_str(port), __entry->xid, __entry->copied, __entry->reclen, __entry->offset) ); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 06aa75008708..55df1fadab27 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -626,7 +626,7 @@ xs_read_stream(struct sock_xprt *transport, int flags) return -EAGAIN; } if (xs_read_stream_request_done(transport)) { - trace_xs_tcp_data_recv(transport); + trace_xs_stream_read_request(transport); transport->recv.copied = 0; } transport->recv.offset = 0; @@ -642,6 +642,34 @@ out_err: return ret; } +static void xs_stream_data_receive(struct sock_xprt *transport) +{ + size_t read = 0; + ssize_t ret = 0; + + mutex_lock(&transport->recv_mutex); + if (transport->sock == NULL) + goto out; + clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); + for (;;) { + ret = xs_read_stream(transport, MSG_DONTWAIT); + if (ret <= 0) + break; + read += ret; + cond_resched(); + } +out: + mutex_unlock(&transport->recv_mutex); + trace_xs_stream_read_data(&transport->xprt, ret, read); +} + +static void xs_stream_data_receive_workfn(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, recv_worker); + xs_stream_data_receive(transport); +} + #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) @@ -1498,45 +1526,6 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -static void xs_tcp_data_receive(struct sock_xprt *transport) -{ - struct rpc_xprt *xprt = &transport->xprt; - struct sock *sk; - size_t read = 0; - ssize_t ret = 0; 
- -restart: - mutex_lock(&transport->recv_mutex); - sk = transport->inet; - if (sk == NULL) - goto out; - - for (;;) { - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); - ret = xs_read_stream(transport, MSG_DONTWAIT | MSG_NOSIGNAL); - if (ret < 0) - break; - read += ret; - if (need_resched()) { - mutex_unlock(&transport->recv_mutex); - cond_resched(); - goto restart; - } - } - if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) - queue_work(xprtiod_workqueue, &transport->recv_worker); -out: - mutex_unlock(&transport->recv_mutex); - trace_xs_tcp_data_ready(xprt, ret, read); -} - -static void xs_tcp_data_receive_workfn(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, recv_worker); - xs_tcp_data_receive(transport); -} - /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed @@ -3066,7 +3055,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->connect_timeout = xprt->timeout->to_initval * (xprt->timeout->to_retries + 1); - INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn); + INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); switch (addr->sa_family) { -- cgit v1.2.3 From 550aebfe1c573518c35ae85d6ffbdc2d44c92703 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Sep 2018 14:32:45 -0400 Subject: SUNRPC: Allow AF_LOCAL sockets to use the generic stream receive Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 - net/sunrpc/socklib.c | 4 +- net/sunrpc/xprtsock.c | 137 ++++++--------------------------------------- 3 files changed, 18 insertions(+), 124 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 745587132a87..8815be7cae72 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -185,7 +185,6 @@ struct xdr_skb_reader { typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); -size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len); extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, struct xdr_skb_reader *, xdr_skb_read_actor); diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 08f00a98151f..0e7c0dee7578 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -26,7 +26,8 @@ * Possibly called several times to iterate over an sk_buff and copy * data out of it. 
*/ -size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) +static size_t +xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) { if (len > desc->count) len = desc->count; @@ -36,7 +37,6 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) desc->offset += len; return len; } -EXPORT_SYMBOL_GPL(xdr_skb_read_bits); /** * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 55df1fadab27..90d4c92177b7 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -670,6 +670,17 @@ static void xs_stream_data_receive_workfn(struct work_struct *work) xs_stream_data_receive(transport); } +static void +xs_stream_reset_connect(struct sock_xprt *transport) +{ + transport->recv.offset = 0; + transport->recv.len = 0; + transport->recv.copied = 0; + transport->xmit.offset = 0; + transport->xprt.stat.connect_count++; + transport->xprt.stat.connect_start = jiffies; +} + #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) @@ -1266,114 +1277,6 @@ static void xs_destroy(struct rpc_xprt *xprt) module_put(THIS_MODULE); } -static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) -{ - struct xdr_skb_reader desc = { - .skb = skb, - .offset = sizeof(rpc_fraghdr), - .count = skb->len - sizeof(rpc_fraghdr), - }; - - if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) - return -1; - if (desc.count) - return -1; - return 0; -} - -/** - * xs_local_data_read_skb - * @xprt: transport - * @sk: socket - * @skb: skbuff - * - * Currently this assumes we can read the whole reply in a single gulp. - */ -static void xs_local_data_read_skb(struct rpc_xprt *xprt, - struct sock *sk, - struct sk_buff *skb) -{ - struct rpc_task *task; - struct rpc_rqst *rovr; - int repsize, copied; - u32 _xid; - __be32 *xp; - - repsize = skb->len - sizeof(rpc_fraghdr); - if (repsize < 4) { - dprintk("RPC: impossible RPC reply size %d\n", repsize); - return; - } - - /* Copy the XID from the skb... 
*/ - xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); - if (xp == NULL) - return; - - /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->queue_lock); - rovr = xprt_lookup_rqst(xprt, *xp); - if (!rovr) - goto out_unlock; - xprt_pin_rqst(rovr); - spin_unlock(&xprt->queue_lock); - task = rovr->rq_task; - - copied = rovr->rq_private_buf.buflen; - if (copied > repsize) - copied = repsize; - - if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { - dprintk("RPC: sk_buff copy failed\n"); - spin_lock(&xprt->queue_lock); - goto out_unpin; - } - - spin_lock(&xprt->queue_lock); - xprt_complete_rqst(task, copied); -out_unpin: - xprt_unpin_rqst(rovr); - out_unlock: - spin_unlock(&xprt->queue_lock); -} - -static void xs_local_data_receive(struct sock_xprt *transport) -{ - struct sk_buff *skb; - struct sock *sk; - int err; - -restart: - mutex_lock(&transport->recv_mutex); - sk = transport->inet; - if (sk == NULL) - goto out; - for (;;) { - skb = skb_recv_datagram(sk, 0, 1, &err); - if (skb != NULL) { - xs_local_data_read_skb(&transport->xprt, sk, skb); - skb_free_datagram(sk, skb); - continue; - } - if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) - break; - if (need_resched()) { - mutex_unlock(&transport->recv_mutex); - cond_resched(); - goto restart; - } - } -out: - mutex_unlock(&transport->recv_mutex); -} - -static void xs_local_data_receive_workfn(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, recv_worker); - xs_local_data_receive(transport); -} - /** * xs_udp_data_read_skb - receive callback for UDP sockets * @xprt: transport @@ -1974,11 +1877,8 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, write_unlock_bh(&sk->sk_callback_lock); } - transport->xmit.offset = 0; + xs_stream_reset_connect(transport); - /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); } @@ -2335,14 +2235,9 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_set_memalloc(xprt); /* Reset TCP record info */ - transport->recv.offset = 0; - transport->recv.len = 0; - transport->recv.copied = 0; - transport->xmit.offset = 0; + xs_stream_reset_connect(transport); /* Tell the socket layer to start connecting... 
*/ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); switch (ret) { @@ -2717,6 +2612,7 @@ static const struct rpc_xprt_ops xs_local_ops = { .connect = xs_local_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, + .prepare_request = xs_stream_prepare_request, .send_request = xs_local_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, @@ -2901,9 +2797,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) xprt->ops = &xs_local_ops; xprt->timeout = &xs_local_default_timeout; - INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_dummy_setup_socket); + INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); + INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket); switch (sun->sun_family) { case AF_LOCAL: -- cgit v1.2.3 From ec846469ba7bdb81e42c04e4e15d8fbf19e426e2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Sep 2018 14:38:05 -0400 Subject: SUNRPC: Unexport xdr_partial_copy_from_skb() It is no longer used outside of net/sunrpc/socklib.c Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 -- net/sunrpc/socklib.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8815be7cae72..43106ffa6788 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -186,8 +186,6 @@ struct xdr_skb_reader { typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); -extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, - struct xdr_skb_reader *, xdr_skb_read_actor); extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 0e7c0dee7578..9062967575c4 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -69,7 +69,8 @@ static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, * @copy_actor: virtual method for copying data * */ -ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor) +static ssize_t +xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor) { struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; @@ -140,7 +141,6 @@ copy_tail: out: return copied; } -EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb); /** * csum_partial_copy_to_xdr - checksum and copy data -- cgit v1.2.3 From 1db97eaa0b482a738c715da6edb023d6f99e50b0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Sep 2018 15:11:57 -0400 Subject: NFS: Convert lookups of the lock context to RCU Speed up lookups of an existing lock context by avoiding the inode->i_lock, and using RCU instead. 
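The lookup pattern introduced here generalizes: iterate the list under rcu_read_lock(), and take a reference only if the refcount can be proven non-zero at that instant. A minimal sketch of the lookup side, using invented names rather than the real fs/nfs types:

/* Illustrative sketch only: "struct item" and its fields are not kernel types. */
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>

struct item {
	struct list_head list;
	int key;
	refcount_t count;
};

/* Runs entirely under rcu_read_lock(): no spinlock in the fast path. */
static struct item *item_find_get(struct list_head *head, int key)
{
	struct item *pos;

	rcu_read_lock();
	list_for_each_entry_rcu(pos, head, list) {
		if (pos->key != key)
			continue;
		/* A dying entry may still be visible on the list;
		 * refcount_inc_not_zero() refuses to resurrect it,
		 * so the scan simply moves on.
		 */
		if (refcount_inc_not_zero(&pos->count)) {
			rcu_read_unlock();
			return pos;
		}
	}
	rcu_read_unlock();
	return NULL;
}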
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 25 ++++++++++++------------- include/linux/nfs_fs.h | 1 + 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b65aee481d13..09b3b7146ff4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -857,15 +857,14 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) { - struct nfs_lock_context *head = &ctx->lock_context; - struct nfs_lock_context *pos = head; + struct nfs_lock_context *pos; - do { + list_for_each_entry_rcu(pos, &ctx->lock_context.list, list) { if (pos->lockowner != current->files) continue; - refcount_inc(&pos->count); - return pos; - } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); + if (refcount_inc_not_zero(&pos->count)) + return pos; + } return NULL; } @@ -874,10 +873,10 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) struct nfs_lock_context *res, *new = NULL; struct inode *inode = d_inode(ctx->dentry); - spin_lock(&inode->i_lock); + rcu_read_lock(); res = __nfs_find_lock_context(ctx); + rcu_read_unlock(); if (res == NULL) { - spin_unlock(&inode->i_lock); new = kmalloc(sizeof(*new), GFP_KERNEL); if (new == NULL) return ERR_PTR(-ENOMEM); @@ -885,14 +884,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) spin_lock(&inode->i_lock); res = __nfs_find_lock_context(ctx); if (res == NULL) { - list_add_tail(&new->list, &ctx->lock_context.list); + list_add_tail_rcu(&new->list, &ctx->lock_context.list); new->open_context = ctx; res = new; new = NULL; } + spin_unlock(&inode->i_lock); + kfree(new); } - spin_unlock(&inode->i_lock); - kfree(new); return res; } EXPORT_SYMBOL_GPL(nfs_get_lock_context); @@ -904,9 +903,9 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx) if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock)) return; - list_del(&l_ctx->list); + list_del_rcu(&l_ctx->list); spin_unlock(&inode->i_lock); - kfree(l_ctx); + kfree_rcu(l_ctx, rcu_head); } EXPORT_SYMBOL_GPL(nfs_put_lock_context); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a0831e9d19c9..d2f4f88a0e66 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -62,6 +62,7 @@ struct nfs_lock_context { struct nfs_open_context *open_context; fl_owner_t lockowner; atomic_t io_count; + struct rcu_head rcu_head; }; struct nfs4_state; -- cgit v1.2.3 From 0de43976fbe716379084f954b1e370c35aa87bf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Sep 2018 15:57:01 -0400 Subject: NFS: Convert lookups of the open context to RCU Reduce contention on the inode->i_lock by ensuring that we use RCU when looking up the NFS open context. 
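The put side must cooperate for this to be safe: the entry is unlinked under the spinlock, so writers stay serialized, but the memory is reclaimed only after an RCU grace period, once no reader can still hold a pointer obtained inside rcu_read_lock(). A sketch of that half, extending the invented item type above with an rcu_head:

/* Illustrative sketch only: invented names, not the fs/nfs code. */
#include <linux/rculist.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head list;
	refcount_t count;
	struct rcu_head rcu_head;
};

static void item_put(struct item *item, spinlock_t *lock)
{
	/* Only the final put takes the lock at all. */
	if (!refcount_dec_and_lock(&item->count, lock))
		return;
	list_del_rcu(&item->list);
	spin_unlock(lock);
	/* Readers that found the entry before list_del_rcu() may
	 * still be dereferencing it; kfree_rcu() waits them out.
	 */
	kfree_rcu(item, rcu_head);
}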
Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 11 ++++++----- fs/nfs/inode.c | 35 +++++++++++++++-------------------- fs/nfs/nfs4proc.c | 30 ++++++++++++++++++++++++------ fs/nfs/nfs4state.c | 12 ++++++------ fs/nfs/pnfs.c | 5 ++++- include/linux/nfs_fs.h | 1 + 6 files changed, 56 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index f033f3a69a3b..76d205d1c7bc 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -136,8 +136,8 @@ static int nfs_delegation_claim_opens(struct inode *inode, int err; again: - spin_lock(&inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { state = ctx->state; if (state == NULL) continue; @@ -147,8 +147,9 @@ again: continue; if (!nfs4_stateid_match(&state->stateid, stateid)) continue; - get_nfs_open_context(ctx); - spin_unlock(&inode->i_lock); + if (!get_nfs_open_context(ctx)) + continue; + rcu_read_unlock(); sp = state->owner; /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); @@ -164,7 +165,7 @@ again: return err; goto again; } - spin_unlock(&inode->i_lock); + rcu_read_unlock(); return 0; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 052db41a7f80..5b1eee4952b7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -977,9 +977,9 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { - if (ctx != NULL) - refcount_inc(&ctx->lock_context.count); - return ctx; + if (ctx != NULL && refcount_inc_not_zero(&ctx->lock_context.count)) + return ctx; + return NULL; } EXPORT_SYMBOL_GPL(get_nfs_open_context); @@ -988,13 +988,13 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) struct inode *inode = d_inode(ctx->dentry); struct super_block *sb = ctx->dentry->d_sb; + if (!refcount_dec_and_test(&ctx->lock_context.count)) + return; if (!list_empty(&ctx->list)) { - if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) - return; - list_del(&ctx->list); + spin_lock(&inode->i_lock); + list_del_rcu(&ctx->list); spin_unlock(&inode->i_lock); - } else if (!refcount_dec_and_test(&ctx->lock_context.count)) - return; + } if (inode != NULL) NFS_PROTO(inode)->close_context(ctx, is_sync); if (ctx->cred != NULL) @@ -1002,7 +1002,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) dput(ctx->dentry); nfs_sb_deactive(sb); kfree(ctx->mdsthreshold); - kfree(ctx); + kfree_rcu(ctx, rcu_head); } void put_nfs_open_context(struct nfs_open_context *ctx) @@ -1026,10 +1026,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - if (ctx->mode & FMODE_WRITE) - list_add(&ctx->list, &nfsi->open_files); - else - list_add_tail(&ctx->list, &nfsi->open_files); + list_add_tail_rcu(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); @@ -1050,16 +1047,17 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *pos, *ctx = NULL; - spin_lock(&inode->i_lock); - list_for_each_entry(pos, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pos, &nfsi->open_files, list) { if (cred != NULL && pos->cred != cred) continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; ctx = get_nfs_open_context(pos); - break; + if (ctx) + break; 
} - spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ctx; } @@ -1077,9 +1075,6 @@ void nfs_file_clear_open_context(struct file *filp) if (ctx->error < 0) invalidate_inode_pages2(inode->i_mapping); filp->private_data = NULL; - spin_lock(&inode->i_lock); - list_move_tail(&ctx->list, &NFS_I(inode)->open_files); - spin_unlock(&inode->i_lock); put_nfs_open_context_sync(ctx); } } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8220a168282e..10c20a5b075d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1933,23 +1933,41 @@ nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) return ret; } -static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) +static struct nfs_open_context * +nfs4_state_find_open_context_mode(struct nfs4_state *state, fmode_t mode) { struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_open_context *ctx; - spin_lock(&state->inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { if (ctx->state != state) continue; - get_nfs_open_context(ctx); - spin_unlock(&state->inode->i_lock); + if ((ctx->mode & mode) != mode) + continue; + if (!get_nfs_open_context(ctx)) + continue; + rcu_read_unlock(); return ctx; } - spin_unlock(&state->inode->i_lock); + rcu_read_unlock(); return ERR_PTR(-ENOENT); } +static struct nfs_open_context * +nfs4_state_find_open_context(struct nfs4_state *state) +{ + struct nfs_open_context *ctx; + + ctx = nfs4_state_find_open_context_mode(state, FMODE_READ|FMODE_WRITE); + if (!IS_ERR(ctx)) + return ctx; + ctx = nfs4_state_find_open_context_mode(state, FMODE_WRITE); + if (!IS_ERR(ctx)) + return ctx; + return nfs4_state_find_open_context_mode(state, FMODE_READ); +} + static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state, enum open_claim_type4 claim) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 40a08cd483f0..be92ce4259e9 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1437,8 +1437,8 @@ void nfs_inode_find_state_and_recover(struct inode *inode, struct nfs4_state *state; bool found = false; - spin_lock(&inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { state = ctx->state; if (state == NULL) continue; @@ -1456,7 +1456,7 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_state_mark_reclaim_nograce(clp, state)) found = true; } - spin_unlock(&inode->i_lock); + rcu_read_unlock(); nfs_inode_find_delegation_state_and_recover(inode, stateid); if (found) @@ -1469,13 +1469,13 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; - spin_lock(&inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { if (ctx->state != state) continue; set_bit(NFS_CONTEXT_BAD, &ctx->flags); } - spin_unlock(&inode->i_lock); + rcu_read_unlock(); } static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c5672c02afd6..06cb90e9bc6e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1339,6 +1339,7 @@ bool pnfs_roc(struct inode *ino, if (!nfs_have_layout(ino)) return false; retry: + rcu_read_lock(); spin_lock(&ino->i_lock); lo = nfsi->layout; if (!lo || !pnfs_layout_is_valid(lo) || @@ -1349,6 +1350,7 @@ 
retry: pnfs_get_layout_hdr(lo); if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { spin_unlock(&ino->i_lock); + rcu_read_unlock(); wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); pnfs_put_layout_hdr(lo); @@ -1362,7 +1364,7 @@ retry: skip_read = true; } - list_for_each_entry(ctx, &nfsi->open_files, list) { + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { state = ctx->state; if (state == NULL) continue; @@ -1410,6 +1412,7 @@ retry: out_noroc: spin_unlock(&ino->i_lock); + rcu_read_unlock(); pnfs_layoutcommit_inode(ino, true); if (roc) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d2f4f88a0e66..6e0417c02279 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -83,6 +83,7 @@ struct nfs_open_context { struct list_head list; struct nfs4_threshold *mdsthreshold; + struct rcu_head rcu_head; }; struct nfs_open_dir_context { -- cgit v1.2.3 From 8d8928d87960d71f898767185b8c0e4ce3de3cbe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Mar 2018 12:03:00 -0500 Subject: NFSv3: Improve NFSv3 performance when server returns no post-op attributes When the server fails to return post-op attributes, the client's attempt to place read data directly in the page cache fails, and so we have to do an extra copy in order to realign the data with page borders. This patch attempts to detect servers that don't return post-op attributes on read (e.g. for pNFS) and adjusts the placement calculation accordingly. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 5 +++++ fs/nfs/nfs3xdr.c | 6 +++++- include/linux/nfs_fs_sb.h | 3 +++ include/linux/nfs_xdr.h | 3 ++- 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ec8a9efa268f..71bc16225b98 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -786,6 +786,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { struct inode *inode = hdr->inode; + struct nfs_server *server = NFS_SERVER(inode); if (hdr->pgio_done_cb != NULL) return hdr->pgio_done_cb(task, hdr); @@ -793,6 +794,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; + if (task->tk_status >= 0 && !server->read_hdrsize) + cmpxchg(&server->read_hdrsize, 0, hdr->res.replen); + nfs_invalidate_atime(inode); nfs_refresh_inode(inode, &hdr->fattr); return 0; @@ -802,6 +806,7 @@ static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; + hdr->args.replen = NFS_SERVER(hdr->inode)->read_hdrsize; } static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index d8c4c10b15f7..78df4eb60f85 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -983,10 +983,11 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, const void *data) { const struct nfs_pgio_args *args = data; + unsigned int replen = args->replen ? 
args->replen : NFS3_readres_sz; encode_read3args(xdr, args); prepare_reply_buffer(req, args->pages, args->pgbase, - args->count, NFS3_readres_sz); + args->count, replen); req->rq_rcv_buf.flags |= XDRBUF_READ; } @@ -1675,9 +1676,11 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, void *data) { struct nfs_pgio_res *result = data; + unsigned int pos; enum nfs_stat status; int error; + pos = xdr_stream_pos(xdr); error = decode_nfsstat3(xdr, &status); if (unlikely(error)) goto out; @@ -1687,6 +1690,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, result->op_status = status; if (status != NFS3_OK) goto out_status; + result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2); error = decode_read3resok(xdr, result); out: return error; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index bf39d9c92201..0fc0b9135d46 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -228,6 +228,9 @@ struct nfs_server { unsigned short mountd_port; unsigned short mountd_protocol; struct rpc_wait_queue uoc_rpcwaitq; + + /* XDR related information */ + unsigned int read_hdrsize; }; /* Server capabilities */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bd1c889a9ed9..7f5535e5e852 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -608,6 +608,7 @@ struct nfs_pgio_args { __u32 count; unsigned int pgbase; struct page ** pages; + unsigned int replen; /* used by read */ const u32 * bitmask; /* used by write */ enum nfs3_stable_how stable; /* used by write */ }; @@ -618,9 +619,9 @@ struct nfs_pgio_res { __u32 count; __u32 op_status; int eof; /* used by read */ + unsigned int replen; /* used by read */ struct nfs_writeverf * verf; /* used by write */ const struct nfs_server *server; /* used by write */ - }; /* -- cgit v1.2.3 From 28d52235ee25ba7d843242b4cb3c3f27a8828b5a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 24 Sep 2018 13:15:37 -0400 Subject: NFSv4: Save a few bytes in the nfs_pgio_args/res Save a few bytes by allowing the read/write specific fields of the structures to share storage. 
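The mechanics are plain C: a given request is either a read or a write, never both at once, so the read-only and write-only fields can overlay one another in an anonymous union. A stand-alone illustration with invented field types (anonymous members require C11 or the GNU extension):

/* Demonstration only: field names and types are invented. */
#include <stdio.h>

struct res_separate {
	unsigned int replen;	/* read only */
	int eof;		/* read only */
	void *verf;		/* write only */
	const void *server;	/* write only */
};

struct res_shared {
	union {
		struct {		/* used by read */
			unsigned int replen;
			int eof;
		};
		struct {		/* used by write */
			void *verf;
			const void *server;
		};
	};
};

int main(void)
{
	/* Typically 24 vs. 16 bytes on an LP64 target. */
	printf("separate: %zu bytes, shared: %zu bytes\n",
	       sizeof(struct res_separate), sizeof(struct res_shared));
	return 0;
}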
Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7f5535e5e852..343e44166346 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -608,9 +608,13 @@ struct nfs_pgio_args { __u32 count; unsigned int pgbase; struct page ** pages; - unsigned int replen; /* used by read */ - const u32 * bitmask; /* used by write */ - enum nfs3_stable_how stable; /* used by write */ + union { + unsigned int replen; /* used by read */ + struct { + const u32 * bitmask; /* used by write */ + enum nfs3_stable_how stable; /* used by write */ + }; + }; }; struct nfs_pgio_res { @@ -618,10 +622,16 @@ struct nfs_pgio_res { struct nfs_fattr * fattr; __u32 count; __u32 op_status; - int eof; /* used by read */ - unsigned int replen; /* used by read */ - struct nfs_writeverf * verf; /* used by write */ - const struct nfs_server *server; /* used by write */ + union { + struct { + unsigned int replen; /* used by read */ + int eof; /* used by read */ + }; + struct { + struct nfs_writeverf * verf; /* used by write */ + const struct nfs_server *server; /* used by write */ + }; + }; }; /* -- cgit v1.2.3 From 1c6c4b740df12f2162ae5c3fac337137e2776236 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Sep 2018 12:34:43 -0400 Subject: NFS: Remove private spinlock in struct nfs_pgio_header Now that each struct nfs_pgio_header corresponds to one RPC call, we only have one writer to the struct nfs_pgio_header. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 13 ++++++------- fs/nfs/read.c | 10 ++++------ include/linux/nfs_xdr.h | 5 ++--- 3 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index bb5476a6d264..f97c455f5734 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -63,14 +63,14 @@ EXPORT_SYMBOL_GPL(nfs_pgheader_init); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { - spin_lock(&hdr->lock); - if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags) - || pos < hdr->io_start + hdr->good_bytes) { + unsigned int new = pos - hdr->io_start; + + if (hdr->good_bytes > new) { + hdr->good_bytes = new; clear_bit(NFS_IOHDR_EOF, &hdr->flags); - hdr->good_bytes = pos - hdr->io_start; - hdr->error = error; + if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags)) + hdr->error = error; } - spin_unlock(&hdr->lock); } static inline struct nfs_page * @@ -494,7 +494,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) if (hdr) { INIT_LIST_HEAD(&hdr->pages); - spin_lock_init(&hdr->lock); hdr->rw_ops = ops; } return hdr; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 48d7277c60a9..f9f19784db82 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -276,16 +276,14 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_header *hdr) { if (hdr->res.eof) { - loff_t bound; + loff_t pos = hdr->args.offset + hdr->res.count; + unsigned int new = pos - hdr->io_start; - bound = hdr->args.offset + hdr->res.count; - spin_lock(&hdr->lock); - if (bound < hdr->io_start + hdr->good_bytes) { + if (hdr->good_bytes > new) { + hdr->good_bytes = new; set_bit(NFS_IOHDR_EOF, &hdr->flags); clear_bit(NFS_IOHDR_ERROR, &hdr->flags); - hdr->good_bytes = bound - hdr->io_start; } - spin_unlock(&hdr->lock); } else if (hdr->res.count < hdr->args.count) nfs_readpage_retry(task, hdr); } diff --git a/include/linux/nfs_xdr.h 
b/include/linux/nfs_xdr.h index 343e44166346..0e016252cfc6 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1482,11 +1482,10 @@ struct nfs_pgio_header { const struct nfs_rw_ops *rw_ops; struct nfs_io_completion *io_completion; struct nfs_direct_req *dreq; - spinlock_t lock; - /* fields protected by lock */ + int pnfs_error; int error; /* merge with pnfs_error */ - unsigned long good_bytes; /* boundary of good data */ + unsigned int good_bytes; /* boundary of good data */ unsigned long flags; /* -- cgit v1.2.3 From 571ed1fd2390f74e4c1f46994f753fb0d29285e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Sep 2018 16:00:43 -0400 Subject: SUNRPC: Replace krb5_seq_lock with a lockless scheme Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 3 ++- net/sunrpc/auth_gss/gss_krb5_seal.c | 37 ++++++++++++++++++++++++++----------- net/sunrpc/auth_gss/gss_krb5_wrap.c | 8 ++------ 3 files changed, 30 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 7df625d41e35..69f749afa617 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -118,7 +118,8 @@ struct krb5_ctx { u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; -extern spinlock_t krb5_seq_lock; +extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx); +extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx); /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index eaad9bc7a0bd..0ffb797b92e5 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -68,8 +68,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -DEFINE_SPINLOCK(krb5_seq_lock); - static void * setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token) { @@ -124,6 +122,30 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token) return krb5_hdr; } +u32 +gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx) +{ + u32 old, seq_send = READ_ONCE(ctx->seq_send); + + do { + old = seq_send; + seq_send = cmpxchg(&ctx->seq_send, old, old + 1); + } while (old != seq_send); + return seq_send; +} + +u64 +gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx) +{ + u64 old, seq_send = READ_ONCE(ctx->seq_send64); + + do { + old = seq_send; + seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1); + } while (old != seq_send); + return seq_send; +} + static u32 gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token) @@ -154,9 +176,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len); - spin_lock(&krb5_seq_lock); - seq_send = ctx->seq_send++; - spin_unlock(&krb5_seq_lock); + seq_send = gss_seq_send_fetch_and_inc(ctx); if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff, seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)) @@ -174,7 +194,6 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, .data = cksumdata}; void *krb5_hdr; s32 now; - u64 seq_send; u8 *cksumkey; unsigned int cksum_usage; __be64 seq_send_be64; @@ -185,11 +204,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, /* Set up the sequence number.
Now 64-bits in clear * text and w/o direction indicator */ - spin_lock(&krb5_seq_lock); - seq_send = ctx->seq_send64++; - spin_unlock(&krb5_seq_lock); - - seq_send_be64 = cpu_to_be64(seq_send); + seq_send_be64 = cpu_to_be64(gss_seq_send64_fetch_and_inc(ctx)); memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8); if (ctx->initiate) { diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 39a2e672900b..41cb294cd071 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -228,9 +228,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len); - spin_lock(&krb5_seq_lock); - seq_send = kctx->seq_send++; - spin_unlock(&krb5_seq_lock); + seq_send = gss_seq_send_fetch_and_inc(kctx); /* XXX would probably be more efficient to compute checksum * and encrypt at the same time: */ @@ -477,9 +475,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, *be16ptr++ = 0; be64ptr = (__be64 *)be16ptr; - spin_lock(&krb5_seq_lock); - *be64ptr = cpu_to_be64(kctx->seq_send64++); - spin_unlock(&krb5_seq_lock); + *be64ptr = cpu_to_be64(gss_seq_send64_fetch_and_inc(kctx)); err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages); if (err) -- cgit v1.2.3 From 61da886bf74e738995d359fa14d77f72d14cfb87 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Oct 2018 14:25:25 -0400 Subject: xprtrdma: Explicitly resetting MRs is no longer necessary When a memory operation fails, the MR's driver state might not match its hardware state. The only reliable recourse is to dereg the MR. This is done in ->ro_recover_mr, which then attempts to allocate a fresh MR to replace the released MR. Since commit e2ac236c0b651 ("xprtrdma: Allocate MRs on demand"), xprtrdma dynamically allocates MRs. It can add more MRs whenever they are needed. That makes it possible to simply release an MR when a memory operation fails, instead of "recovering" it. It will automatically be replaced by the on-demand MR allocator. This commit is a little larger than I wanted, but it replaces ->ro_recover_mr, rb_recovery_lock, rb_recovery_worker, and the rb_stale_mrs list with a generic work queue. Since MRs are no longer orphaned, the mrs_orphaned metric is no longer used. 
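The replacement mechanism is a standard deferred-teardown idiom: embed a work_struct in the object and let failure paths merely schedule it, so that unmapping, deregistration, and freeing all happen later in process context. A minimal sketch under invented names, not the actual xprtrdma structures:

/* Illustrative sketch only: "struct mr_like" is not the rpcrdma type. */
#include <linux/slab.h>
#include <linux/workqueue.h>

struct mr_like {
	struct work_struct recycle;
	/* ...mapping and registration state... */
};

static void mr_recycle_worker(struct work_struct *work)
{
	struct mr_like *mr = container_of(work, struct mr_like, recycle);

	/* Unmap/deregister here, then release; a replacement object
	 * is allocated on demand the next time one is needed.
	 */
	kfree(mr);
}

static void mr_init(struct mr_like *mr)
{
	INIT_WORK(&mr->recycle, mr_recycle_worker);
}

/* Safe from contexts that cannot sleep or that hold locks: */
static void mr_recycle(struct mr_like *mr)
{
	schedule_work(&mr->recycle);
}

A caller that detects a bad MR in interrupt context or under a lock then pays only for schedule_work(); the expensive teardown runs from the system workqueue.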
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 2 +- net/sunrpc/xprtrdma/fmr_ops.c | 124 +++++++++++++++++--------------------- net/sunrpc/xprtrdma/frwr_ops.c | 130 +++++++++++++++------------------------- net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 38 ------------ net/sunrpc/xprtrdma/xprt_rdma.h | 14 +++-- 7 files changed, 114 insertions(+), 198 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 53df203b8057..9906f4d7d9fb 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -655,7 +655,7 @@ DEFINE_MR_EVENT(xprtrdma_localinv); DEFINE_MR_EVENT(xprtrdma_dma_map); DEFINE_MR_EVENT(xprtrdma_dma_unmap); DEFINE_MR_EVENT(xprtrdma_remoteinv); -DEFINE_MR_EVENT(xprtrdma_recover_mr); +DEFINE_MR_EVENT(xprtrdma_mr_recycle); /** ** Reply events diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index db589a23682b..f1cf3a36a992 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -49,46 +49,7 @@ fmr_is_supported(struct rpcrdma_ia *ia) return true; } -static int -fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) -{ - static struct ib_fmr_attr fmr_attr = { - .max_pages = RPCRDMA_MAX_FMR_SGES, - .max_maps = 1, - .page_shift = PAGE_SHIFT - }; - - mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, - sizeof(u64), GFP_KERNEL); - if (!mr->fmr.fm_physaddrs) - goto out_free; - - mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, - sizeof(*mr->mr_sg), GFP_KERNEL); - if (!mr->mr_sg) - goto out_free; - - sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES); - - mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, - &fmr_attr); - if (IS_ERR(mr->fmr.fm_mr)) - goto out_fmr_err; - - INIT_LIST_HEAD(&mr->mr_list); - return 0; - -out_fmr_err: - dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, - PTR_ERR(mr->fmr.fm_mr)); - -out_free: - kfree(mr->mr_sg); - kfree(mr->fmr.fm_physaddrs); - return -ENOMEM; -} - -static int +static void __fmr_unmap(struct rpcrdma_mr *mr) { LIST_HEAD(l); @@ -97,13 +58,16 @@ __fmr_unmap(struct rpcrdma_mr *mr) list_add(&mr->fmr.fm_mr->list, &l); rc = ib_unmap_fmr(&l); list_del(&mr->fmr.fm_mr->list); - return rc; + if (rc) + pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", + mr, rc); } +/* Release an MR. + */ static void fmr_op_release_mr(struct rpcrdma_mr *mr) { - LIST_HEAD(unmap_list); int rc; kfree(mr->fmr.fm_physaddrs); @@ -112,10 +76,7 @@ fmr_op_release_mr(struct rpcrdma_mr *mr) /* In case this one was left mapped, try to unmap it * to prevent dealloc_fmr from failing with EBUSY */ - rc = __fmr_unmap(mr); - if (rc) - pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", - mr, rc); + __fmr_unmap(mr); rc = ib_dealloc_fmr(mr->fmr.fm_mr); if (rc) @@ -125,28 +86,16 @@ fmr_op_release_mr(struct rpcrdma_mr *mr) kfree(mr); } -/* Reset of a single FMR. +/* MRs are dynamically allocated, so simply clean up and release the MR. + * A replacement MR will subsequently be allocated on demand. 
*/ static void -fmr_op_recover_mr(struct rpcrdma_mr *mr) +fmr_mr_recycle_worker(struct work_struct *work) { + struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle); struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - int rc; - /* ORDER: invalidate first */ - rc = __fmr_unmap(mr); - if (rc) - goto out_release; - - /* ORDER: then DMA unmap */ - rpcrdma_mr_unmap_and_put(mr); - - r_xprt->rx_stats.mrs_recovered++; - return; - -out_release: - pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr); - r_xprt->rx_stats.mrs_orphaned++; + trace_xprtrdma_mr_recycle(mr); trace_xprtrdma_dma_unmap(mr); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, @@ -154,11 +103,51 @@ out_release: spin_lock(&r_xprt->rx_buf.rb_mrlock); list_del(&mr->mr_all); + r_xprt->rx_stats.mrs_recycled++; spin_unlock(&r_xprt->rx_buf.rb_mrlock); - fmr_op_release_mr(mr); } +static int +fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) +{ + static struct ib_fmr_attr fmr_attr = { + .max_pages = RPCRDMA_MAX_FMR_SGES, + .max_maps = 1, + .page_shift = PAGE_SHIFT + }; + + mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, + sizeof(u64), GFP_KERNEL); + if (!mr->fmr.fm_physaddrs) + goto out_free; + + mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, + sizeof(*mr->mr_sg), GFP_KERNEL); + if (!mr->mr_sg) + goto out_free; + + sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES); + + mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, + &fmr_attr); + if (IS_ERR(mr->fmr.fm_mr)) + goto out_fmr_err; + + INIT_LIST_HEAD(&mr->mr_list); + INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker); + return 0; + +out_fmr_err: + dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, + PTR_ERR(mr->fmr.fm_mr)); + +out_free: + kfree(mr->mr_sg); + kfree(mr->fmr.fm_physaddrs); + return -ENOMEM; +} + /* On success, sets: * ep->rep_attr.cap.max_send_wr * ep->rep_attr.cap.max_recv_wr @@ -312,7 +301,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) r_xprt->rx_stats.local_inv_needed++; rc = ib_unmap_fmr(&unmap_list); if (rc) - goto out_reset; + goto out_release; /* ORDER: Now DMA unmap all of the req's MRs, and return * them to the free MW list. @@ -325,13 +314,13 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) return; -out_reset: +out_release: pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); while (!list_empty(mrs)) { mr = rpcrdma_mr_pop(mrs); list_del(&mr->fmr.fm_mr->list); - fmr_op_recover_mr(mr); + rpcrdma_mr_recycle(mr); } } @@ -339,7 +328,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { .ro_map = fmr_op_map, .ro_send = fmr_op_send, .ro_unmap_sync = fmr_op_unmap_sync, - .ro_recover_mr = fmr_op_recover_mr, .ro_open = fmr_op_open, .ro_maxpages = fmr_op_maxpages, .ro_init_mr = fmr_op_init_mr, diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 1cc4db515c85..6594627e3c7d 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -97,6 +97,44 @@ out_not_supported: return false; } +static void +frwr_op_release_mr(struct rpcrdma_mr *mr) +{ + int rc; + + rc = ib_dereg_mr(mr->frwr.fr_mr); + if (rc) + pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", + mr, rc); + kfree(mr->mr_sg); + kfree(mr); +} + +/* MRs are dynamically allocated, so simply clean up and release the MR. + * A replacement MR will subsequently be allocated on demand. 
+ */ +static void +frwr_mr_recycle_worker(struct work_struct *work) +{ + struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle); + enum rpcrdma_frwr_state state = mr->frwr.fr_state; + struct rpcrdma_xprt *r_xprt = mr->mr_xprt; + + trace_xprtrdma_mr_recycle(mr); + + if (state != FRWR_FLUSHED_LI) { + trace_xprtrdma_dma_unmap(mr); + ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, + mr->mr_sg, mr->mr_nents, mr->mr_dir); + } + + spin_lock(&r_xprt->rx_buf.rb_mrlock); + list_del(&mr->mr_all); + r_xprt->rx_stats.mrs_recycled++; + spin_unlock(&r_xprt->rx_buf.rb_mrlock); + frwr_op_release_mr(mr); +} + static int frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) { @@ -113,6 +151,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) goto out_list_err; INIT_LIST_HEAD(&mr->mr_list); + INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker); sg_init_table(mr->mr_sg, depth); init_completion(&frwr->fr_linv_done); return 0; @@ -131,79 +170,6 @@ out_list_err: return rc; } -static void -frwr_op_release_mr(struct rpcrdma_mr *mr) -{ - int rc; - - rc = ib_dereg_mr(mr->frwr.fr_mr); - if (rc) - pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", - mr, rc); - kfree(mr->mr_sg); - kfree(mr); -} - -static int -__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) -{ - struct rpcrdma_frwr *frwr = &mr->frwr; - int rc; - - rc = ib_dereg_mr(frwr->fr_mr); - if (rc) { - pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", - rc, mr); - return rc; - } - - frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, - ia->ri_max_frwr_depth); - if (IS_ERR(frwr->fr_mr)) { - pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", - PTR_ERR(frwr->fr_mr), mr); - return PTR_ERR(frwr->fr_mr); - } - - dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr); - frwr->fr_state = FRWR_IS_INVALID; - return 0; -} - -/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR. - */ -static void -frwr_op_recover_mr(struct rpcrdma_mr *mr) -{ - enum rpcrdma_frwr_state state = mr->frwr.fr_state; - struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - int rc; - - rc = __frwr_mr_reset(ia, mr); - if (state != FRWR_FLUSHED_LI) { - trace_xprtrdma_dma_unmap(mr); - ib_dma_unmap_sg(ia->ri_device, - mr->mr_sg, mr->mr_nents, mr->mr_dir); - } - if (rc) - goto out_release; - - rpcrdma_mr_put(mr); - r_xprt->rx_stats.mrs_recovered++; - return; - -out_release: - pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr); - r_xprt->rx_stats.mrs_orphaned++; - - spin_lock(&r_xprt->rx_buf.rb_mrlock); - list_del(&mr->mr_all); - spin_unlock(&r_xprt->rx_buf.rb_mrlock); - - frwr_op_release_mr(mr); -} - /* On success, sets: * ep->rep_attr.cap.max_send_wr * ep->rep_attr.cap.max_recv_wr @@ -385,7 +351,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mr = NULL; do { if (mr) - rpcrdma_mr_defer_recovery(mr); + rpcrdma_mr_recycle(mr); mr = rpcrdma_mr_get(r_xprt); if (!mr) return ERR_PTR(-EAGAIN); @@ -452,7 +418,7 @@ out_dmamap_err: out_mapmr_err: pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", frwr->fr_mr, n, mr->mr_nents); - rpcrdma_mr_defer_recovery(mr); + rpcrdma_mr_recycle(mr); return ERR_PTR(-EIO); } @@ -571,7 +537,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) if (bad_wr != first) wait_for_completion(&frwr->fr_linv_done); if (rc) - goto reset_mrs; + goto out_release; /* ORDER: Now DMA unmap all of the MRs, and return * them to the free MR list. 
@@ -583,22 +549,21 @@ unmap: } return; -reset_mrs: +out_release: pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc); - /* Find and reset the MRs in the LOCAL_INV WRs that did not + /* Unmap and release the MRs in the LOCAL_INV WRs that did not * get posted. */ while (bad_wr) { frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); mr = container_of(frwr, struct rpcrdma_mr, frwr); - - __frwr_mr_reset(ia, mr); - bad_wr = bad_wr->next; + + list_del(&mr->mr_list); + frwr_op_release_mr(mr); } - goto unmap; } const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { @@ -606,7 +571,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { .ro_send = frwr_op_send, .ro_reminv = frwr_op_reminv, .ro_unmap_sync = frwr_op_unmap_sync, - .ro_recover_mr = frwr_op_recover_mr, .ro_open = frwr_op_open, .ro_maxpages = frwr_op_maxpages, .ro_init_mr = frwr_op_init_mr, diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 15edc050ca93..228aee851667 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -803,7 +803,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) struct rpcrdma_mr *mr; mr = rpcrdma_mr_pop(&req->rl_registered); - rpcrdma_mr_defer_recovery(mr); + rpcrdma_mr_recycle(mr); } /* This implementation supports the following combinations diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 98cbc7b060ba..3ae73e6a5c93 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -792,7 +792,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) r_xprt->rx_stats.bad_reply_count, r_xprt->rx_stats.nomsg_call_count); seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n", - r_xprt->rx_stats.mrs_recovered, + r_xprt->rx_stats.mrs_recycled, r_xprt->rx_stats.mrs_orphaned, r_xprt->rx_stats.mrs_allocated, r_xprt->rx_stats.local_inv_needed, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 5625a5089f96..88fe75e6d41a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -977,39 +977,6 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) } } -static void -rpcrdma_mr_recovery_worker(struct work_struct *work) -{ - struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, - rb_recovery_worker.work); - struct rpcrdma_mr *mr; - - spin_lock(&buf->rb_recovery_lock); - while (!list_empty(&buf->rb_stale_mrs)) { - mr = rpcrdma_mr_pop(&buf->rb_stale_mrs); - spin_unlock(&buf->rb_recovery_lock); - - trace_xprtrdma_recover_mr(mr); - mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr); - - spin_lock(&buf->rb_recovery_lock); - } - spin_unlock(&buf->rb_recovery_lock); -} - -void -rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr) -{ - struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - - spin_lock(&buf->rb_recovery_lock); - rpcrdma_mr_push(mr, &buf->rb_stale_mrs); - spin_unlock(&buf->rb_recovery_lock); - - schedule_delayed_work(&buf->rb_recovery_worker, 0); -} - static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) { @@ -1142,14 +1109,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) buf->rb_bc_srv_max_requests = 0; spin_lock_init(&buf->rb_mrlock); spin_lock_init(&buf->rb_lock); - spin_lock_init(&buf->rb_recovery_lock); INIT_LIST_HEAD(&buf->rb_mrs); INIT_LIST_HEAD(&buf->rb_all); - INIT_LIST_HEAD(&buf->rb_stale_mrs); INIT_DELAYED_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); - INIT_DELAYED_WORK(&buf->rb_recovery_worker, - 
rpcrdma_mr_recovery_worker); rpcrdma_mrs_create(r_xprt); @@ -1233,7 +1196,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { - cancel_delayed_work_sync(&buf->rb_recovery_worker); cancel_delayed_work_sync(&buf->rb_refresh_worker); rpcrdma_sendctxs_destroy(buf); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2ca14f7c2d51..eae21668e692 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -280,6 +280,7 @@ struct rpcrdma_mr { u32 mr_handle; u32 mr_length; u64 mr_offset; + struct work_struct mr_recycle; struct list_head mr_all; }; @@ -411,9 +412,6 @@ struct rpcrdma_buffer { u32 rb_bc_max_requests; - spinlock_t rb_recovery_lock; /* protect rb_stale_mrs */ - struct list_head rb_stale_mrs; - struct delayed_work rb_recovery_worker; struct delayed_work rb_refresh_worker; }; #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) @@ -452,7 +450,7 @@ struct rpcrdma_stats { unsigned long hardway_register_count; unsigned long failed_marshal_count; unsigned long bad_reply_count; - unsigned long mrs_recovered; + unsigned long mrs_recycled; unsigned long mrs_orphaned; unsigned long mrs_allocated; unsigned long empty_sendctx_q; @@ -481,7 +479,6 @@ struct rpcrdma_memreg_ops { struct list_head *mrs); void (*ro_unmap_sync)(struct rpcrdma_xprt *, struct list_head *); - void (*ro_recover_mr)(struct rpcrdma_mr *mr); int (*ro_open)(struct rpcrdma_ia *, struct rpcrdma_ep *, struct rpcrdma_create_data_internal *); @@ -578,7 +575,12 @@ struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); void rpcrdma_mr_put(struct rpcrdma_mr *mr); void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr); -void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr); + +static inline void +rpcrdma_mr_recycle(struct rpcrdma_mr *mr) +{ + schedule_work(&mr->mr_recycle); +} struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_req *); -- cgit v1.2.3 From d379eaa838f1813ca906b946ad3cbb77781d2be7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Oct 2018 14:25:30 -0400 Subject: xprtrdma: Name MR trace events consistently Clean up the names of trace events related to MRs so that it's easy to enable these with a glob. 
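With the shared xprtrdma_mr_ prefix in place, the whole group can be switched on with one wildcard from tracefs. For example, assuming tracefs is mounted at the conventional debugfs location: echo 'rpcrdma:xprtrdma_mr_*' > /sys/kernel/debug/tracing/set_event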
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 12 ++++++------ net/sunrpc/xprtrdma/fmr_ops.c | 6 +++--- net/sunrpc/xprtrdma/frwr_ops.c | 8 ++++---- net/sunrpc/xprtrdma/verbs.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 9906f4d7d9fb..89e017b0f0ec 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -263,7 +263,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr, ); #define DEFINE_MR_EVENT(name) \ - DEFINE_EVENT(xprtrdma_mr, name, \ + DEFINE_EVENT(xprtrdma_mr, xprtrdma_mr_##name, \ TP_PROTO( \ const struct rpcrdma_mr *mr \ ), \ @@ -651,11 +651,11 @@ DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg); DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li); DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); -DEFINE_MR_EVENT(xprtrdma_localinv); -DEFINE_MR_EVENT(xprtrdma_dma_map); -DEFINE_MR_EVENT(xprtrdma_dma_unmap); -DEFINE_MR_EVENT(xprtrdma_remoteinv); -DEFINE_MR_EVENT(xprtrdma_mr_recycle); +DEFINE_MR_EVENT(localinv); +DEFINE_MR_EVENT(map); +DEFINE_MR_EVENT(unmap); +DEFINE_MR_EVENT(remoteinv); +DEFINE_MR_EVENT(recycle); /** ** Reply events diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index f1cf3a36a992..7f5632cd5a48 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -97,7 +97,7 @@ fmr_mr_recycle_worker(struct work_struct *work) trace_xprtrdma_mr_recycle(mr); - trace_xprtrdma_dma_unmap(mr); + trace_xprtrdma_mr_unmap(mr); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, mr->mr_sg, mr->mr_nents, mr->mr_dir); @@ -234,7 +234,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mr->mr_sg, i, mr->mr_dir); if (!mr->mr_nents) goto out_dmamap_err; - trace_xprtrdma_dma_map(mr); + trace_xprtrdma_mr_map(mr); for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); @@ -295,7 +295,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) list_for_each_entry(mr, mrs, mr_list) { dprintk("RPC: %s: unmapping fmr %p\n", __func__, &mr->fmr); - trace_xprtrdma_localinv(mr); + trace_xprtrdma_mr_localinv(mr); list_add_tail(&mr->fmr.fm_mr->list, &unmap_list); } r_xprt->rx_stats.local_inv_needed++; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 6594627e3c7d..fc6378cc0c1c 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -123,7 +123,7 @@ frwr_mr_recycle_worker(struct work_struct *work) trace_xprtrdma_mr_recycle(mr); if (state != FRWR_FLUSHED_LI) { - trace_xprtrdma_dma_unmap(mr); + trace_xprtrdma_mr_unmap(mr); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, mr->mr_sg, mr->mr_nents, mr->mr_dir); } @@ -384,7 +384,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); if (!mr->mr_nents) goto out_dmamap_err; - trace_xprtrdma_dma_map(mr); + trace_xprtrdma_mr_map(mr); ibmr = frwr->fr_mr; n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); @@ -466,7 +466,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) list_for_each_entry(mr, mrs, mr_list) if (mr->mr_handle == rep->rr_inv_rkey) { list_del_init(&mr->mr_list); - trace_xprtrdma_remoteinv(mr); + trace_xprtrdma_mr_remoteinv(mr); mr->frwr.fr_state = FRWR_IS_INVALID; rpcrdma_mr_unmap_and_put(mr); break; /* only one invalidated MR per RPC */ @@ -503,7 +503,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt 
From ae38288eb73c52e45917fe7d05d34b84a14a7930 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Mon, 1 Oct 2018 14:25:47 -0400
Subject: xprtrdma: Rename rpcrdma_conn_upcall

Clean up: Use a function name that is consistent with the RDMA core
API and with other consumers. Because this is a function that is
invoked from outside the rpcrdma.ko module, add an appropriate
documenting comment.

Signed-off-by: Chuck Lever
Signed-off-by: Anna Schumaker
---
 include/trace/events/rpcrdma.h |  2 +-
 net/sunrpc/xprtrdma/verbs.c    | 16 +++++++++++++---
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 89e017b0f0ec..3b9de5b6b725 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -306,7 +306,7 @@ DECLARE_EVENT_CLASS(xprtrdma_cb_event,
 ** Connection events
 **/
 
-TRACE_EVENT(xprtrdma_conn_upcall,
+TRACE_EVENT(xprtrdma_cm_event,
 	TP_PROTO(
 		const struct rpcrdma_xprt *r_xprt,
 		struct rdma_cm_event *event
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3a9a62d28283..7bf0e6529f41 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -219,15 +219,25 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
 	rpcrdma_set_max_header_sizes(r_xprt);
 }
 
+/**
+ * rpcrdma_cm_event_handler - Handle RDMA CM events
+ * @id: rdma_cm_id on which an event has occurred
+ * @event: details of the event
+ *
+ * Called with @id's mutex held. Returns 1 if caller should
+ * destroy @id, otherwise 0.
+ */
 static int
-rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
+rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
 {
 	struct rpcrdma_xprt *xprt = id->context;
 	struct rpcrdma_ia *ia = &xprt->rx_ia;
 	struct rpcrdma_ep *ep = &xprt->rx_ep;
 	int connstate = 0;
 
-	trace_xprtrdma_conn_upcall(xprt, event);
+	might_sleep();
+
+	trace_xprtrdma_cm_event(xprt, event);
 	switch (event->event) {
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
@@ -308,7 +318,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
 	init_completion(&ia->ri_done);
 	init_completion(&ia->ri_remove_done);
 
-	id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall,
+	id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
 			    xprt, RDMA_PS_TCP, IB_QPT_RC);
 	if (IS_ERR(id)) {
 		rc = PTR_ERR(id);
--
cgit v1.2.3
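The new kernel-doc comment spells out the rdma_cm contract: the handler runs with the id's mutex held, and its return value tells the RDMA core whether to destroy the id. A hedged sketch of a consumer honoring that contract (the demo_ name is hypothetical; only the return-value convention is taken from the comment above):

#include <rdma/rdma_cm.h>

static int demo_cm_event_handler(struct rdma_cm_id *id,
				 struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		/* Returning 1 asks the RDMA core to destroy @id;
		 * the handler must not call rdma_destroy_id() itself. */
		return 1;
	default:
		/* Returning 0 keeps @id alive. */
		return 0;
	}
}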
From f9521d53e804b9721c2829858f6d5bf6f470e734 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Mon, 1 Oct 2018 14:26:13 -0400
Subject: xprtrdma: Rename rpcrdma_qp_async_error_upcall

Clean up: Use a function name that is consistent with the RDMA core
API and with other consumers. Because this is a function that is
invoked from outside the rpcrdma.ko module, add an appropriate
documenting comment.

Signed-off-by: Chuck Lever
Signed-off-by: Anna Schumaker
---
 include/trace/events/rpcrdma.h |  2 +-
 net/sunrpc/xprtrdma/verbs.c    | 14 +++++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 3b9de5b6b725..a4d9ff9c36d4 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -377,7 +377,7 @@ DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
 DEFINE_RXPRT_EVENT(xprtrdma_reconnect);
 DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc);
 
-TRACE_EVENT(xprtrdma_qp_error,
+TRACE_EVENT(xprtrdma_qp_event,
 	TP_PROTO(
 		const struct rpcrdma_xprt *r_xprt,
 		const struct ib_event *event
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index abbd3cdc259a..b62260aa348b 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -127,14 +127,22 @@ rpcrdma_disconnect_worker(struct work_struct *work)
 	xprt_force_disconnect(&r_xprt->rx_xprt);
 }
 
+/**
+ * rpcrdma_qp_event_handler - Handle one QP event (error notification)
+ * @event: details of the event
+ * @context: ep that owns QP where event occurred
+ *
+ * Called from the RDMA provider (device driver) possibly in an interrupt
+ * context.
+ */
 static void
-rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
+rpcrdma_qp_event_handler(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 	struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
 						   rx_ep);
 
-	trace_xprtrdma_qp_error(r_xprt, event);
+	trace_xprtrdma_qp_event(r_xprt, event);
 	pr_err("rpcrdma: %s on device %s ep %p\n",
 	       ib_event_msg(event->event), event->device->name, context);
@@ -547,7 +555,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	if (rc)
 		return rc;
 
-	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
+	ep->rep_attr.event_handler = rpcrdma_qp_event_handler;
 	ep->rep_attr.qp_context = ep;
 	ep->rep_attr.srq = NULL;
 	ep->rep_attr.cap.max_send_sge = max_sge;
--
cgit v1.2.3

From 470443e0b379b070305629f911cc09562bdf324f Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Mon, 1 Oct 2018 14:26:51 -0400
Subject: xprtrdma: Squelch a sparse warning

linux/include/trace/events/rpcrdma.h:501:1: warning: expression using sizeof bool
linux/include/trace/events/rpcrdma.h:501:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1)

Fixes: ab03eff58eb5 ("xprtrdma: Add trace points in RPC Call ... ")
Signed-off-by: Chuck Lever
Signed-off-by: Anna Schumaker
---
 include/trace/events/rpcrdma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index a4d9ff9c36d4..b093058f78aa 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -509,7 +509,7 @@ TRACE_EVENT(xprtrdma_post_send,
 	TP_STRUCT__entry(
 		__field(const void *, req)
 		__field(int, num_sge)
-		__field(bool, signaled)
+		__field(int, signaled)
 		__field(int, status)
 	),
--
cgit v1.2.3
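The warnings quoted above come from sparse objecting to sizeof applied to a _Bool-typed ring-buffer field; switching the field to int gives the record a fixed, conventional width. A tiny userspace illustration of the difference (nothing here is from the patch itself; it only shows that _Bool's size is implementation-defined while int's is the usual stable choice):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	/* sizeof(_Bool) is implementation-defined (commonly 1);
	 * int has a conventional, well-understood record width. */
	printf("sizeof(bool) = %zu\n", sizeof(bool));
	printf("sizeof(int)  = %zu\n", sizeof(int));
	return 0;
}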
From 07d02a67b7faae56e184f6c35f78de47f06da37f Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Fri, 12 Oct 2018 13:28:26 -0400
Subject: SUNRPC: Simplify lookup code

We no longer need to worry about whether or not the entry is hashed
in order to figure out if the contents are valid. We only care
whether or not the refcount is non-zero.

Signed-off-by: Trond Myklebust
---
 include/linux/sunrpc/auth.h | 12 +++++-------
 net/sunrpc/auth.c           | 19 ++++++++-----------
 2 files changed, 13 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 2c97a3933ef9..a7fc8f5a2dad 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -206,11 +206,11 @@ bool rpcauth_cred_key_to_expire(struct rpc_auth *, struct rpc_cred *);
 char *			rpcauth_stringify_acceptor(struct rpc_cred *);
 
 static inline
-struct rpc_cred *	get_rpccred(struct rpc_cred *cred)
+struct rpc_cred *get_rpccred(struct rpc_cred *cred)
 {
-	if (cred != NULL)
-		atomic_inc(&cred->cr_count);
-	return cred;
+	if (cred != NULL && atomic_inc_not_zero(&cred->cr_count))
+		return cred;
+	return NULL;
 }
 
@@ -226,9 +226,7 @@ struct rpc_cred * get_rpccred(struct rpc_cred *cred)
 static inline
 struct rpc_cred *get_rpccred_rcu(struct rpc_cred *cred)
 {
-	if (atomic_inc_not_zero(&cred->cr_count))
-		return cred;
-	return NULL;
+	return get_rpccred(cred);
 }
 
 #endif /* __KERNEL__ */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index c1576b110974..77748e572686 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -588,19 +588,15 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 		if (!entry->cr_ops->crmatch(acred, entry, flags))
 			continue;
 		if (flags & RPCAUTH_LOOKUP_RCU) {
-			if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
-			    !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
-				cred = entry;
+			if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
+			    atomic_read(&entry->cr_count) == 0)
+				continue;
+			cred = entry;
 			break;
 		}
-		spin_lock(&cache->lock);
-		if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
-			spin_unlock(&cache->lock);
-			continue;
-		}
 		cred = get_rpccred(entry);
-		spin_unlock(&cache->lock);
-		break;
+		if (cred)
+			break;
 	}
 	rcu_read_unlock();
@@ -621,7 +617,8 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 		if (!entry->cr_ops->crmatch(acred, entry, flags))
 			continue;
 		cred = get_rpccred(entry);
-		break;
+		if (cred)
+			break;
 	}
 	if (cred == NULL) {
 		cred = new;
--
cgit v1.2.3
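The heart of this simplification is atomic_inc_not_zero(): a lookup under RCU can race with the final put, so it takes a reference only while the count is still non-zero and otherwise skips the dying entry. A userspace model of that primitive using C11 atomics (a sketch of the semantics, not the kernel implementation):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Model of atomic_inc_not_zero(): acquire a reference only if the
 * object still has one, i.e. skip objects that a concurrent
 * put_rpccred() has already committed to freeing. */
static bool get_ref_unless_zero(atomic_uint *count)
{
	unsigned int old = atomic_load(count);

	while (old != 0) {
		/* On failure, the CAS refreshes 'old' and we retry. */
		if (atomic_compare_exchange_weak(count, &old, old + 1))
			return true;	/* we now hold a reference */
	}
	return false;		/* count was zero: entry is dying */
}

int main(void)
{
	atomic_uint live = 1, dying = 0;

	printf("live:  %s\n", get_ref_unless_zero(&live) ? "got ref" : "skipped");
	printf("dying: %s\n", get_ref_unless_zero(&dying) ? "got ref" : "skipped");
	return 0;
}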
From 79b181810285a6b9b7a1aed25c365c9e1782e22a Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Sun, 14 Oct 2018 10:34:31 -0400
Subject: SUNRPC: Convert auth creds to use refcount_t

Signed-off-by: Trond Myklebust
---
 include/linux/sunrpc/auth.h |  4 ++--
 net/sunrpc/auth.c           | 14 +++++++-------
 net/sunrpc/auth_null.c      |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index a7fc8f5a2dad..a71d4bd191e7 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -67,7 +67,7 @@ struct rpc_cred {
 	const struct rpc_credops *cr_ops;
 	unsigned long		cr_expire;	/* when to gc */
 	unsigned long		cr_flags;	/* various flags */
-	atomic_t		cr_count;	/* ref count */
+	refcount_t		cr_count;	/* ref count */
 
 	kuid_t			cr_uid;
 
@@ -208,7 +208,7 @@ char *			rpcauth_stringify_acceptor(struct rpc_cred *);
 static inline
 struct rpc_cred *get_rpccred(struct rpc_cred *cred)
 {
-	if (cred != NULL && atomic_inc_not_zero(&cred->cr_count))
+	if (cred != NULL && refcount_inc_not_zero(&cred->cr_count))
 		return cred;
 	return NULL;
 }
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 77748e572686..4903eda5dd61 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -495,7 +495,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
 		if (nr_to_scan-- == 0)
 			break;
-		if (atomic_read(&cred->cr_count) > 1) {
+		if (refcount_read(&cred->cr_count) > 1) {
 			rpcauth_lru_remove_locked(cred);
 			continue;
 		}
@@ -589,7 +589,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 			continue;
 		if (flags & RPCAUTH_LOOKUP_RCU) {
 			if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
-			    atomic_read(&entry->cr_count) == 0)
+			    refcount_read(&entry->cr_count) == 0)
 				continue;
 			cred = entry;
 			break;
@@ -623,7 +623,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 	if (cred == NULL) {
 		cred = new;
 		set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
-		atomic_inc(&cred->cr_count);
+		refcount_inc(&cred->cr_count);
 		hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
 	} else
 		list_add_tail(&new->cr_lru, &free);
@@ -670,7 +670,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
 {
 	INIT_HLIST_NODE(&cred->cr_hash);
 	INIT_LIST_HEAD(&cred->cr_lru);
-	atomic_set(&cred->cr_count, 1);
+	refcount_set(&cred->cr_count, 1);
 	cred->cr_auth = auth;
 	cred->cr_ops = ops;
 	cred->cr_expire = jiffies;
@@ -739,9 +739,9 @@ put_rpccred(struct rpc_cred *cred)
 	if (cred == NULL)
 		return;
 	rcu_read_lock();
-	if (atomic_dec_and_test(&cred->cr_count))
+	if (refcount_dec_and_test(&cred->cr_count))
 		goto destroy;
-	if (atomic_read(&cred->cr_count) != 1 ||
+	if (refcount_read(&cred->cr_count) != 1 ||
 	    !test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
 		goto out;
 	if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
@@ -752,7 +752,7 @@ put_rpccred(struct rpc_cred *cred)
 		rpcauth_lru_remove(cred);
 	} else if (rpcauth_unhash_cred(cred)) {
 		rpcauth_lru_remove(cred);
-		if (atomic_dec_and_test(&cred->cr_count))
+		if (refcount_dec_and_test(&cred->cr_count))
 			goto destroy;
 	}
 out:
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index a7c00b4959f3..ea816d7000a4 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -138,6 +138,6 @@ struct rpc_cred null_cred = {
 	.cr_lru		= LIST_HEAD_INIT(null_cred.cr_lru),
 	.cr_auth	= &null_auth,
 	.cr_ops		= &null_credops,
-	.cr_count	= ATOMIC_INIT(2),
+	.cr_count	= REFCOUNT_INIT(2),
 	.cr_flags	= 1UL << RPCAUTH_CRED_UPTODATE,
 };
--
cgit v1.2.3
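The patch itself is a mechanical atomic_t-to-refcount_t conversion; the motivation (an assumption here, not stated in the commit message) is refcount_t's checked semantics: zero is a terminal state, and over- or underflow warns instead of silently wrapping into a use-after-free. A userspace model of the put side:

#include <stdatomic.h>
#include <stdio.h>

/* Rough model of refcount_dec_and_test(): returns nonzero when the
 * caller dropped the last reference and must free the object, and
 * complains about a put on an already-zero count instead of wrapping. */
static int ref_put(atomic_uint *count)
{
	unsigned int old = atomic_fetch_sub(count, 1);

	if (old == 0)
		fprintf(stderr, "WARN: refcount underflow\n");
	return old == 1;
}

int main(void)
{
	atomic_uint cr_count = 2;	/* e.g. a cred with two holders */

	printf("first put frees?  %d\n", ref_put(&cr_count));
	printf("second put frees? %d\n", ref_put(&cr_count));
	return 0;
}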
From 331bc71cb1751d78f6807ad8e6162b07c67cdd1b Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Sun, 14 Oct 2018 10:40:29 -0400
Subject: SUNRPC: Convert the auth cred cache to use refcount_t

Signed-off-by: Trond Myklebust
---
 include/linux/sunrpc/auth.h    | 2 +-
 net/sunrpc/auth.c              | 2 +-
 net/sunrpc/auth_generic.c      | 2 +-
 net/sunrpc/auth_gss/auth_gss.c | 4 ++--
 net/sunrpc/auth_null.c         | 4 ++--
 net/sunrpc/auth_unix.c         | 4 ++--
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index a71d4bd191e7..c4db9424b63b 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -100,7 +100,7 @@ struct rpc_auth {
 						 * differ from the flavor in
 						 * au_ops->au_flavor in gss
 						 * case) */
-	atomic_t		au_count;	/* Reference counter */
+	refcount_t		au_count;	/* Reference counter */
 
 	struct rpc_cred_cache *	au_credcache;	/* per-flavor data */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 4903eda5dd61..ad8ead738981 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(rpcauth_create);
 void
 rpcauth_release(struct rpc_auth *auth)
 {
-	if (!atomic_dec_and_test(&auth->au_count))
+	if (!refcount_dec_and_test(&auth->au_count))
 		return;
 	auth->au_ops->destroy(auth);
 }
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index f1df9837f1ac..d8831b988b1e 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -274,7 +274,7 @@ static const struct rpc_authops generic_auth_ops = {
 
 static struct rpc_auth generic_auth = {
 	.au_ops = &generic_auth_ops,
-	.au_count = ATOMIC_INIT(0),
+	.au_count = REFCOUNT_INIT(1),
 };
 
 static bool generic_key_to_expire(struct rpc_cred *cred)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index c898a7c75e84..30f970cdc7f6 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1058,7 +1058,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 	auth->au_flavor = flavor;
 	if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor))
 		auth->au_flags |= RPCAUTH_AUTH_DATATOUCH;
-	atomic_set(&auth->au_count, 1);
+	refcount_set(&auth->au_count, 1);
 	kref_init(&gss_auth->kref);
 
 	err = rpcauth_init_credcache(auth);
@@ -1187,7 +1187,7 @@ gss_auth_find_or_add_hashed(const struct rpc_auth_create_args *args,
 			if (strcmp(gss_auth->target_name, args->target_name))
 				continue;
 		}
-		if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
+		if (!refcount_inc_not_zero(&gss_auth->rpc_auth.au_count))
 			continue;
 		goto out;
 	}
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index ea816d7000a4..2694a1bc026b 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -21,7 +21,7 @@ static struct rpc_cred null_cred;
 static struct rpc_auth *
 nul_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
-	atomic_inc(&null_auth.au_count);
+	refcount_inc(&null_auth.au_count);
 	return &null_auth;
 }
 
@@ -119,7 +119,7 @@ struct rpc_auth null_auth = {
 	.au_flags	= RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
 	.au_ops		= &authnull_ops,
 	.au_flavor	= RPC_AUTH_NULL,
-	.au_count	= ATOMIC_INIT(0),
+	.au_count	= REFCOUNT_INIT(1),
 };
 
 static
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 185e56d4f9ae..4c1c7e56288f 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -34,7 +34,7 @@ unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	dprintk("RPC: creating UNIX authenticator for client %p\n",
 			clnt);
-	atomic_inc(&unix_auth.au_count);
+	refcount_inc(&unix_auth.au_count);
 	return &unix_auth;
 }
 
@@ -239,7 +239,7 @@ struct rpc_auth unix_auth = {
 	.au_flags	= RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
 	.au_ops		= &authunix_ops,
 	.au_flavor	= RPC_AUTH_UNIX,
-	.au_count	= ATOMIC_INIT(0),
+	.au_count	= REFCOUNT_INIT(1),
 };
 
 static
--
cgit v1.2.3
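Note that the static authenticators do not simply swap ATOMIC_INIT(0) for REFCOUNT_INIT(0): they now start at 1. With refcount_t, incrementing from zero is treated as a bug (zero means "already freed"), so a permanently-live static object needs one implicit reference from birth. A userspace model of that rule (a sketch; the WARN is a stand-in for the kernel's refcount saturation warning):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint au_count = 1;	/* REFCOUNT_INIT(1) */

/* Model of refcount_inc(): incrementing from zero is flagged. */
static void ref_inc(atomic_uint *count)
{
	if (atomic_fetch_add(count, 1) == 0)
		fprintf(stderr, "WARN: refcount_inc on a zero refcount\n");
}

int main(void)
{
	ref_inc(&au_count);	/* e.g. nul_create(): 1 -> 2, no warning */
	printf("au_count = %u\n", atomic_load(&au_count));
	return 0;
}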