From ae67bd3821bb0a54d97e7883d211196637d487a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:44 -0400 Subject: SUNRPC: Fix up task signalling The RPC_TASK_KILLED flag should really not be set from another context because it can clobber data in the struct task when task->tk_flags is changed non-atomically. Let's therefore swap out RPC_TASK_KILLED with an atomic flag, and add a function to set that flag and safely wake up the task. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 6 ++++-- include/trace/events/sunrpc.h | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 52d41d0c1ae1..90e06c67f455 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -125,7 +125,6 @@ struct rpc_task_setup { #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ -#define RPC_TASK_KILLED 0x0100 /* task was killed */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ @@ -135,7 +134,6 @@ struct rpc_task_setup { #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) -#define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) @@ -146,6 +144,7 @@ struct rpc_task_setup { #define RPC_TASK_NEED_XMIT 3 #define RPC_TASK_NEED_RECV 4 #define RPC_TASK_MSG_PIN_WAIT 5 +#define RPC_TASK_SIGNALLED 6 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) @@ -169,6 +168,8 @@ struct rpc_task_setup { #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) +#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate) + /* * Task priorities. * Note: if you change these, you must also change @@ -217,6 +218,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); +void rpc_signal_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_exit(struct rpc_task *, int); void rpc_release_calldata(const struct rpc_call_ops *, void *); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 7e899e635d33..5e3b77d9daa7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -82,7 +82,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER); TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN); TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS); TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC); -TRACE_DEFINE_ENUM(RPC_TASK_KILLED); TRACE_DEFINE_ENUM(RPC_TASK_SOFT); TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN); TRACE_DEFINE_ENUM(RPC_TASK_SENT); @@ -97,7 +96,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT); { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \ { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \ { RPC_TASK_DYNAMIC, "DYNAMIC" }, \ - { RPC_TASK_KILLED, "KILLED" }, \ { RPC_TASK_SOFT, "SOFT" }, \ { RPC_TASK_SOFTCONN, "SOFTCONN" }, \ { RPC_TASK_SENT, "SENT" }, \ @@ -111,6 +109,7 @@ TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE); TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT); TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV); TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT); +TRACE_DEFINE_ENUM(RPC_TASK_SIGNALLED); #define rpc_show_runstate(flags) \ __print_flags(flags, "|", \ @@ -119,7 +118,8 @@ TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT); { (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \ { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \ { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \ - { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }) + { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }, \ + { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" }) DECLARE_EVENT_CLASS(rpc_task_running, -- cgit v1.2.3 From 8ba6a92d0182091e0c2fa15c1a5b5458bac25fc3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:46 -0400 Subject: SUNRPC: Refactor xprt_request_wait_receive() Convert the transport callback to actually put the request to sleep instead of just setting a timeout. This is in preparation for rpc_sleep_on_timeout(). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 6 +-- net/sunrpc/xprt.c | 79 ++++++++++++++++-------------- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtsock.c | 8 +-- 5 files changed, 51 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3a391544299e..a6d9fce7f20e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -143,7 +143,7 @@ struct rpc_xprt_ops { void (*buf_free)(struct rpc_task *task); void (*prepare_request)(struct rpc_rqst *req); int (*send_request)(struct rpc_rqst *req); - void (*set_retrans_timeout)(struct rpc_task *task); + void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); @@ -378,8 +378,8 @@ xprt_disable_swap(struct rpc_xprt *xprt) int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); int xprt_load_transport(const char *); -void xprt_set_retrans_timeout_def(struct rpc_task *task); -void xprt_set_retrans_timeout_rtt(struct rpc_task *task); +void xprt_wait_for_reply_request_def(struct rpc_task *task); +void xprt_wait_for_reply_request_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); void xprt_wait_for_buffer_space(struct rpc_xprt *xprt); bool xprt_write_space(struct rpc_xprt *xprt); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3a4156cb0134..5afffa669d04 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -554,41 +554,6 @@ bool xprt_write_space(struct rpc_xprt *xprt) } EXPORT_SYMBOL_GPL(xprt_write_space); -/** - * xprt_set_retrans_timeout_def - set a request's retransmit timeout - * @task: task whose timeout is to be set - * - * Set a request's retransmit timeout based on the transport's - * default timeout parameters. Used by transports that don't adjust - * the retransmit timeout based on round-trip time estimation. - */ -void xprt_set_retrans_timeout_def(struct rpc_task *task) -{ - task->tk_timeout = task->tk_rqstp->rq_timeout; -} -EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def); - -/** - * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout - * @task: task whose timeout is to be set - * - * Set a request's retransmit timeout using the RTT estimator. - */ -void xprt_set_retrans_timeout_rtt(struct rpc_task *task) -{ - int timer = task->tk_msg.rpc_proc->p_timer; - struct rpc_clnt *clnt = task->tk_client; - struct rpc_rtt *rtt = clnt->cl_rtt; - struct rpc_rqst *req = task->tk_rqstp; - unsigned long max_timeout = clnt->cl_timeout->to_maxval; - - task->tk_timeout = rpc_calc_rto(rtt, timer); - task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; - if (task->tk_timeout > max_timeout || task->tk_timeout == 0) - task->tk_timeout = max_timeout; -} -EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt); - static void xprt_reset_majortimeo(struct rpc_rqst *req) { const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; @@ -1102,6 +1067,47 @@ static void xprt_timer(struct rpc_task *task) task->tk_status = 0; } +/** + * xprt_wait_for_reply_request_def - wait for reply + * @task: pointer to rpc_task + * + * Set a request's retransmit timeout based on the transport's + * default timeout parameters. Used by transports that don't adjust + * the retransmit timeout based on round-trip time estimation, + * and put the task to sleep on the pending queue. + */ +void xprt_wait_for_reply_request_def(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + task->tk_timeout = req->rq_timeout; + rpc_sleep_on(&req->rq_xprt->pending, task, xprt_timer); +} +EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def); + +/** + * xprt_wait_for_reply_request_rtt - wait for reply using RTT estimator + * @task: pointer to rpc_task + * + * Set a request's retransmit timeout using the RTT estimator, + * and put the task to sleep on the pending queue. + */ +void xprt_wait_for_reply_request_rtt(struct rpc_task *task) +{ + int timer = task->tk_msg.rpc_proc->p_timer; + struct rpc_clnt *clnt = task->tk_client; + struct rpc_rtt *rtt = clnt->cl_rtt; + struct rpc_rqst *req = task->tk_rqstp; + unsigned long max_timeout = clnt->cl_timeout->to_maxval; + + task->tk_timeout = rpc_calc_rto(rtt, timer); + task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; + if (task->tk_timeout > max_timeout || task->tk_timeout == 0) + task->tk_timeout = max_timeout; + rpc_sleep_on(&req->rq_xprt->pending, task, xprt_timer); +} +EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt); + /** * xprt_request_wait_receive - wait for the reply to an RPC request * @task: RPC task about to send a request @@ -1121,8 +1127,7 @@ void xprt_request_wait_receive(struct rpc_task *task) */ spin_lock(&xprt->queue_lock); if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { - xprt->ops->set_retrans_timeout(task); - rpc_sleep_on(&xprt->pending, task, xprt_timer); + xprt->ops->wait_for_reply_request(task); /* * Send an extra queue wakeup call if the * connection was dropped in case the call to diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 907464c2a9f0..bed57d8b5c19 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -261,7 +261,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = { .buf_alloc = xprt_rdma_bc_allocate, .buf_free = xprt_rdma_bc_free, .send_request = xprt_rdma_bc_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_def, + .wait_for_reply_request = xprt_wait_for_reply_request_def, .close = xprt_rdma_bc_close, .destroy = xprt_rdma_bc_put, .print_stats = xprt_rdma_print_stats diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 5d261353bd90..7e73abe01cfe 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -815,7 +815,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = { .alloc_slot = xprt_rdma_alloc_slot, .free_slot = xprt_rdma_free_slot, .release_request = xprt_release_rqst_cong, /* ditto */ - .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ + .wait_for_reply_request = xprt_wait_for_reply_request_def, /* ditto */ .timer = xprt_rdma_timer, .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ .set_port = xprt_rdma_set_port, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 732d4b57411a..b4b4b8db143c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2690,7 +2690,7 @@ static const struct rpc_xprt_ops xs_local_ops = { .buf_free = rpc_free, .prepare_request = xs_stream_prepare_request, .send_request = xs_local_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_def, + .wait_for_reply_request = xprt_wait_for_reply_request_def, .close = xs_close, .destroy = xs_destroy, .print_stats = xs_local_print_stats, @@ -2710,7 +2710,7 @@ static const struct rpc_xprt_ops xs_udp_ops = { .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_udp_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_rtt, + .wait_for_reply_request = xprt_wait_for_reply_request_rtt, .timer = xs_udp_timer, .release_request = xprt_release_rqst_cong, .close = xs_close, @@ -2733,7 +2733,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .prepare_request = xs_stream_prepare_request, .send_request = xs_tcp_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_def, + .wait_for_reply_request = xprt_wait_for_reply_request_def, .close = xs_tcp_shutdown, .destroy = xs_destroy, .set_connect_timeout = xs_tcp_set_connect_timeout, @@ -2761,7 +2761,7 @@ static const struct rpc_xprt_ops bc_tcp_ops = { .buf_alloc = bc_malloc, .buf_free = bc_free, .send_request = bc_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_def, + .wait_for_reply_request = xprt_wait_for_reply_request_def, .close = bc_close, .destroy = bc_destroy, .print_stats = xs_tcp_print_stats, -- cgit v1.2.3 From 8357a9b60fe7500699a9dec540ca1c48df3cb455 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:48 -0400 Subject: SUNRPC: Remove unused argument 'action' from rpc_sleep_on_priority() None of the callers set the 'action' argument, so let's just remove it. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- include/linux/sunrpc/sched.h | 1 - net/sunrpc/sched.c | 7 +++---- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 741ff8c9c6ed..222eccd8a715 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1008,7 +1008,7 @@ out_start: out_sleep: if (args->sa_privileged) rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, - NULL, RPC_PRIORITY_PRIVILEGED); + RPC_PRIORITY_PRIVILEGED); else rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 90e06c67f455..5d517578a018 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -231,7 +231,6 @@ void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action); void rpc_sleep_on_priority(struct rpc_wait_queue *, struct rpc_task *, - rpc_action action, int priority); void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq, struct rpc_wait_queue *queue, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 8e96a841dd11..04170c08b2cf 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -409,18 +409,17 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, EXPORT_SYMBOL_GPL(rpc_sleep_on); void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, int priority) + int priority) { if (!rpc_sleep_check_activated(task)) return; - rpc_set_tk_callback(task, action); - + priority -= RPC_PRIORITY_LOW; /* * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on_priority(q, task, priority - RPC_PRIORITY_LOW); + __rpc_sleep_on_priority(q, task, priority); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on_priority); -- cgit v1.2.3 From 6b2e6856275d7b8d0acbf06d2e8da72e1a6bc857 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:49 -0400 Subject: SUNRPC: Add function rpc_sleep_on_timeout() Clean up the RPC task sleep interfaces by replacing the task->tk_timeout 'hidden parameter' to rpc_sleep_on() with a new function that takes an absolute timeout. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 +++++++---- include/linux/sunrpc/sched.h | 9 +++++- net/sunrpc/auth_gss/auth_gss.c | 5 ++- net/sunrpc/clnt.c | 1 - net/sunrpc/rpcb_clnt.c | 3 +- net/sunrpc/sched.c | 69 ++++++++++++++++++++++++++++++++++-------- net/sunrpc/xprt.c | 36 +++++++++++++--------- 7 files changed, 101 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 222eccd8a715..96b988689f0e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -978,10 +978,8 @@ int nfs4_setup_sequence(struct nfs_client *client, if (res->sr_slot != NULL) goto out_start; - if (session) { + if (session) tbl = &session->fc_slot_table; - task->tk_timeout = 0; - } spin_lock(&tbl->slot_tbl_lock); /* The state manager will wait until the slot table is empty */ @@ -990,9 +988,8 @@ int nfs4_setup_sequence(struct nfs_client *client, slot = nfs4_alloc_slot(tbl); if (IS_ERR(slot)) { - /* Try again in 1/4 second */ if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; + goto out_sleep_timeout; goto out_sleep; } spin_unlock(&tbl->slot_tbl_lock); @@ -1004,7 +1001,16 @@ out_start: nfs41_sequence_res_init(res); rpc_call_start(task); return 0; - +out_sleep_timeout: + /* Try again in 1/4 second */ + if (args->sa_privileged) + rpc_sleep_on_priority_timeout(&tbl->slot_tbl_waitq, task, + jiffies + (HZ >> 2), RPC_PRIORITY_PRIVILEGED); + else + rpc_sleep_on_timeout(&tbl->slot_tbl_waitq, task, + NULL, jiffies + (HZ >> 2)); + spin_unlock(&tbl->slot_tbl_lock); + return -EAGAIN; out_sleep: if (args->sa_privileged) rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 5d517578a018..61ba533ec058 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -35,7 +35,6 @@ struct rpc_wait { struct list_head list; /* wait queue links */ struct list_head links; /* Links to related tasks */ struct list_head timer_list; /* Timer list */ - unsigned long expires; }; /* @@ -227,8 +226,16 @@ void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_destroy_wait_queue(struct rpc_wait_queue *); +void rpc_sleep_on_timeout(struct rpc_wait_queue *queue, + struct rpc_task *task, + rpc_action action, + unsigned long timeout); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action); +void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned long timeout, + int priority); void rpc_sleep_on_priority(struct rpc_wait_queue *, struct rpc_task *, int priority); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3fd56c0c90ae..c055edfec55e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -581,8 +581,8 @@ gss_refresh_upcall(struct rpc_task *task) /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. */ warn_gssd(); - task->tk_timeout = 15*HZ; - rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL); + rpc_sleep_on_timeout(&pipe_version_rpc_waitqueue, + task, NULL, jiffies + (15 * HZ)); err = -EAGAIN; goto out; } @@ -595,7 +595,6 @@ gss_refresh_upcall(struct rpc_task *task) if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { - task->tk_timeout = 0; gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ refcount_inc(&gss_msg->count); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af1dfc2a8fb1..216d5e5e3b54 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1851,7 +1851,6 @@ call_bind(struct rpc_task *task) if (!xprt_prepare_transmit(task)) return; - task->tk_timeout = xprt->bind_timeout; xprt->ops->rpcbind(task); } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 41a971ac1c63..18b0cf2a923f 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -694,7 +694,8 @@ void rpcb_getport_async(struct rpc_task *task) /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ - rpc_sleep_on(&xprt->binding, task, NULL); + rpc_sleep_on_timeout(&xprt->binding, task, + NULL, jiffies + xprt->bind_timeout); if (xprt_test_and_set_binding(xprt)) { dprintk("RPC: %5u %s: waiting for another binder\n", diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 04170c08b2cf..7e0f7b83262f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -66,7 +66,7 @@ struct workqueue_struct *xprtiod_workqueue __read_mostly; static void __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (task->tk_timeout == 0) + if (list_empty(&task->u.tk_wait.timer_list)) return; dprintk("RPC: %5u disabling timer\n", task->tk_pid); task->tk_timeout = 0; @@ -86,17 +86,15 @@ rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) * Set up a timer for the current task. */ static void -__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) +__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task, + unsigned long timeout) { - if (!task->tk_timeout) - return; - dprintk("RPC: %5u setting alarm for %u ms\n", - task->tk_pid, jiffies_to_msecs(task->tk_timeout)); + task->tk_pid, jiffies_to_msecs(timeout - jiffies)); - task->u.tk_wait.expires = jiffies + task->tk_timeout; - if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) - rpc_set_queue_timer(queue, task->u.tk_wait.expires); + task->tk_timeout = timeout; + if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires)) + rpc_set_queue_timer(queue, timeout); list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } @@ -188,6 +186,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, if (RPC_IS_QUEUED(task)) return; + INIT_LIST_HEAD(&task->u.tk_wait.timer_list); if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task, queue_priority); else if (RPC_IS_SWAPPER(task)) @@ -371,7 +370,17 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, __rpc_add_wait_queue(q, task, queue_priority); - __rpc_add_timer(q, task); +} + +static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q, + struct rpc_task *task, unsigned long timeout, + unsigned char queue_priority) +{ + if (time_is_after_jiffies(timeout)) { + __rpc_sleep_on_priority(q, task, queue_priority); + __rpc_add_timer(q, task, timeout); + } else + task->tk_status = -ETIMEDOUT; } static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action) @@ -391,6 +400,23 @@ static bool rpc_sleep_check_activated(struct rpc_task *task) return true; } +void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task, + rpc_action action, unsigned long timeout) +{ + if (!rpc_sleep_check_activated(task)) + return; + + rpc_set_tk_callback(task, action); + + /* + * Protect the queue operations. + */ + spin_lock_bh(&q->lock); + __rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority); + spin_unlock_bh(&q->lock); +} +EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout); + void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action) { @@ -399,6 +425,7 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_set_tk_callback(task, action); + WARN_ON_ONCE(task->tk_timeout != 0); /* * Protect the queue operations. */ @@ -408,12 +435,29 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, } EXPORT_SYMBOL_GPL(rpc_sleep_on); +void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q, + struct rpc_task *task, unsigned long timeout, int priority) +{ + if (!rpc_sleep_check_activated(task)) + return; + + priority -= RPC_PRIORITY_LOW; + /* + * Protect the queue operations. + */ + spin_lock_bh(&q->lock); + __rpc_sleep_on_priority_timeout(q, task, timeout, priority); + spin_unlock_bh(&q->lock); +} +EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout); + void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, int priority) { if (!rpc_sleep_check_activated(task)) return; + WARN_ON_ONCE(task->tk_timeout != 0); priority -= RPC_PRIORITY_LOW; /* * Protect the queue operations. @@ -711,7 +755,7 @@ static void __rpc_queue_timer_fn(struct timer_list *t) spin_lock(&queue->lock); expires = now = jiffies; list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { - timeo = task->u.tk_wait.expires; + timeo = task->tk_timeout; if (time_after_eq(now, timeo)) { dprintk("RPC: %5u timeout\n", task->tk_pid); task->tk_status = -ETIMEDOUT; @@ -737,8 +781,7 @@ static void __rpc_atrun(struct rpc_task *task) */ void rpc_delay(struct rpc_task *task, unsigned long delay) { - task->tk_timeout = delay; - rpc_sleep_on(&delay_queue, task, __rpc_atrun); + rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay); } EXPORT_SYMBOL_GPL(rpc_delay); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5afffa669d04..7c3623b17493 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -209,9 +209,12 @@ out_unlock: out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); - task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0; task->tk_status = -EAGAIN; - rpc_sleep_on(&xprt->sending, task, NULL); + if (RPC_IS_SOFT(task)) + rpc_sleep_on_timeout(&xprt->sending, task, NULL, + jiffies + req->rq_timeout); + else + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); @@ -273,9 +276,12 @@ out_unlock: xprt_clear_locked(xprt); out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); - task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0; task->tk_status = -EAGAIN; - rpc_sleep_on(&xprt->sending, task, NULL); + if (RPC_IS_SOFT(task)) + rpc_sleep_on_timeout(&xprt->sending, task, NULL, + jiffies + req->rq_timeout); + else + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -787,9 +793,9 @@ void xprt_connect(struct rpc_task *task) xprt->ops->close(xprt); if (!xprt_connected(xprt)) { - task->tk_timeout = task->tk_rqstp->rq_timeout; task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; - rpc_sleep_on(&xprt->pending, task, NULL); + rpc_sleep_on_timeout(&xprt->pending, task, NULL, + jiffies + task->tk_rqstp->rq_timeout); if (test_bit(XPRT_CLOSING, &xprt->state)) return; @@ -1080,8 +1086,8 @@ void xprt_wait_for_reply_request_def(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - task->tk_timeout = req->rq_timeout; - rpc_sleep_on(&req->rq_xprt->pending, task, xprt_timer); + rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer, + jiffies + req->rq_timeout); } EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def); @@ -1099,12 +1105,14 @@ void xprt_wait_for_reply_request_rtt(struct rpc_task *task) struct rpc_rtt *rtt = clnt->cl_rtt; struct rpc_rqst *req = task->tk_rqstp; unsigned long max_timeout = clnt->cl_timeout->to_maxval; + unsigned long timeout; - task->tk_timeout = rpc_calc_rto(rtt, timer); - task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; - if (task->tk_timeout > max_timeout || task->tk_timeout == 0) - task->tk_timeout = max_timeout; - rpc_sleep_on(&req->rq_xprt->pending, task, xprt_timer); + timeout = rpc_calc_rto(rtt, timer); + timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; + if (timeout > max_timeout || timeout == 0) + timeout = max_timeout; + rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer, + jiffies + timeout); } EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt); @@ -1656,7 +1664,6 @@ void xprt_reserve(struct rpc_task *task) if (task->tk_rqstp != NULL) return; - task->tk_timeout = 0; task->tk_status = -EAGAIN; if (!xprt_throttle_congested(xprt, task)) xprt_do_reserve(xprt, task); @@ -1679,7 +1686,6 @@ void xprt_retry_reserve(struct rpc_task *task) if (task->tk_rqstp != NULL) return; - task->tk_timeout = 0; task->tk_status = -EAGAIN; xprt_do_reserve(xprt, task); } -- cgit v1.2.3 From 5efd1876e61fe61b61e2d056782027c11bcd0982 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:50 -0400 Subject: SUNRPC: Fix up tracking of timeouts Add a helper to ensure that debugfs and friends print out the correct current task timeout value. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 1 + include/trace/events/sunrpc.h | 2 +- net/sunrpc/clnt.c | 2 +- net/sunrpc/debugfs.c | 2 +- net/sunrpc/sched.c | 14 ++++++++++++++ 5 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 61ba533ec058..c5ad02c7a4b3 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -226,6 +226,7 @@ void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_destroy_wait_queue(struct rpc_wait_queue *); +unsigned long rpc_task_timeout(const struct rpc_task *task); void rpc_sleep_on_timeout(struct rpc_wait_queue *queue, struct rpc_task *task, rpc_action action, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5e3b77d9daa7..dd301db64521 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -186,7 +186,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued, __entry->client_id = task->tk_client ? task->tk_client->cl_clid : -1; __entry->task_id = task->tk_pid; - __entry->timeout = task->tk_timeout; + __entry->timeout = rpc_task_timeout(task); __entry->runstate = task->tk_runstate; __entry->status = task->tk_status; __entry->flags = task->tk_flags; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 216d5e5e3b54..b25f317d0ee2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2874,7 +2874,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt, printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n", task->tk_pid, task->tk_flags, task->tk_status, - clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, + clnt, task->tk_rqstp, rpc_task_timeout(task), task->tk_ops, clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), task->tk_action, rpc_waitq); } diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 19bb356230ed..95ebd76b132d 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -33,7 +33,7 @@ tasks_show(struct seq_file *f, void *v) seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n", task->tk_pid, task->tk_flags, task->tk_status, - clnt->cl_clid, xid, task->tk_timeout, task->tk_ops, + clnt->cl_clid, xid, rpc_task_timeout(task), task->tk_ops, clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), task->tk_action, rpc_waitq); return 0; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 7e0f7b83262f..40944c34a9e4 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -58,6 +58,20 @@ static struct rpc_wait_queue delay_queue; struct workqueue_struct *rpciod_workqueue __read_mostly; struct workqueue_struct *xprtiod_workqueue __read_mostly; +unsigned long +rpc_task_timeout(const struct rpc_task *task) +{ + unsigned long timeout = READ_ONCE(task->tk_timeout); + + if (timeout != 0) { + unsigned long now = jiffies; + if (time_before(now, timeout)) + return timeout - now; + } + return 0; +} +EXPORT_SYMBOL_GPL(rpc_task_timeout); + /* * Disable the timer for a given RPC task. Should be called with * queue->lock and bh_disabled in order to avoid races within -- cgit v1.2.3 From 24a9d9a21e568f494198eea2bb864e0b6c593051 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:51 -0400 Subject: SUNRPC: Simplify queue timeouts using timer_reduce() Simplify the setting of queue timeouts by using the timer_reduce() function. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 1 - net/sunrpc/sched.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index c5ad02c7a4b3..bf9d6ee7f00f 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -183,7 +183,6 @@ struct rpc_task_setup { struct rpc_timer { struct timer_list timer; struct list_head list; - unsigned long expires; }; /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 40944c34a9e4..301e0f7f1dc9 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -92,8 +92,7 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) static void rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) { - queue->timer_list.expires = expires; - mod_timer(&queue->timer_list.timer, expires); + timer_reduce(&queue->timer_list.timer, expires); } /* @@ -107,8 +106,7 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task, task->tk_pid, jiffies_to_msecs(timeout - jiffies)); task->tk_timeout = timeout; - if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires)) - rpc_set_queue_timer(queue, timeout); + rpc_set_queue_timer(queue, timeout); list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } -- cgit v1.2.3 From 5ad64b36dda962797ce3ed579a27189ec7482d0d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:54 -0400 Subject: SUNRPC: Add tracking of RPC level errors Add variables to track RPC level errors so that we can distinguish between issue that arose in the RPC transport layer as opposed to those arising from the reply message. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 2 ++ net/sunrpc/clnt.c | 40 +++++++++++++++++++++++++++------------- net/sunrpc/xprtsock.c | 1 + 3 files changed, 30 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index bf9d6ee7f00f..d0e451868f02 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -61,6 +61,8 @@ struct rpc_task { struct rpc_wait tk_wait; /* RPC wait */ } u; + int tk_rpc_status; /* Result of last RPC operation */ + /* * RPC call state */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b25f317d0ee2..315f9c9cb72d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1468,6 +1468,7 @@ static int __rpc_restart_call(struct rpc_task *task, void (*action)(struct rpc_task *)) { task->tk_status = 0; + task->tk_rpc_status = 0; task->tk_action = action; return 1; } @@ -1510,6 +1511,19 @@ const char return "no proc"; } +static void +__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status) +{ + task->tk_rpc_status = rpc_status; + rpc_exit(task, tk_status); +} + +static void +rpc_call_rpcerror(struct rpc_task *task, int status) +{ + __rpc_call_rpcerror(task, status, status); +} + /* * 0. Initial state * @@ -1574,7 +1588,7 @@ call_reserveresult(struct rpc_task *task) printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n", __func__, status); - rpc_exit(task, -EIO); + rpc_call_rpcerror(task, -EIO); return; } @@ -1602,7 +1616,7 @@ call_reserveresult(struct rpc_task *task) __func__, status); break; } - rpc_exit(task, status); + rpc_call_rpcerror(task, status); } /* @@ -1670,7 +1684,7 @@ call_refreshresult(struct rpc_task *task) } dprintk("RPC: %5u %s: refresh creds failed with error %d\n", task->tk_pid, __func__, status); - rpc_exit(task, status); + rpc_call_rpcerror(task, status); } /* @@ -1721,7 +1735,7 @@ call_allocate(struct rpc_task *task) if (status == 0) return; if (status != -ENOMEM) { - rpc_exit(task, status); + rpc_call_rpcerror(task, status); return; } @@ -1790,7 +1804,7 @@ call_encode(struct rpc_task *task) task->tk_action = call_refresh; break; default: - rpc_exit(task, task->tk_status); + rpc_call_rpcerror(task, task->tk_status); } return; } else { @@ -1931,7 +1945,7 @@ call_bind_status(struct rpc_task *task) task->tk_pid, -task->tk_status); } - rpc_exit(task, status); + rpc_call_rpcerror(task, status); return; retry_timeout: @@ -1966,7 +1980,7 @@ call_connect(struct rpc_task *task) if (task->tk_status < 0) return; if (task->tk_flags & RPC_TASK_NOCONNECT) { - rpc_exit(task, -ENOTCONN); + rpc_call_rpcerror(task, -ENOTCONN); return; } if (!xprt_prepare_transmit(task)) @@ -2026,7 +2040,7 @@ call_connect_status(struct rpc_task *task) task->tk_action = call_transmit; return; } - rpc_exit(task, status); + rpc_call_rpcerror(task, status); return; out_retry: /* Check for timeouts before looping back to call_bind */ @@ -2111,7 +2125,7 @@ call_transmit_status(struct rpc_task *task) if (!task->tk_msg.rpc_proc->p_proc) trace_xprt_ping(task->tk_xprt, task->tk_status); - rpc_exit(task, task->tk_status); + rpc_call_rpcerror(task, task->tk_status); return; } /* fall through */ @@ -2275,7 +2289,7 @@ call_status(struct rpc_task *task) rpc_check_timeout(task); return; out_exit: - rpc_exit(task, status); + rpc_call_rpcerror(task, status); } static bool @@ -2299,7 +2313,7 @@ rpc_check_timeout(struct rpc_task *task) task->tk_timeouts++; if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) { - rpc_exit(task, -ETIMEDOUT); + rpc_call_rpcerror(task, -ETIMEDOUT); return; } @@ -2310,9 +2324,9 @@ rpc_check_timeout(struct rpc_task *task) task->tk_xprt->servername); } if (task->tk_flags & RPC_TASK_TIMEOUT) - rpc_exit(task, -ETIMEDOUT); + rpc_call_rpcerror(task, -ETIMEDOUT); else - rpc_exit(task, -EIO); + __rpc_call_rpcerror(task, -EIO, -ETIMEDOUT); return; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b4b4b8db143c..c69951ed2ebc 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2017,6 +2017,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) * we'll need to figure out how to pass a namespace to * connect. */ + task->tk_rpc_status = -ENOTCONN; rpc_exit(task, -ENOTCONN); return; } -- cgit v1.2.3 From ae6ec918474597a13a2648c54b6f12fb8cf0a55e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:58 -0400 Subject: SUNRPC: Add the 'softerr' rpc_client flag Add the 'softerr' rpc client flag that sets the RPC_TASK_TIMEOUT flag on all new rpc tasks that are attached to that rpc client. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/clnt.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 98bc9883b230..943762acfcd4 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -50,6 +50,7 @@ struct rpc_clnt { struct rpc_iostats * cl_metrics; /* per-client statistics */ unsigned int cl_softrtry : 1,/* soft timeouts */ + cl_softerr : 1,/* Timeouts return errors */ cl_discrtry : 1,/* disconnect before retry */ cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ @@ -144,6 +145,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7) #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) +#define RPC_CLNT_CREATE_SOFTERR (1UL << 10) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b16322eb6c42..e933f1185317 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -484,8 +484,11 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, } clnt->cl_softrtry = 1; - if (args->flags & RPC_CLNT_CREATE_HARDRTRY) + if (args->flags & (RPC_CLNT_CREATE_HARDRTRY|RPC_CLNT_CREATE_SOFTERR)) { clnt->cl_softrtry = 0; + if (args->flags & RPC_CLNT_CREATE_SOFTERR) + clnt->cl_softerr = 1; + } if (args->flags & RPC_CLNT_CREATE_AUTOBIND) clnt->cl_autobind = 1; @@ -623,6 +626,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_softrtry = clnt->cl_softrtry; + new->cl_softerr = clnt->cl_softerr; new->cl_noretranstimeo = clnt->cl_noretranstimeo; new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; @@ -1001,6 +1005,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; + if (clnt->cl_softerr) + task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; if (atomic_read(&clnt->cl_swapper)) -- cgit v1.2.3 From 7b1355b615c67e8fcbfb508e6baee83aed9dee96 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:59:00 -0400 Subject: NFS: Move internal constants out of uapi/linux/nfs_mount.h When the label says "for internal use only", then it doesn't belong in the 'uapi' subtree. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 9 +++++++++ include/uapi/linux/nfs_mount.h | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c827d31298cc..013ac5b54a09 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -139,6 +139,15 @@ struct nfs_server { struct nfs_iostats __percpu *io_stats; /* I/O statistics */ atomic_long_t writeback; /* number of writeback pages */ int flags; /* various flags */ + +/* The following are for internal use only. Also see uapi/linux/nfs_mount.h */ +#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000 +#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000 +#define NFS_MOUNT_NORESVPORT 0x40000 +#define NFS_MOUNT_LEGACY_INTERFACE 0x80000 +#define NFS_MOUNT_LOCAL_FLOCK 0x100000 +#define NFS_MOUNT_LOCAL_FCNTL 0x200000 + unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ diff --git a/include/uapi/linux/nfs_mount.h b/include/uapi/linux/nfs_mount.h index e44e00616ab5..e3bcfc6aa3b0 100644 --- a/include/uapi/linux/nfs_mount.h +++ b/include/uapi/linux/nfs_mount.h @@ -66,13 +66,4 @@ struct nfs_mount_data { #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF -/* The following are for internal use only */ -#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000 -#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000 -#define NFS_MOUNT_NORESVPORT 0x40000 -#define NFS_MOUNT_LEGACY_INTERFACE 0x80000 - -#define NFS_MOUNT_LOCAL_FLOCK 0x100000 -#define NFS_MOUNT_LOCAL_FCNTL 0x200000 - #endif -- cgit v1.2.3 From 91a575e1a98451d12df713f267a9a210a9e5dcf9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:59:01 -0400 Subject: NFS: Add a mount option "softerr" to allow clients to see ETIMEDOUT errors Add a mount option that exposes the ETIMEDOUT errors that occur during soft timeouts to the application. This allows aware applications to distinguish between server disk IO errors and client timeout errors. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 2 ++ fs/nfs/super.c | 15 ++++++++++++--- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 90d71fda65ce..f74638c5e5b4 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -598,6 +598,8 @@ int nfs_init_server_rpcclient(struct nfs_server *server, sizeof(server->client->cl_timeout_default)); server->client->cl_timeout = &server->client->cl_timeout_default; server->client->cl_softrtry = 0; + if (server->flags & NFS_MOUNT_SOFTERR) + server->client->cl_softerr = 1; if (server->flags & NFS_MOUNT_SOFT) server->client->cl_softrtry = 1; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c27ac96a95bd..19783e8ba9fb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -78,7 +78,7 @@ enum { /* Mount options that take no arguments */ - Opt_soft, Opt_hard, + Opt_soft, Opt_softerr, Opt_hard, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, @@ -125,6 +125,7 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_sloppy, "sloppy" }, { Opt_soft, "soft" }, + { Opt_softerr, "softerr" }, { Opt_hard, "hard" }, { Opt_deprecated, "intr" }, { Opt_deprecated, "nointr" }, @@ -628,7 +629,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, const char *str; const char *nostr; } nfs_info[] = { - { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_SOFT, ",soft", "" }, + { NFS_MOUNT_SOFTERR, ",softerr", "" }, { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, @@ -658,6 +660,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults) seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ); + if (!(nfss->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))) + seq_puts(m, ",hard"); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) seq_puts(m, nfs_infop->str); @@ -1239,10 +1243,15 @@ static int nfs_parse_mount_options(char *raw, */ case Opt_soft: mnt->flags |= NFS_MOUNT_SOFT; + mnt->flags &= ~NFS_MOUNT_SOFTERR; break; - case Opt_hard: + case Opt_softerr: + mnt->flags |= NFS_MOUNT_SOFTERR; mnt->flags &= ~NFS_MOUNT_SOFT; break; + case Opt_hard: + mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); + break; case Opt_posix: mnt->flags |= NFS_MOUNT_POSIX; break; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 013ac5b54a09..0fbc5d3c5e53 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -147,6 +147,7 @@ struct nfs_server { #define NFS_MOUNT_LEGACY_INTERFACE 0x80000 #define NFS_MOUNT_LOCAL_FLOCK 0x100000 #define NFS_MOUNT_LOCAL_FCNTL 0x200000 +#define NFS_MOUNT_SOFTERR 0x400000 unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ -- cgit v1.2.3 From 6fbda89b257f25694bf4892ddbbaa472f581533b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:59:05 -0400 Subject: NFS: Replace custom error reporting mechanism with generic one Replace the NFS custom error reporting mechanism with the generic mapping_set_error(). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 25 ++++--------------------- fs/nfs/internal.h | 6 ------ fs/nfs/write.c | 40 ++++++++++++++++++++++------------------ include/linux/nfs_fs.h | 1 - 4 files changed, 26 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f807e8643ae6..144e183250c3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -199,13 +199,6 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap); * Flush any dirty pages for this process, and check for write errors. * The return status from this call provides a reliable indication of * whether any write errors occurred for this process. - * - * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to - * disk, but it retrieves and clears ctx->error after synching, despite - * the two being set at the same time in nfs_context_set_write_error(). - * This is because the former is used to notify the _next_ call to - * nfs_file_write() that a write error occurred, and hence cause it to - * fall back to doing a synchronous write. */ static int nfs_file_fsync_commit(struct file *file, int datasync) @@ -220,11 +213,8 @@ nfs_file_fsync_commit(struct file *file, int datasync) nfs_inc_stats(inode, NFSIOS_VFSFSYNC); do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) { - ret = xchg(&ctx->error, 0); - if (ret) - goto out; - } + if (status == 0) + status = file_check_and_advance_wb_err(file); if (status < 0) { ret = status; goto out; @@ -245,13 +235,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) trace_nfs_fsync_enter(inode); do { - struct nfs_open_context *ctx = nfs_file_open_context(file); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) { - int ret2 = xchg(&ctx->error, 0); - if (ret2) - ret = ret2; - } + ret = file_write_and_wait_range(file, start, end); if (ret != 0) break; ret = nfs_file_fsync_commit(file, datasync); @@ -600,8 +584,7 @@ static int nfs_need_check_write(struct file *filp, struct inode *inode) struct nfs_open_context *ctx; ctx = nfs_file_open_context(filp); - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) || - nfs_ctx_key_to_expire(ctx, inode)) + if (nfs_ctx_key_to_expire(ctx, inode)) return 1; return 0; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3cefd0ed01be..196534634c3a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -773,9 +773,3 @@ static inline bool nfs_error_is_fatal(int err) } } -static inline void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) -{ - ctx->error = error; - smp_wmb(); - set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); -} diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 64cf6a340ba6..03cde38ecd31 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -244,6 +244,12 @@ static void nfs_set_pageerror(struct address_space *mapping) nfs_zap_mapping(mapping->host, mapping); } +static void nfs_mapping_set_error(struct page *page, int error) +{ + SetPageError(page); + mapping_set_error(page_file_mapping(page), error); +} + /* * nfs_page_group_search_locked * @head - head request of page group @@ -582,9 +588,9 @@ release_request: return ERR_PTR(ret); } -static void nfs_write_error_remove_page(struct nfs_page *req) +static void nfs_write_error(struct nfs_page *req, int error) { - SetPageError(req->wb_page); + nfs_mapping_set_error(req->wb_page, error); nfs_end_page_writeback(req); nfs_release_request(req); } @@ -608,6 +614,7 @@ nfs_error_is_fatal_on_server(int err) static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) { + struct address_space *mapping; struct nfs_page *req; int ret = 0; @@ -621,19 +628,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, nfs_set_page_writeback(page); WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); - ret = req->wb_context->error; /* If there is a fatal error that covers this write, just exit */ - if (nfs_error_is_fatal_on_server(ret)) + ret = 0; + mapping = page_file_mapping(page); + if (test_bit(AS_ENOSPC, &mapping->flags) || + test_bit(AS_EIO, &mapping->flags)) goto out_launder; - ret = 0; if (!nfs_pageio_add_request(pgio, req)) { ret = pgio->pg_error; /* * Remove the problematic req upon fatal errors on the server */ if (nfs_error_is_fatal(ret)) { - nfs_context_set_write_error(req->wb_context, ret); if (nfs_error_is_fatal_on_server(ret)) goto out_launder; } else @@ -645,7 +652,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, out: return ret; out_launder: - nfs_write_error_remove_page(req); + nfs_write_error(req, ret); return 0; } @@ -998,7 +1005,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { nfs_set_pageerror(page_file_mapping(req->wb_page)); - nfs_context_set_write_error(req->wb_context, hdr->error); + nfs_mapping_set_error(req->wb_page, hdr->error); goto remove_req; } if (nfs_write_need_commit(hdr)) { @@ -1422,14 +1429,10 @@ static void nfs_async_write_error(struct list_head *head, int error) while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - if (nfs_error_is_fatal(error)) { - nfs_context_set_write_error(req->wb_context, error); - if (nfs_error_is_fatal_on_server(error)) { - nfs_write_error_remove_page(req); - continue; - } - } - nfs_redirty_request(req); + if (nfs_error_is_fatal(error)) + nfs_write_error(req, error); + else + nfs_redirty_request(req); } } @@ -1843,9 +1846,10 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) req->wb_bytes, (long long)req_offset(req)); if (status < 0) { - nfs_context_set_write_error(req->wb_context, status); - if (req->wb_page) + if (req->wb_page) { + nfs_mapping_set_error(req->wb_page, status); nfs_inode_remove_request(req); + } dprintk_cont(", error = %d\n", status); goto next; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 40e30376130b..d363d5765cdf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -76,7 +76,6 @@ struct nfs_open_context { fmode_t mode; unsigned long flags; -#define NFS_CONTEXT_ERROR_WRITE (0) #define NFS_CONTEXT_RESEND_WRITES (1) #define NFS_CONTEXT_BAD (2) #define NFS_CONTEXT_UNLOCK (3) -- cgit v1.2.3 From 28b1d3f5a772b705ca76df620eb9f686aa2d0b4c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:59:07 -0400 Subject: NFS: Remove unused argument from nfs_create_request() All the callers of nfs_create_request() are now creating page group heads, so we can remove the redundant 'last' page argument. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 4 ++-- fs/nfs/pagelist.c | 16 ++++++++-------- fs/nfs/read.c | 4 ++-- fs/nfs/write.c | 2 +- include/linux/nfs_page.h | 1 - 5 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0fd811ac08b5..2d301a1a73e2 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -492,7 +492,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); /* XXX do we need to do the eof zeroing found in async_filler? */ - req = nfs_create_request(dreq->ctx, pagevec[i], NULL, + req = nfs_create_request(dreq->ctx, pagevec[i], pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); @@ -899,7 +899,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - req = nfs_create_request(dreq->ctx, pagevec[i], NULL, + req = nfs_create_request(dreq->ctx, pagevec[i], pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 66a5c5d4a777..b8301c40dd78 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -297,8 +297,8 @@ out: static struct nfs_page * __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, - struct nfs_page *last, unsigned int pgbase, - unsigned int offset, unsigned int count) + unsigned int pgbase, unsigned int offset, + unsigned int count) { struct nfs_page *req; struct nfs_open_context *ctx = l_ctx->open_context; @@ -327,7 +327,6 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, req->wb_bytes = count; req->wb_context = get_nfs_open_context(ctx); kref_init(&req->wb_kref); - nfs_page_group_init(req, last); return req; } @@ -335,7 +334,6 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, * nfs_create_request - Create an NFS read/write request. * @ctx: open context to use * @page: page to write - * @last: last nfs request created for this page group or NULL if head * @offset: starting offset within the page for the write * @count: number of bytes to read/write * @@ -345,15 +343,16 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, */ struct nfs_page * nfs_create_request(struct nfs_open_context *ctx, struct page *page, - struct nfs_page *last, unsigned int offset, - unsigned int count) + unsigned int offset, unsigned int count) { struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx); struct nfs_page *ret; if (IS_ERR(l_ctx)) return ERR_CAST(l_ctx); - ret = __nfs_create_request(l_ctx, page, last, offset, offset, count); + ret = __nfs_create_request(l_ctx, page, offset, offset, count); + if (!IS_ERR(ret)) + nfs_page_group_init(ret, NULL); nfs_put_lock_context(l_ctx); return ret; } @@ -365,11 +364,12 @@ nfs_create_subreq(struct nfs_page *req, struct nfs_page *last, { struct nfs_page *ret; - ret = __nfs_create_request(req->wb_lock_context, req->wb_page, last, + ret = __nfs_create_request(req->wb_lock_context, req->wb_page, pgbase, offset, count); if (!IS_ERR(ret)) { nfs_lock_request(ret); ret->wb_index = req->wb_index; + nfs_page_group_init(ret, last); } return ret; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 1d95a60b2586..fad1333dbf71 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -118,7 +118,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(ctx, page, NULL, 0, len); + new = nfs_create_request(ctx, page, 0, len); if (IS_ERR(new)) { unlock_page(page); return PTR_ERR(new); @@ -363,7 +363,7 @@ readpage_async_filler(void *data, struct page *page) if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(desc->ctx, page, NULL, 0, len); + new = nfs_create_request(desc->ctx, page, 0, len); if (IS_ERR(new)) goto out_error; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 03cde38ecd31..b9bcbd06a628 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1171,7 +1171,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) goto out; - req = nfs_create_request(ctx, page, NULL, offset, bytes); + req = nfs_create_request(ctx, page, offset, bytes); if (IS_ERR(req)) goto out; nfs_inode_add_request(inode, req); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index ad69430fd0eb..b7d0f15615c2 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -114,7 +114,6 @@ struct nfs_pageio_descriptor { extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, struct page *page, - struct nfs_page *last, unsigned int offset, unsigned int count); extern void nfs_release_request(struct nfs_page *); -- cgit v1.2.3 From 33344e0f7eaa2efbf9fcc55557d02e8603aa7012 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:59:08 -0400 Subject: pNFS: Add tracking to limit the number of pNFS retries When the client is reading or writing using pNFS, and hits an error on the DS, then it typically sends a LAYOUTERROR and/or LAYOUTRETURN to the MDS, before redirtying the failed pages, and going for a new round of reads/writebacks. The problem is that if the server has no way to fix the DS, then we may need a way to interrupt this loop after a set number of attempts have been made. This patch adds an optional module parameter that allows the admin to specify how many times to retry the read/writeback process before failing with a fatal error. The default behaviour is to retry forever. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 7 +++++++ fs/nfs/flexfilelayout/flexfilelayout.c | 8 ++++++++ fs/nfs/pagelist.c | 14 +++++++++++++- fs/nfs/write.c | 5 +++++ include/linux/nfs_page.h | 4 +++- 5 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2d301a1a73e2..2436bd92bc00 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -663,6 +663,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) } list_for_each_entry_safe(req, tmp, &reqs, wb_list) { + /* Bump the transmission count */ + req->wb_nio++; if (!nfs_pageio_add_request(&desc, req)) { nfs_list_move_request(req, &failed); spin_lock(&cinfo.inode->i_lock); @@ -703,6 +705,11 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { + /* + * Despite the reboot, the write was successful, + * so reset wb_nio. + */ + req->wb_nio = 0; /* Note the rewrite will go through mds */ nfs_mark_request_commit(req, NULL, &cinfo, 0); } else diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 6673d4ff5a2a..9fdbcfd3e39d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -28,6 +28,8 @@ #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) #define FF_LAYOUTRETURN_MAXERR 20 +static unsigned short io_maxretrans; + static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, struct nfs_pgio_header *hdr); static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, @@ -925,6 +927,7 @@ retry: pgm = &pgio->pg_mirrors[0]; pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; + pgio->pg_maxretrans = io_maxretrans; return; out_nolseg: if (pgio->pg_error < 0) @@ -992,6 +995,7 @@ retry: pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; } + pgio->pg_maxretrans = io_maxretrans; return; out_mds: @@ -2515,3 +2519,7 @@ MODULE_DESCRIPTION("The NFSv4 flexfile layout driver"); module_init(nfs4flexfilelayout_init); module_exit(nfs4flexfilelayout_exit); + +module_param(io_maxretrans, ushort, 0644); +MODULE_PARM_DESC(io_maxretrans, "The number of times the NFSv4.1 client " + "retries an I/O request before returning an error. "); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b8301c40dd78..4a31284f411e 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -16,8 +16,8 @@ #include #include #include -#include #include +#include #include #include @@ -327,6 +327,7 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, req->wb_bytes = count; req->wb_context = get_nfs_open_context(ctx); kref_init(&req->wb_kref); + req->wb_nio = 0; return req; } @@ -370,6 +371,7 @@ nfs_create_subreq(struct nfs_page *req, struct nfs_page *last, nfs_lock_request(ret); ret->wb_index = req->wb_index; nfs_page_group_init(ret, last); + ret->wb_nio = req->wb_nio; } return ret; } @@ -724,6 +726,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_mirrors_dynamic = NULL; desc->pg_mirrors = desc->pg_mirrors_static; nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); + desc->pg_maxretrans = 0; } /** @@ -983,6 +986,15 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, return 0; mirror->pg_base = req->wb_pgbase; } + + if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) { + if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR) + desc->pg_error = -ETIMEDOUT; + else + desc->pg_error = -EIO; + return 0; + } + if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; nfs_list_move_request(req, &mirror->pg_list); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b9bcbd06a628..294604784f70 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1009,6 +1009,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto remove_req; } if (nfs_write_need_commit(hdr)) { + /* Reset wb_nio, since the write was successful. */ + req->wb_nio = 0; memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, hdr->pgio_mirror_idx); @@ -1142,6 +1144,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = end - req->wb_offset; else req->wb_bytes = rqend - req->wb_offset; + req->wb_nio = 0; return req; out_flushme: /* @@ -1416,6 +1419,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, */ static void nfs_redirty_request(struct nfs_page *req) { + /* Bump the transmission count */ + req->wb_nio++; nfs_mark_request_dirty(req); set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); nfs_end_page_writeback(req); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index b7d0f15615c2..8b36800d342d 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -53,6 +53,7 @@ struct nfs_page { struct nfs_write_verifier wb_verf; /* Commit cookie */ struct nfs_page *wb_this_page; /* list of reqs for this page */ struct nfs_page *wb_head; /* head pointer for req list */ + unsigned short wb_nio; /* Number of I/O attempts */ }; struct nfs_pageio_descriptor; @@ -87,7 +88,6 @@ struct nfs_pgio_mirror { }; struct nfs_pageio_descriptor { - unsigned char pg_moreio : 1; struct inode *pg_inode; const struct nfs_pageio_ops *pg_ops; const struct nfs_rw_ops *pg_rw_ops; @@ -105,6 +105,8 @@ struct nfs_pageio_descriptor { struct nfs_pgio_mirror pg_mirrors_static[1]; struct nfs_pgio_mirror *pg_mirrors_dynamic; u32 pg_mirror_idx; /* current mirror */ + unsigned short pg_maxretrans; + unsigned char pg_moreio : 1; }; /* arbitrarily selected limit to number of mirrors */ -- cgit v1.2.3 From 9fcd5960e88bbdc74a70d9e3a5ab46b489fc4b80 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:59:11 -0400 Subject: NFS: Add a helper to return a pointer to the open context of a struct nfs_page Add a helper for when we remove the explicit pointer to the open context. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 4 ++-- fs/nfs/flexfilelayout/flexfilelayout.c | 6 +++--- fs/nfs/pagelist.c | 8 ++++---- fs/nfs/pnfs.c | 4 ++-- fs/nfs/pnfs.h | 4 ++-- fs/nfs/read.c | 2 +- fs/nfs/write.c | 20 +++++++++++--------- include/linux/nfs_page.h | 6 ++++++ 8 files changed, 31 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 61f46facb39c..21d9f3bfbc81 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -917,7 +917,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, pnfs_generic_pg_check_layout(pgio); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, - req->wb_context, + nfs_req_openctx(req), 0, NFS4_MAX_UINT64, IOMODE_READ, @@ -944,7 +944,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, pnfs_generic_pg_check_layout(pgio); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, - req->wb_context, + nfs_req_openctx(req), 0, NFS4_MAX_UINT64, IOMODE_RW, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 9fdbcfd3e39d..9920c52bd0cd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -873,7 +873,7 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, { pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, + nfs_req_openctx(req), 0, NFS4_MAX_UINT64, IOMODE_READ, @@ -953,7 +953,7 @@ retry: pnfs_generic_pg_check_layout(pgio); if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, + nfs_req_openctx(req), 0, NFS4_MAX_UINT64, IOMODE_RW, @@ -1010,7 +1010,7 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, { if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, + nfs_req_openctx(req), 0, NFS4_MAX_UINT64, IOMODE_RW, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4a31284f411e..ce6440b79328 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -47,7 +47,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->req = nfs_list_entry(mirror->pg_list.next); hdr->inode = desc->pg_inode; - hdr->cred = hdr->req->wb_context->cred; + hdr->cred = nfs_req_openctx(hdr->req)->cred; hdr->io_start = req_offset(hdr->req); hdr->good_bytes = mirror->pg_count; hdr->io_completion = desc->pg_io_completion; @@ -578,7 +578,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, hdr->args.pgbase = req->wb_pgbase; hdr->args.pages = hdr->page_array.pagevec; hdr->args.count = count; - hdr->args.context = get_nfs_open_context(req->wb_context); + hdr->args.context = get_nfs_open_context(nfs_req_openctx(req)); hdr->args.lock_context = req->wb_lock_context; hdr->args.stable = NFS_UNSTABLE; switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { @@ -935,9 +935,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, struct file_lock_context *flctx; if (prev) { - if (!nfs_match_open_context(req->wb_context, prev->wb_context)) + if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev))) return false; - flctx = d_inode(req->wb_context->dentry)->i_flctx; + flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx; if (flctx != NULL && !(list_empty_careful(&flctx->flc_posix) && list_empty_careful(&flctx->flc_flock)) && diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7066cd7c7aff..83722e936b4a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2436,7 +2436,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, + nfs_req_openctx(req), req_offset(req), rd_size, IOMODE_READ, @@ -2463,7 +2463,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, pnfs_generic_pg_check_range(pgio, req); if (pgio->pg_lseg == NULL) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, + nfs_req_openctx(req), req_offset(req), wb_size, IOMODE_RW, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c0420b979d88..f15609c003d8 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -459,7 +459,7 @@ static inline bool pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, u32 ds_commit_idx) { - struct inode *inode = d_inode(req->wb_context->dentry); + struct inode *inode = d_inode(nfs_req_openctx(req)->dentry); struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; if (lseg == NULL || ld->mark_request_commit == NULL) @@ -471,7 +471,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, static inline bool pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { - struct inode *inode = d_inode(req->wb_context->dentry); + struct inode *inode = d_inode(nfs_req_openctx(req)->dentry); struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; if (ld == NULL || ld->clear_request_commit == NULL) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index fad1333dbf71..c799e540ed1e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); static void nfs_readpage_release(struct nfs_page *req) { - struct inode *inode = d_inode(req->wb_context->dentry); + struct inode *inode = d_inode(nfs_req_openctx(req)->dentry); dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), req->wb_bytes, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 294604784f70..bc5bb9323412 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -964,7 +964,8 @@ static void nfs_clear_request_commit(struct nfs_page *req) { if (test_bit(PG_CLEAN, &req->wb_flags)) { - struct inode *inode = d_inode(req->wb_context->dentry); + struct nfs_open_context *ctx = nfs_req_openctx(req); + struct inode *inode = d_inode(ctx->dentry); struct nfs_commit_info cinfo; nfs_init_cinfo_from_inode(&cinfo, inode); @@ -1219,7 +1220,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) return 0; l_ctx = req->wb_lock_context; do_flush = req->wb_page != page || - !nfs_match_open_context(req->wb_context, ctx); + !nfs_match_open_context(nfs_req_openctx(req), ctx); if (l_ctx && flctx && !(list_empty_careful(&flctx->flc_posix) && list_empty_careful(&flctx->flc_flock))) { @@ -1422,7 +1423,7 @@ static void nfs_redirty_request(struct nfs_page *req) /* Bump the transmission count */ req->wb_nio++; nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); + set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); nfs_end_page_writeback(req); nfs_release_request(req); } @@ -1742,7 +1743,8 @@ void nfs_init_commit(struct nfs_commit_data *data, struct nfs_commit_info *cinfo) { struct nfs_page *first = nfs_list_entry(head->next); - struct inode *inode = d_inode(first->wb_context->dentry); + struct nfs_open_context *ctx = nfs_req_openctx(first); + struct inode *inode = d_inode(ctx->dentry); /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -1750,7 +1752,7 @@ void nfs_init_commit(struct nfs_commit_data *data, list_splice_init(head, &data->pages); data->inode = inode; - data->cred = first->wb_context->cred; + data->cred = ctx->cred; data->lseg = lseg; /* reference transferred */ /* only set lwb for pnfs commit */ if (lseg) @@ -1763,7 +1765,7 @@ void nfs_init_commit(struct nfs_commit_data *data, /* Note: we always request a commit of the entire inode */ data->args.offset = 0; data->args.count = 0; - data->context = get_nfs_open_context(first->wb_context); + data->context = get_nfs_open_context(ctx); data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); @@ -1846,8 +1848,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) nfs_clear_page_commit(req->wb_page); dprintk("NFS: commit (%s/%llu %d@%lld)", - req->wb_context->dentry->d_sb->s_id, - (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)), + nfs_req_openctx(req)->dentry->d_sb->s_id, + (unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)), req->wb_bytes, (long long)req_offset(req)); if (status < 0) { @@ -1871,7 +1873,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* We have a mismatch. Write the page again */ dprintk_cont(" mismatch\n"); nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); + set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); next: nfs_unlock_and_release_request(req); /* Latency breaker */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 8b36800d342d..1ea13e94feb7 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -200,4 +200,10 @@ loff_t req_offset(struct nfs_page *req) return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset; } +static inline struct nfs_open_context * +nfs_req_openctx(struct nfs_page *req) +{ + return req->wb_context; +} + #endif /* _LINUX_NFS_PAGE_H */ -- cgit v1.2.3 From c79d183ebb76311ed434bd558279769551d02d5a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:59:12 -0400 Subject: NFS: Remove redundant open context from nfs_page The lock context already references and tracks the open context, so take the opportunity to save some space in struct nfs_page. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pagelist.c | 8 ++------ include/linux/nfs_page.h | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ce6440b79328..6ec30014a439 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -325,7 +325,6 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, req->wb_offset = offset; req->wb_pgbase = pgbase; req->wb_bytes = count; - req->wb_context = get_nfs_open_context(ctx); kref_init(&req->wb_kref); req->wb_nio = 0; return req; @@ -414,8 +413,8 @@ void nfs_unlock_and_release_request(struct nfs_page *req) static void nfs_clear_request(struct nfs_page *req) { struct page *page = req->wb_page; - struct nfs_open_context *ctx = req->wb_context; struct nfs_lock_context *l_ctx = req->wb_lock_context; + struct nfs_open_context *ctx; if (page != NULL) { put_page(page); @@ -424,16 +423,13 @@ static void nfs_clear_request(struct nfs_page *req) if (l_ctx != NULL) { if (atomic_dec_and_test(&l_ctx->io_count)) { wake_up_var(&l_ctx->io_count); + ctx = l_ctx->open_context; if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags)) rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq); } nfs_put_lock_context(l_ctx); req->wb_lock_context = NULL; } - if (ctx != NULL) { - put_nfs_open_context(ctx); - req->wb_context = NULL; - } } /** diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 1ea13e94feb7..0bbd587fac6a 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -42,7 +42,6 @@ struct nfs_inode; struct nfs_page { struct list_head wb_list; /* Defines state of page: */ struct page *wb_page; /* page to read in/write out */ - struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ pgoff_t wb_index; /* Offset >> PAGE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_MASK */ @@ -203,7 +202,7 @@ loff_t req_offset(struct nfs_page *req) static inline struct nfs_open_context * nfs_req_openctx(struct nfs_page *req) { - return req->wb_context; + return req->wb_lock_context->open_context; } #endif /* _LINUX_NFS_PAGE_H */ -- cgit v1.2.3 From 17e4c443c0b433354016df60a7bd3f1c6aac759c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:48 -0400 Subject: xprtrdma: Trace marshaling failures Record an event when rpcrdma_marshal_req returns a non-zero return value to help track down why an xprt close might have occurred. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 27 +++++++++++++++++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 1 + 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 962975b4313f..df9851cb82b2 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -511,6 +511,33 @@ TRACE_EVENT(xprtrdma_marshal, ) ); +TRACE_EVENT(xprtrdma_marshal_failed, + TP_PROTO(const struct rpc_rqst *rqst, + int ret + ), + + TP_ARGS(rqst, ret), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(int, ret) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->ret = ret; + ), + + TP_printk("task:%u@%u xid=0x%08x: ret=%d", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->ret + ) +); + TRACE_EVENT(xprtrdma_post_send, TP_PROTO( const struct rpcrdma_req *req, diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 231a44b9c152..45cba06655ea 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -875,6 +875,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) return 0; out_err: + trace_xprtrdma_marshal_failed(rqst, ret); switch (ret) { case -EAGAIN: xprt_wait_for_buffer_space(rqst->rq_xprt); -- cgit v1.2.3 From 79caa5fad47c69874f9efc4ac3128cc3f6d36f6e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Apr 2019 17:46:42 -0400 Subject: SUNRPC: Cache cred of process creating the rpc_client When converting kuids to AUTH_UNIX creds, etc we will want to use the same user namespace as the process that created the rpc client. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/lockd/host.c | 1 + fs/lockd/mon.c | 1 + fs/nfs/client.c | 1 + fs/nfs/mount_clnt.c | 2 ++ fs/nfsd/nfs4callback.c | 1 + include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/clnt.c | 7 +++++++ net/sunrpc/rpcb_clnt.c | 9 +++++++-- 8 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f0b5c987d6ae..d46081123f7c 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -458,6 +458,7 @@ nlm_bind_host(struct nlm_host *host) .authflavor = RPC_AUTH_UNIX, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_AUTOBIND), + .cred = current_cred(), }; /* diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 654594ef4f94..1eabd91870e6 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -82,6 +82,7 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename) .version = NSM_VERSION, .authflavor = RPC_AUTH_NULL, .flags = RPC_CLNT_CREATE_NOPING, + .cred = current_cred(), }; return rpc_create(&args); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f74638c5e5b4..a843cf3f6340 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -500,6 +500,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, .program = &nfs_program, .version = clp->rpc_ops->version, .authflavor = flavor, + .cred = current_cred(), }; if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index d979ff4fee7e..cb7c10e9721e 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -163,6 +163,7 @@ int nfs_mount(struct nfs_mount_request *info) .program = &mnt_program, .version = info->version, .authflavor = RPC_AUTH_UNIX, + .cred = current_cred(), }; struct rpc_clnt *mnt_clnt; int status; @@ -249,6 +250,7 @@ void nfs_umount(const struct nfs_mount_request *info) .version = info->version, .authflavor = RPC_AUTH_UNIX, .flags = RPC_CLNT_CREATE_NOPING, + .cred = current_cred(), }; struct rpc_message msg = { .rpc_argp = info->dirpath, diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index f7494be8dbe2..3a10399a0ef1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -868,6 +868,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c .program = &cb_program, .version = 1, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), + .cred = current_cred(), }; struct rpc_clnt *client; const struct cred *cred; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 943762acfcd4..6e8073140a5d 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -72,6 +72,7 @@ struct rpc_clnt { struct dentry *cl_debugfs; /* debugfs directory */ #endif struct rpc_xprt_iter cl_xpi; + const struct cred *cl_cred; }; /* @@ -126,6 +127,7 @@ struct rpc_create_args { unsigned long flags; char *client_name; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ + const struct cred *cred; }; struct rpc_add_xprt_test { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e933f1185317..369a2648dafc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -394,6 +394,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, if (err) goto out_no_clid; + clnt->cl_cred = get_cred(args->cred); clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_prog = args->prognumber ? : program->number; @@ -439,6 +440,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, out_no_path: rpc_free_iostats(clnt->cl_metrics); out_no_stats: + put_cred(clnt->cl_cred); rpc_free_clid(clnt); out_no_clid: kfree(clnt); @@ -631,6 +633,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; new->cl_principal = clnt->cl_principal; + new->cl_cred = get_cred(clnt->cl_cred); return new; out_err: @@ -652,6 +655,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt) .prognumber = clnt->cl_prog, .version = clnt->cl_vers, .authflavor = clnt->cl_auth->au_flavor, + .cred = clnt->cl_cred, }; return __rpc_clone_client(&args, clnt); } @@ -673,6 +677,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) .prognumber = clnt->cl_prog, .version = clnt->cl_vers, .authflavor = flavor, + .cred = clnt->cl_cred, }; return __rpc_clone_client(&args, clnt); } @@ -880,6 +885,7 @@ rpc_free_client(struct rpc_clnt *clnt) xprt_put(rcu_dereference_raw(clnt->cl_xprt)); xprt_iter_destroy(&clnt->cl_xpi); rpciod_down(); + put_cred(clnt->cl_cred); rpc_free_clid(clnt); kfree(clnt); return parent; @@ -944,6 +950,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, .prognumber = program->number, .version = vers, .authflavor = old->cl_auth->au_flavor, + .cred = old->cl_cred, }; struct rpc_clnt *clnt; int err; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 18b0cf2a923f..2277b7cdad27 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -240,6 +240,7 @@ static int rpcb_create_local_unix(struct net *net) .program = &rpcb_program, .version = RPCBVERS_2, .authflavor = RPC_AUTH_NULL, + .cred = current_cred(), /* * We turn off the idle timeout to prevent the kernel * from automatically disconnecting the socket. @@ -299,6 +300,7 @@ static int rpcb_create_local_net(struct net *net) .program = &rpcb_program, .version = RPCBVERS_2, .authflavor = RPC_AUTH_UNIX, + .cred = current_cred(), .flags = RPC_CLNT_CREATE_NOPING, }; struct rpc_clnt *clnt, *clnt4; @@ -358,7 +360,8 @@ out: static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename, const char *hostname, struct sockaddr *srvaddr, size_t salen, - int proto, u32 version) + int proto, u32 version, + const struct cred *cred) { struct rpc_create_args args = { .net = net, @@ -370,6 +373,7 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, + .cred = cred, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_NONPRIVPORT), }; @@ -745,7 +749,8 @@ void rpcb_getport_async(struct rpc_task *task) rpcb_clnt = rpcb_create(xprt->xprt_net, clnt->cl_nodename, xprt->servername, sap, salen, - xprt->prot, bind_version); + xprt->prot, bind_version, + clnt->cl_cred); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", -- cgit v1.2.3 From 1a58e8a0e5c1f188a80eb9e505bc77d78a31a4ec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Apr 2019 17:46:43 -0400 Subject: NFS: Store the credential of the mount process in the nfs_server Store the credential of the mount process so that we can determine information such as the user namespace. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 8 +++++++- fs/nfs/internal.h | 1 + fs/nfs/nfs3client.c | 1 + fs/nfs/nfs4client.c | 6 ++++++ include/linux/nfs_fs_sb.h | 3 +++ 5 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a843cf3f6340..e3baa9f1da76 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -500,7 +500,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, .program = &nfs_program, .version = clp->rpc_ops->version, .authflavor = flavor, - .cred = current_cred(), + .cred = cl_init->cred, }; if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) @@ -655,6 +655,7 @@ static int nfs_init_server(struct nfs_server *server, .proto = data->nfs_server.protocol, .net = data->net, .timeparms = &timeparms, + .cred = server->cred, }; struct nfs_client *clp; int error; @@ -923,6 +924,7 @@ void nfs_free_server(struct nfs_server *server) ida_destroy(&server->lockowner_id); ida_destroy(&server->openowner_id); nfs_free_iostats(server->io_stats); + put_cred(server->cred); kfree(server); nfs_release_automount_timer(); } @@ -943,6 +945,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, if (!server) return ERR_PTR(-ENOMEM); + server->cred = get_cred(current_cred()); + error = -ENOMEM; fattr = nfs_alloc_fattr(); if (fattr == NULL) @@ -1009,6 +1013,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, if (!server) return ERR_PTR(-ENOMEM); + server->cred = get_cred(source->cred); + error = -ENOMEM; fattr_fsinfo = nfs_alloc_fattr(); if (fattr_fsinfo == NULL) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 196534634c3a..22232e76df47 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -84,6 +84,7 @@ struct nfs_client_initdata { u32 minorversion; struct net *net; const struct rpc_timeout *timeparms; + const struct cred *cred; }; /* diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 7879f2a0fcfd..1afdb0f7473f 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -91,6 +91,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, .proto = ds_proto, .net = mds_clp->cl_net, .timeparms = &ds_timeout, + .cred = mds_srv->cred, }; struct nfs_client *clp; char buf[INET6_ADDRSTRLEN + 1]; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 1339ede979af..3ce246346f02 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -870,6 +870,7 @@ static int nfs4_set_client(struct nfs_server *server, .minorversion = minorversion, .net = net, .timeparms = timeparms, + .cred = server->cred, }; struct nfs_client *clp; @@ -931,6 +932,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, .minorversion = minor_version, .net = mds_clp->cl_net, .timeparms = &ds_timeout, + .cred = mds_srv->cred, }; char buf[INET6_ADDRSTRLEN + 1]; @@ -1107,6 +1109,8 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, if (!server) return ERR_PTR(-ENOMEM); + server->cred = get_cred(current_cred()); + auth_probe = mount_info->parsed->auth_info.flavor_len < 1; /* set up the general RPC client */ @@ -1143,6 +1147,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, parent_server = NFS_SB(data->sb); parent_client = parent_server->nfs_client; + server->cred = get_cred(parent_server->cred); + /* Initialise the client representation from the parent server */ nfs_server_copy_userdata(server, parent_server); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 0fbc5d3c5e53..1e78032a174b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -241,6 +241,9 @@ struct nfs_server { /* XDR related information */ unsigned int read_hdrsize; + + /* User namespace info */ + const struct cred *cred; }; /* Server capabilities */ -- cgit v1.2.3 From b422df915cef80333d7a1732e6ed81f41db12b79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Apr 2019 17:46:50 -0400 Subject: lockd: Store the lockd client credential in struct nlm_host When we create a new lockd client, we want to be able to pass the correct credential of the process that created the struct nlm_host. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/lockd/clntlock.c | 2 +- fs/lockd/host.c | 11 +++++++++-- include/linux/lockd/bind.h | 1 + include/linux/lockd/lockd.h | 4 +++- 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index c2a128678e6e..70f520b41a19 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -63,7 +63,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, nlm_init->protocol, nlm_version, nlm_init->hostname, nlm_init->noresvport, - nlm_init->net); + nlm_init->net, nlm_init->cred); if (host == NULL) goto out_nohost; if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index d46081123f7c..7d46fafdbbe5 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -60,6 +60,7 @@ struct nlm_lookup_host_info { const size_t hostname_len; /* it's length */ const int noresvport; /* use non-priv port */ struct net *net; /* network namespace to bind */ + const struct cred *cred; }; /* @@ -162,6 +163,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, host->h_nsmhandle = nsm; host->h_addrbuf = nsm->sm_addrbuf; host->net = ni->net; + host->h_cred = get_cred(ni->cred), strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); out: @@ -188,6 +190,7 @@ static void nlm_destroy_host_locked(struct nlm_host *host) clnt = host->h_rpcclnt; if (clnt != NULL) rpc_shutdown_client(clnt); + put_cred(host->h_cred); kfree(host); ln->nrhosts--; @@ -202,6 +205,8 @@ static void nlm_destroy_host_locked(struct nlm_host *host) * @version: NLM protocol version * @hostname: '\0'-terminated hostname of server * @noresvport: 1 if non-privileged port should be used + * @net: pointer to net namespace + * @cred: pointer to cred * * Returns an nlm_host structure that matches the passed-in * [server address, transport protocol, NLM version, server hostname]. @@ -214,7 +219,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const u32 version, const char *hostname, int noresvport, - struct net *net) + struct net *net, + const struct cred *cred) { struct nlm_lookup_host_info ni = { .server = 0, @@ -226,6 +232,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, .hostname_len = strlen(hostname), .noresvport = noresvport, .net = net, + .cred = cred, }; struct hlist_head *chain; struct nlm_host *host; @@ -458,7 +465,7 @@ nlm_bind_host(struct nlm_host *host) .authflavor = RPC_AUTH_UNIX, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_AUTOBIND), - .cred = current_cred(), + .cred = host->h_cred, }; /* diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 053a4ef3d431..8c0cf1059443 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -46,6 +46,7 @@ struct nlmclnt_initdata { int noresvport; struct net *net; const struct nlmclnt_operations *nlmclnt_ops; + const struct cred *cred; }; /* diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index b065ef406770..c9b422dde542 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -70,6 +70,7 @@ struct nlm_host { struct nsm_handle *h_nsmhandle; /* NSM status handle */ char *h_addrbuf; /* address eyecatcher */ struct net *net; /* host net */ + const struct cred *h_cred; char nodename[UNX_MAXNODENAME + 1]; const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */ }; @@ -229,7 +230,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const u32 version, const char *hostname, int noresvport, - struct net *net); + struct net *net, + const struct cred *cred); void nlmclnt_release_host(struct nlm_host *); struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, const char *hostname, -- cgit v1.2.3