From ab0fc21bc7105b54bafd85bd8b82742f9e68898a Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Tue, 29 Mar 2022 19:32:07 +0800 Subject: Revert "NFSv4: Handle the special Linux file open access mode" This reverts commit 44942b4e457beda00981f616402a1a791e8c616e. After secondly opening a file with O_ACCMODE|O_DIRECT flags, nfs4_valid_open_stateid() will dereference NULL nfs4_state when lseek(). Reproducer: 1. mount -t nfs -o vers=4.2 $server_ip:/ /mnt/ 2. fd = open("/mnt/file", O_ACCMODE|O_DIRECT|O_CREAT) 3. close(fd) 4. fd = open("/mnt/file", O_ACCMODE|O_DIRECT) 5. lseek(fd) Reported-by: Lyu Tao Signed-off-by: ChenXiaoSong Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 - fs/nfs/nfs4file.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e51d86707fca..e72900c059ee 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1180,7 +1180,6 @@ int nfs_open(struct inode *inode, struct file *filp) nfs_fscache_open_file(inode, filp); return 0; } -EXPORT_SYMBOL_GPL(nfs_open); /* * This function is called whenever some part of NFS notices that diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index d258933cf8c8..f336d0a4190e 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -51,7 +51,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) return err; if ((openflags & O_ACCMODE) == 3) - return nfs_open(inode, filp); + openflags--; /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); -- cgit v1.2.3 From b243874f6f9568b2daf1a00e9222cacdc15e159c Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Tue, 29 Mar 2022 19:32:08 +0800 Subject: NFSv4: fix open failure with O_ACCMODE flag open() with O_ACCMODE|O_DIRECT flags secondly will fail. Reproducer: 1. mount -t nfs -o vers=4.2 $server_ip:/ /mnt/ 2. fd = open("/mnt/file", O_ACCMODE|O_DIRECT|O_CREAT) 3. close(fd) 4. fd = open("/mnt/file", O_ACCMODE|O_DIRECT) Server nfsd4_decode_share_access() will fail with error nfserr_bad_xdr when client use incorrect share access mode of 0. Fix this by using NFS4_SHARE_ACCESS_BOTH share access mode in client, just like firstly opening. Fixes: ce4ef7c0a8a05 ("NFS: Split out NFS v4 file operations") Signed-off-by: ChenXiaoSong Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 10 ---------- fs/nfs/internal.h | 10 ++++++++++ fs/nfs/nfs4file.c | 6 ++++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bac4cf1a308e..0365063b85a2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1991,16 +1991,6 @@ const struct dentry_operations nfs4_dentry_operations = { }; EXPORT_SYMBOL_GPL(nfs4_dentry_operations); -static fmode_t flags_to_mode(int flags) -{ - fmode_t res = (__force fmode_t)flags & FMODE_EXEC; - if ((flags & O_ACCMODE) != O_WRONLY) - res |= FMODE_READ; - if ((flags & O_ACCMODE) != O_RDONLY) - res |= FMODE_WRITE; - return res; -} - static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp) { return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 57b0497105c8..7eefa16ed381 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) return true; } +static inline fmode_t flags_to_mode(int flags) +{ + fmode_t res = (__force fmode_t)flags & FMODE_EXEC; + if ((flags & O_ACCMODE) != O_WRONLY) + res |= FMODE_READ; + if ((flags & O_ACCMODE) != O_RDONLY) + res |= FMODE_WRITE; + return res; +} + /* * Note: RFC 1813 doesn't limit the number of auth flavors that * a server can return, so make something up. diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f336d0a4190e..7b861e4f0533 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) struct dentry *parent = NULL; struct inode *dir; unsigned openflags = filp->f_flags; + fmode_t f_mode; struct iattr attr; int err; @@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp) if (err) return err; + f_mode = filp->f_mode; if ((openflags & O_ACCMODE) == 3) - openflags--; + f_mode |= flags_to_mode(openflags); /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); @@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) parent = dget_parent(dentry); dir = d_inode(parent); - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); + ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; -- cgit v1.2.3 From eb07d5a4da041fd2e30e386e5fd12d23bb31cf9e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 30 Mar 2022 11:48:37 +1100 Subject: SUNRPC: handle malloc failure in ->request_prepare If ->request_prepare() detects an error, it sets ->rq_task->tk_status. This is easy for callers to ignore. The only caller is xprt_request_enqueue_receive() and it does ignore the error, as does call_encode() which calls it. This can result in a request being queued to receive a reply without an allocated receive buffer. So instead of setting rq_task->tk_status, return an error, and store in ->tk_status only in call_encode(); The call to xprt_request_enqueue_receive() is now earlier in call_encode(), where the error can still be handled. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 ++--- net/sunrpc/clnt.c | 6 +++--- net/sunrpc/xprt.c | 23 +++++++++++++++-------- net/sunrpc/xprtsock.c | 4 ++-- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index eef5e87c03b4..f171f8c09e13 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -144,7 +144,7 @@ struct rpc_xprt_ops { unsigned short (*get_srcport)(struct rpc_xprt *xprt); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - void (*prepare_request)(struct rpc_rqst *req); + int (*prepare_request)(struct rpc_rqst *req); int (*send_request)(struct rpc_rqst *req); void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -357,10 +357,9 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); -void xprt_request_prepare(struct rpc_rqst *req); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); -void xprt_request_enqueue_receive(struct rpc_task *task); +int xprt_request_enqueue_receive(struct rpc_task *task); void xprt_request_wait_receive(struct rpc_task *task); void xprt_request_dequeue_xprt(struct rpc_task *task); bool xprt_request_need_retransmit(struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8bf2af8546d2..3c7407104d54 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1858,6 +1858,9 @@ call_encode(struct rpc_task *task) xprt_request_dequeue_xprt(task); /* Encode here so that rpcsec_gss can use correct sequence number. */ rpc_xdr_encode(task); + /* Add task to reply queue before transmission to avoid races */ + if (task->tk_status == 0 && rpc_reply_expected(task)) + task->tk_status = xprt_request_enqueue_receive(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) { /* Was the error nonfatal? */ @@ -1881,9 +1884,6 @@ call_encode(struct rpc_task *task) return; } - /* Add task to reply queue before transmission to avoid races */ - if (rpc_reply_expected(task)) - xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: task->tk_action = call_transmit; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 880bfe8dc7f6..73344ffb2692 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -69,10 +69,11 @@ /* * Local functions */ -static void xprt_init(struct rpc_xprt *xprt, struct net *net); +static void xprt_init(struct rpc_xprt *xprt, struct net *net); static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); -static void xprt_destroy(struct rpc_xprt *xprt); -static void xprt_request_init(struct rpc_task *task); +static void xprt_destroy(struct rpc_xprt *xprt); +static void xprt_request_init(struct rpc_task *task); +static int xprt_request_prepare(struct rpc_rqst *req); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); @@ -1143,16 +1144,19 @@ xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req) * @task: RPC task * */ -void +int xprt_request_enqueue_receive(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + int ret; if (!xprt_request_need_enqueue_receive(task, req)) - return; + return 0; - xprt_request_prepare(task->tk_rqstp); + ret = xprt_request_prepare(task->tk_rqstp); + if (ret) + return ret; spin_lock(&xprt->queue_lock); /* Update the softirq receive buffer */ @@ -1166,6 +1170,7 @@ xprt_request_enqueue_receive(struct rpc_task *task) /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); + return 0; } /** @@ -1452,14 +1457,16 @@ xprt_request_dequeue_xprt(struct rpc_task *task) * * Calls into the transport layer to do whatever is needed to prepare * the request for transmission or receive. + * Returns error, or zero. */ -void +static int xprt_request_prepare(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; if (xprt->ops->prepare_request) - xprt->ops->prepare_request(req); + return xprt->ops->prepare_request(req); + return 0; } /** diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 78af7518f263..9b75891b3cc0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -822,11 +822,11 @@ static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) return ret; } -static void +static int xs_stream_prepare_request(struct rpc_rqst *req) { xdr_free_bvec(&req->rq_rcv_buf); - req->rq_task->tk_status = xdr_alloc_bvec( + return xdr_alloc_bvec( &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); } -- cgit v1.2.3 From 830f1111d90e8770fcfad8bd5628e8ae6fecec06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Mar 2022 20:00:07 -0400 Subject: NFS: Replace readdir's use of xxhash() with hash_64() Both xxhash() and hash_64() appear to give similarly low collision rates with a standard linearly increasing readdir offset. They both give similarly higher collision rates when applied to ext4's offsets. So switch to using the standard hash_64(). Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 4 ---- fs/nfs/dir.c | 9 +++------ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 47a53b3362b6..14a72224b657 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -4,10 +4,6 @@ config NFS_FS depends on INET && FILE_LOCKING && MULTIUSER select LOCKD select SUNRPC - select CRYPTO - select CRYPTO_HASH - select XXHASH - select CRYPTO_XXHASH select NFS_ACL_SUPPORT if NFS_V3_ACL help Choose Y here if you want to access files residing on other diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0365063b85a2..c6b263b5faf1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include "delegation.h" #include "iostat.h" @@ -350,10 +350,7 @@ out: * of directory cookies. Content is addressed by the value of the * cookie index of the first readdir entry in a page. * - * The xxhash algorithm is chosen because it is fast, and is supposed - * to result in a decent flat distribution of hashes. - * - * We then select only the first 18 bits to avoid issues with excessive + * We select only the first 18 bits to avoid issues with excessive * memory use for the page cache XArray. 18 bits should allow the caching * of 262144 pages of sequences of readdir entries. Since each page holds * 127 readdir entries for a typical 64-bit system, that works out to a @@ -363,7 +360,7 @@ static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie) { if (cookie == 0) return 0; - return xxhash(&cookie, sizeof(cookie), 0) & NFS_READDIR_COOKIE_MASK; + return hash_64(cookie, 18); } static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie, -- cgit v1.2.3 From f00432063db1a0db484e85193eccc6845435b80e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Apr 2022 15:58:11 -0400 Subject: SUNRPC: Ensure we flush any closed sockets before xs_xprt_free() We must ensure that all sockets are closed before we call xprt_free() and release the reference to the net namespace. The problem is that calling fput() will defer closing the socket until delayed_fput() gets called. Let's fix the situation by allowing rpciod and the transport teardown code (which runs on the system wq) to call __fput_sync(), and directly close the socket. Reported-by: Felix Fu Acked-by: Al Viro Fixes: a73881c96d73 ("SUNRPC: Fix an Oops in udp_poll()") Cc: stable@vger.kernel.org # 5.1.x: 3be232f11a3c: SUNRPC: Prevent immediate close+reconnect Cc: stable@vger.kernel.org # 5.1.x: 89f42494f92f: SUNRPC: Don't call connect() more than once on a TCP socket Cc: stable@vger.kernel.org # 5.1.x Signed-off-by: Trond Myklebust --- fs/file_table.c | 1 + include/trace/events/sunrpc.h | 1 - net/sunrpc/xprt.c | 7 +------ net/sunrpc/xprtsock.c | 16 +++++++++++++--- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index 7d2e692b66a9..ada8fe814db9 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -412,6 +412,7 @@ void __fput_sync(struct file *file) } EXPORT_SYMBOL(fput); +EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ac33892da411..a4848c7bab80 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1004,7 +1004,6 @@ DEFINE_RPC_XPRT_LIFETIME_EVENT(connect); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); -DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup); DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy); DECLARE_EVENT_CLASS(rpc_xprt_event, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 73344ffb2692..ad62eba540a4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -930,12 +930,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) { - trace_xprt_disconnect_cleanup(xprt); - xprt->ops->close(xprt); - } - - if (!xprt_connected(xprt)) { + if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on_timeout(&xprt->pending, task, NULL, xprt_request_timeout(task->tk_rqstp)); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9b75891b3cc0..c6a13893e308 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -879,7 +879,7 @@ static int xs_local_send_request(struct rpc_rqst *req) /* Close the stream if the previous transmission was incomplete */ if (xs_send_request_was_aborted(transport, req)) { - xs_close(xprt); + xprt_force_disconnect(xprt); return -ENOTCONN; } @@ -915,7 +915,7 @@ static int xs_local_send_request(struct rpc_rqst *req) -status); fallthrough; case -EPIPE: - xs_close(xprt); + xprt_force_disconnect(xprt); status = -ENOTCONN; } @@ -1185,6 +1185,16 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + /* + * Make sure we're calling this in a context from which it is safe + * to call __fput_sync(). In practice that means rpciod and the + * system workqueue. + */ + if (!(current->flags & PF_WQ_WORKER)) { + WARN_ON_ONCE(1); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + return; + } if (atomic_read(&transport->xprt.swapper)) sk_clear_memalloc(sk); @@ -1208,7 +1218,7 @@ static void xs_reset_transport(struct sock_xprt *transport) mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); - fput(filp); + __fput_sync(filp); xprt_disconnect_done(xprt); } -- cgit v1.2.3 From dcc7977c7fdd0b59809cf7420ae1d5f5b5bd16ad Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 1 Apr 2022 10:59:05 +0800 Subject: NFSv4.2: Fix missing removal of SLAB_ACCOUNT on kmem_cache allocation The commit 5c60e89e71f8 ("NFSv4.2: Fix up an invalid combination of memory allocation flags") has stripped GFP_KERNEL_ACCOUNT down to GFP_KERNEL, however, it forgot to remove SLAB_ACCOUNT from kmem_cache allocation. It means that memory is still limited by kmemcg. This patch also fix a NULL pointer reference issue [1] reported by NeilBrown. Link: https://lore.kernel.org/all/164870069595.25542.17292003658915487357@noble.neil.brown.name/ [1] Fixes: 5c60e89e71f8 ("NFSv4.2: Fix up an invalid combination of memory allocation flags") Fixes: 5abc1e37afa0 ("mm: list_lru: allocate list_lru_one only when needed") Reported-by: NeilBrown Signed-off-by: Muchun Song Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index ad3405c64b9e..e7b34f7e0614 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -997,7 +997,7 @@ int __init nfs4_xattr_cache_init(void) nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache", sizeof(struct nfs4_xattr_cache), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT), + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), nfs4_xattr_cache_init_once); if (nfs4_xattr_cache_cachep == NULL) return -ENOMEM; -- cgit v1.2.3 From d3c15033b240767d0287f1c4a529cbbe2d5ded8a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 23:18:57 -0400 Subject: SUNRPC: Handle ENOMEM in call_transmit_status() Both call_transmit() and call_bc_transmit() can now return ENOMEM, so let's make sure that we handle the errors gracefully. Fixes: 0472e4766049 ("SUNRPC: Convert socket page send code to use iov_iter()") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3c7407104d54..07328f1d3885 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2200,6 +2200,7 @@ call_transmit_status(struct rpc_task *task) * socket just returned a connection error, * then hold onto the transport lock. */ + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; @@ -2283,6 +2284,7 @@ call_bc_transmit_status(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; -- cgit v1.2.3 From 9d82819d5b065348ce623f196bf601028e22ed00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Apr 2022 09:50:19 -0400 Subject: SUNRPC: Handle low memory situations in call_status() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to handle ENFILE, ENOBUFS, and ENOMEM, because xprt_wake_pending_tasks() can be called with any one of these due to socket creation failures. Fixes: b61d59fffd3e ("SUNRPC: xs_tcp_connect_worker{4,6}: merge common code") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 07328f1d3885..6757b0fa5367 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2367,6 +2367,11 @@ call_status(struct rpc_task *task) case -EPIPE: case -EAGAIN: break; + case -ENFILE: + case -ENOBUFS: + case -ENOMEM: + rpc_delay(task, HZ>>2); + break; case -EIO: /* shutdown or soft timeout */ goto out_exit; -- cgit v1.2.3 From 68b78dcdf93a845d68e34918d17c125924240584 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:33:19 -0400 Subject: NFSv4/pnfs: Handle RPC allocation errors in nfs4_proc_layoutget If rpc_run_task() fails due to an allocation error, then bail out early. Fixes: 910ad38697d9 ("NFS: Fix memory allocation in rpc_alloc_task()") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e3f5b380cefe..16106f805ffa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9615,6 +9615,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return ERR_CAST(task); status = rpc_wait_for_completion_task(task); if (status != 0) -- cgit v1.2.3 From 88dee0cc93adcd83db9d089c1163dc88edafd1c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:34:35 -0400 Subject: NFS: Ensure rpc_run_task() cannot fail in nfs_async_rename() Ensure the call to rpc_run_task() cannot fail by preallocating the rpc_task. Fixes: 910ad38697d9 ("NFS: Fix memory allocation in rpc_alloc_task()") Signed-off-by: Trond Myklebust --- fs/nfs/unlink.c | 1 + include/linux/nfs_xdr.h | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 5fa11e1aca4c..6f325e10056c 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -347,6 +347,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) return ERR_PTR(-ENOMEM); + task_setup_data.task = &data->task; task_setup_data.callback_data = data; data->cred = get_current_cred(); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 49ba486aea5f..2863e5a69c6a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1694,6 +1694,7 @@ struct nfs_unlinkdata { struct nfs_renamedata { struct nfs_renameargs args; struct nfs_renameres res; + struct rpc_task task; const struct cred *cred; struct inode *old_dir; struct dentry *old_dentry; -- cgit v1.2.3 From 25cf32ad5dba79385f6e7de9008dcb75556c42d2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:36:19 -0400 Subject: SUNRPC: Handle allocation failure in rpc_new_task() If the call to rpc_alloc_task() fails, then ensure that the calldata is released, and that rpc_run_task() and rpc_run_bc_task() bail out early. Reported-by: NeilBrown Fixes: 910ad38697d9 ("NFS: Fix memory allocation in rpc_alloc_task()") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 7 +++++++ net/sunrpc/sched.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6757b0fa5367..af0174d7ce5a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1127,6 +1127,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) struct rpc_task *task; task = rpc_new_task(task_setup_data); + if (IS_ERR(task)) + return task; if (!RPC_IS_ASYNC(task)) task->tk_flags |= RPC_TASK_CRED_NOREF; @@ -1227,6 +1229,11 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) * Create an rpc_task to send the data */ task = rpc_new_task(&task_setup_data); + if (IS_ERR(task)) { + xprt_free_bc_request(req); + return task; + } + xprt_init_bc_request(req, task); task->tk_action = call_bc_encode; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b258b87a3ec2..7f70c1e608b7 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1128,6 +1128,11 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) if (task == NULL) { task = rpc_alloc_task(); + if (task == NULL) { + rpc_release_calldata(setup_data->callback_ops, + setup_data->callback_data); + return ERR_PTR(-ENOMEM); + } flags = RPC_TASK_DYNAMIC; } -- cgit v1.2.3 From b056fa070814897be32d83b079dbc311375588e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Apr 2022 14:10:23 -0400 Subject: SUNRPC: svc_tcp_sendmsg() should handle errors from xdr_alloc_bvec() The allocation is done with GFP_KERNEL, but it could still fail in a low memory situation. Fixes: 4a85a6a3320b ("SUNRPC: Handle TCP socket sends with kernel_sendpage() again") Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 478f857cdaed..6ea3d87e1147 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1096,7 +1096,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, int ret; *sentp = 0; - xdr_alloc_bvec(xdr, GFP_KERNEL); + ret = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (ret < 0) + return ret; ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); if (ret < 0) -- cgit v1.2.3 From ff053dbbaffec45c85e5bfe43306d26694a6433f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:51:58 -0400 Subject: SUNRPC: Move the call to xprt_send_pagedata() out of xprt_sock_sendmsg() The client and server have different requirements for their memory allocation, so move the allocation of the send buffer out of the socket send code that is common to both. Reported-by: NeilBrown Fixes: b2648015d452 ("SUNRPC: Make the rpciod and xprtiod slab allocation modes consistent") Signed-off-by: Trond Myklebust --- net/sunrpc/socklib.c | 6 ------ net/sunrpc/svcsock.c | 9 ++++++--- net/sunrpc/xprtsock.c | 15 +++++++++++++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 05b38bf68316..71ba4cf513bc 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -221,12 +221,6 @@ static int xprt_send_kvec(struct socket *sock, struct msghdr *msg, static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base) { - int err; - - err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); - if (err < 0) - return err; - iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr), xdr->page_len + xdr->page_base); return xprt_sendmsg(sock, msg, base + xdr->page_base); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 6ea3d87e1147..cc35ec433400 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -579,15 +579,18 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) if (svc_xprt_is_dead(xprt)) goto out_notconn; + err = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (err < 0) + goto out_unlock; + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); - xdr_free_bvec(xdr); if (err == -ECONNREFUSED) { /* ICMP error on earlier request. */ err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); - xdr_free_bvec(xdr); } + xdr_free_bvec(xdr); trace_svcsock_udp_send(xprt, err); - +out_unlock: mutex_unlock(&xprt->xpt_mutex); if (err < 0) return err; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c6a13893e308..8ab64ea46870 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -825,9 +825,14 @@ static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) static int xs_stream_prepare_request(struct rpc_rqst *req) { + gfp_t gfp = rpc_task_gfp_mask(); + int ret; + + ret = xdr_alloc_bvec(&req->rq_snd_buf, gfp); + if (ret < 0) + return ret; xdr_free_bvec(&req->rq_rcv_buf); - return xdr_alloc_bvec( - &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + return xdr_alloc_bvec(&req->rq_rcv_buf, gfp); } /* @@ -956,6 +961,9 @@ static int xs_udp_send_request(struct rpc_rqst *req) if (!xprt_request_get_cong(xprt, req)) return -EBADSLT; + status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + if (status < 0) + return status; req->rq_xtime = ktime_get(); status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent); @@ -2554,6 +2562,9 @@ static int bc_sendto(struct rpc_rqst *req) int err; req->rq_xtime = ktime_get(); + err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + if (err < 0) + return err; err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); if (err < 0 || sent != (xdr->len + sizeof(marker))) -- cgit v1.2.3