From ec677b58f65ed500d80c4ddff62ae2e8a9ae9802 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 30 Nov 2023 11:45:34 -0500 Subject: SUNRPC: Clean up unused variable in rpc_xprt_probe_trunked() We don't use the rpc_xprt_switch anywhere in this function, so let's not take an extra reference to in unnecessarily. Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index daa9582ec861..4aa838543f79 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -3116,7 +3116,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt, struct rpc_xprt *xprt, struct rpc_add_xprt_test *data) { - struct rpc_xprt_switch *xps; struct rpc_xprt *main_xprt; int status = 0; @@ -3124,7 +3123,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt, rcu_read_lock(); main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); - xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr, (struct sockaddr *)&main_xprt->addr); rcu_read_unlock(); @@ -3135,7 +3133,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt, status = rpc_clnt_add_xprt_helper(clnt, xprt, data); out: xprt_put(xprt); - xprt_switch_put(xps); return status; } -- cgit v1.2.3 From 5f1e77b2285b47c216b44e513071549cf006a309 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 30 Nov 2023 13:30:15 -0500 Subject: SUNRPC: Remove unused function rpc_clnt_xprt_switch_put() Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 1 - net/sunrpc/clnt.c | 8 -------- 2 files changed, 9 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index e9d4377d03c6..5e9d1469c6fa 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -252,7 +252,6 @@ void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *, const char *rpc_proc_name(const struct rpc_task *task); -void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4aa838543f79..8df944444e9b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -3247,14 +3247,6 @@ rpc_set_connect_timeout(struct rpc_clnt *clnt, } EXPORT_SYMBOL_GPL(rpc_set_connect_timeout); -void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt) -{ - rcu_read_lock(); - xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put); - void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { struct rpc_xprt_switch *xps; -- cgit v1.2.3 From a902f3dec70a46332a8c99604de384fbabd734e5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 30 Nov 2023 16:25:58 -0500 Subject: SUNRPC: Create a helper function for accessing the rpc_clnt's xprt_switch This function takes the necessary rcu read lock to dereference the client's rpc_xprt_switch and bump the reference count so it doesn't disappear underneath us before returning. This does mean that callers are responsible for calling xprt_switch_put() on the returned object when they are done with it. Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8df944444e9b..0b2c4b5484f5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -797,15 +797,24 @@ out_revert: } EXPORT_SYMBOL_GPL(rpc_switch_client_transport); -static -int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi, - void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps)) +static struct rpc_xprt_switch *rpc_clnt_xprt_switch_get(struct rpc_clnt *clnt) { struct rpc_xprt_switch *xps; rcu_read_lock(); xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); rcu_read_unlock(); + + return xps; +} + +static +int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi, + void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps)) +{ + struct rpc_xprt_switch *xps; + + xps = rpc_clnt_xprt_switch_get(clnt); if (xps == NULL) return -EAGAIN; func(xpi, xps); @@ -2206,9 +2215,7 @@ call_connect_status(struct rpc_task *task) struct rpc_xprt *saved = task->tk_xprt; struct rpc_xprt_switch *xps; - rcu_read_lock(); - xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); - rcu_read_unlock(); + xps = rpc_clnt_xprt_switch_get(clnt); if (xps->xps_nxprts > 1) { long value; @@ -3251,22 +3258,23 @@ void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { struct rpc_xprt_switch *xps; - rcu_read_lock(); - xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); - rcu_read_unlock(); + xps = rpc_clnt_xprt_switch_get(clnt); xprt_set_online_locked(xprt, xps); + xprt_switch_put(xps); } void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { + struct rpc_xprt_switch *xps; + if (rpc_clnt_xprt_switch_has_addr(clnt, (const struct sockaddr *)&xprt->addr)) { return rpc_clnt_xprt_set_online(clnt, xprt); } - rcu_read_lock(); - rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch), - xprt); - rcu_read_unlock(); + + xps = rpc_clnt_xprt_switch_get(clnt); + rpc_xprt_switch_add_xprt(xps, xprt); + xprt_switch_put(xps); } EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt); -- cgit v1.2.3 From 31b62908693c90d4d07db597e685d9f25a120073 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 27 Nov 2023 17:06:18 -0500 Subject: SUNRPC: Fix a suspicious RCU usage warning I received the following warning while running cthon against an ontap server running pNFS: [ 57.202521] ============================= [ 57.202522] WARNING: suspicious RCU usage [ 57.202523] 6.7.0-rc3-g2cc14f52aeb7 #41492 Not tainted [ 57.202525] ----------------------------- [ 57.202525] net/sunrpc/xprtmultipath.c:349 RCU-list traversed in non-reader section!! [ 57.202527] other info that might help us debug this: [ 57.202528] rcu_scheduler_active = 2, debug_locks = 1 [ 57.202529] no locks held by test5/3567. [ 57.202530] stack backtrace: [ 57.202532] CPU: 0 PID: 3567 Comm: test5 Not tainted 6.7.0-rc3-g2cc14f52aeb7 #41492 5b09971b4965c0aceba19f3eea324a4a806e227e [ 57.202534] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 2/2/2022 [ 57.202536] Call Trace: [ 57.202537] [ 57.202540] dump_stack_lvl+0x77/0xb0 [ 57.202551] lockdep_rcu_suspicious+0x154/0x1a0 [ 57.202556] rpc_xprt_switch_has_addr+0x17c/0x190 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202596] rpc_clnt_setup_test_and_add_xprt+0x50/0x180 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202621] ? rpc_clnt_add_xprt+0x254/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202646] rpc_clnt_add_xprt+0x27a/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202671] ? __pfx_rpc_clnt_setup_test_and_add_xprt+0x10/0x10 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202696] nfs4_pnfs_ds_connect+0x345/0x760 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202728] ? __pfx_nfs4_test_session_trunk+0x10/0x10 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202754] nfs4_fl_prepare_ds+0x75/0xc0 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a] [ 57.202760] filelayout_write_pagelist+0x4a/0x200 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a] [ 57.202765] pnfs_generic_pg_writepages+0xbe/0x230 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202788] __nfs_pageio_add_request+0x3fd/0x520 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202813] nfs_pageio_add_request+0x18b/0x390 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202831] nfs_do_writepage+0x116/0x1e0 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202849] nfs_writepages_callback+0x13/0x30 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202866] write_cache_pages+0x265/0x450 [ 57.202870] ? __pfx_nfs_writepages_callback+0x10/0x10 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202891] nfs_writepages+0x141/0x230 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202913] do_writepages+0xd2/0x230 [ 57.202917] ? filemap_fdatawrite_wbc+0x5c/0x80 [ 57.202921] filemap_fdatawrite_wbc+0x67/0x80 [ 57.202924] filemap_write_and_wait_range+0xd9/0x170 [ 57.202930] nfs_wb_all+0x49/0x180 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202947] nfs4_file_flush+0x72/0xb0 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202969] __se_sys_close+0x46/0xd0 [ 57.202972] do_syscall_64+0x68/0x100 [ 57.202975] ? do_syscall_64+0x77/0x100 [ 57.202976] ? do_syscall_64+0x77/0x100 [ 57.202979] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 57.202982] RIP: 0033:0x7fe2b12e4a94 [ 57.202985] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d d5 18 0e 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 44 c3 0f 1f 00 48 83 ec 18 89 7c 24 0c e8 c3 [ 57.202987] RSP: 002b:00007ffe857ddb38 EFLAGS: 00000202 ORIG_RAX: 0000000000000003 [ 57.202989] RAX: ffffffffffffffda RBX: 00007ffe857dfd68 RCX: 00007fe2b12e4a94 [ 57.202991] RDX: 0000000000002000 RSI: 00007ffe857ddc40 RDI: 0000000000000003 [ 57.202992] RBP: 00007ffe857dfc50 R08: 7fffffffffffffff R09: 0000000065650f49 [ 57.202993] R10: 00007fe2b11f8300 R11: 0000000000000202 R12: 0000000000000000 [ 57.202994] R13: 00007ffe857dfd80 R14: 00007fe2b1445000 R15: 0000000000000000 [ 57.202999] The problem seems to be that two out of three callers aren't taking the rcu_read_lock() before calling the list_for_each_entry_rcu() function in rpc_xprt_switch_has_addr(). I fix this by having rpc_xprt_switch_has_addr() unconditionaly take the rcu_read_lock(), which is okay to do recursively in the case that the lock has already been taken by a caller. Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- net/sunrpc/xprtmultipath.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 701250b305db..0706575d9392 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -336,8 +336,9 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) xprt_switch_find_current_entry_offline); } -bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, - const struct sockaddr *sap) +static +bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) { struct list_head *head; struct rpc_xprt *pos; @@ -356,6 +357,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, return false; } +bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) +{ + bool res; + + rcu_read_lock(); + res = __rpc_xprt_switch_has_addr(xps, sap); + rcu_read_unlock(); + + return res; +} + static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur, bool check_active) -- cgit v1.2.3 From 1530827b90025cdf80c9b0d07a166d045a0a7b81 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 5 Dec 2023 10:05:01 -0500 Subject: blocklayoutdriver: Fix reference leak of pnfs_device_node The error path for blocklayout's device lookup is missing a reference drop for the case where a lookup finds the device, but the device is marked with NFS_DEVICEID_UNAVAILABLE. Fixes: b3dce6a2f060 ("pnfs/blocklayout: handle transient devices") Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/blocklayout.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 943aeea1eb16..1d1d7abc3205 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -580,6 +580,8 @@ retry: nfs4_delete_deviceid(node->ld, node->nfs_client, id); goto retry; } + + nfs4_put_deviceid_node(node); return ERR_PTR(-ENODEV); } -- cgit v1.2.3 From d76c769c8db4c321aa4d697e810e0ed0b30bcc91 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 5 Dec 2023 10:05:02 -0500 Subject: pnfs/blocklayout: Don't add zero-length pnfs_block_dev We noticed a SCSI device that refused to allow READ CAPACITY when the device had a PR with exclusive access, registrants only. The result of this situation is that the blocklayout driver adds a pnfs_block_dev of zero length which always fails the offset_in_map tests. Instead of continuously trying to do pNFS for this case, just mark the device as unavailable which will allow the client to fallback to the MDS for the duration of PNFS_DEVICE_RETRY_TIMEOUT. Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index f318a05a80e1..c97ebc42ec0f 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -351,6 +351,9 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, d->map = bl_map_simple; d->pr_key = v->scsi.pr_key; + if (d->len == 0) + return -ENODEV; + pr_info("pNFS: using block device %s (reservation key 0x%llx)\n", d->bdev_handle->bdev->bd_disk->disk_name, d->pr_key); -- cgit v1.2.3 From b4d4fd60f884e75d52bf9254bc56b3ea41ea2686 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 6 Dec 2023 08:10:22 -0500 Subject: NFSv4: Always ask for type with READDIR Again we have claimed regressions for walking a directory tree, this time with the "find" utility which always tries to optimize away asking for any attributes until it has a complete list of entries. This behavior makes the readdir plus heuristic do the wrong thing, which causes a storm of GETATTRs to determine each entry's type in order to continue the walk. For v4 add the type attribute to each READDIR request to include it no matter the heuristic. This allows a simple `find` command to proceed quickly through a directory tree. Suggested-by: Jeff Layton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index deec76cf5afe..69406e60f391 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1602,7 +1602,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { uint32_t attrs[3] = { - FATTR4_WORD0_RDATTR_ERROR, + FATTR4_WORD0_TYPE + | FATTR4_WORD0_RDATTR_ERROR, FATTR4_WORD1_MOUNTED_ON_FILEID, }; uint32_t dircount = readdir->count; @@ -1612,12 +1613,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg unsigned int i; if (readdir->plus) { - attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| - FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID; - attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| - FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| - FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| - FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + attrs[0] |= FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEHANDLE + | FATTR4_WORD0_FILEID; + attrs[1] |= FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY; attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; } /* Use mounted_on_fileid only if the server supports it */ -- cgit v1.2.3 From a10a9233073d984b239e22358ba21825e27e2e88 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 5 Dec 2023 09:10:54 -0500 Subject: NFS: Use parent's objective cred in nfs_access_login_time() The subjective cred (task->cred) can potentially be overridden and subsquently freed in non-RCU context, which could lead to a panic if we try to use it in cred_fscmp(). Use __task_cred(), which returns the objective cred (task->real_cred) instead. Fixes: 0eb43812c027 ("NFS: Clear the file access cache upon login") Fixes: 5e9a7b9c2ea1 ("NFS: Fix up a sparse warning") Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 13dffe4201e6..273c0b68abf4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2963,7 +2963,7 @@ static u64 nfs_access_login_time(const struct task_struct *task, rcu_read_lock(); for (;;) { parent = rcu_dereference(task->real_parent); - pcred = rcu_dereference(parent->cred); + pcred = __task_cred(parent); if (parent == task || cred_fscmp(pcred, cred) != 0) break; task = parent; -- cgit v1.2.3 From e3fd54e7dc5a7f3fb7bb7c33bee1361ca0c36ed3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 Nov 2023 13:55:27 -0500 Subject: NFSv4: Track the number of referring calls in struct cb_process_state When the server gives us a set of referring calls, to tell us that the NFSv4.1 callback needs to be ordered with respect to those calls, then we may want to make that information available to the operations. In certain cases, it may allow them to optimise their behaviour due to the extra knowledge. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback.h | 5 +++-- fs/nfs/callback_proc.c | 11 ++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index ccd4f245cae2..cc1b6620a0c2 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -40,11 +40,12 @@ enum nfs4_callback_opnum { struct nfs4_slot; struct cb_process_state { - __be32 drc_status; struct nfs_client *clp; struct nfs4_slot *slot; - u32 minorversion; struct net *net; + u32 minorversion; + __be32 drc_status; + unsigned int referring_calls; }; struct cb_compound_hdr_arg { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 96a4923080ae..5d7fe37d226c 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -450,6 +450,7 @@ static int referring_call_exists(struct nfs_client *clp, __acquires(lock) { int status = 0; + int found = 0; int i, j; struct nfs4_session *session; struct nfs4_slot_table *tbl; @@ -478,11 +479,12 @@ static int referring_call_exists(struct nfs_client *clp, spin_lock(lock); if (status) goto out; + found++; } } out: - return status; + return status < 0 ? status : found; } __be32 nfs4_callback_sequence(void *argp, void *resp, @@ -493,6 +495,7 @@ __be32 nfs4_callback_sequence(void *argp, void *resp, struct nfs4_slot_table *tbl; struct nfs4_slot *slot; struct nfs_client *clp; + int ret; int i; __be32 status = htonl(NFS4ERR_BADSESSION); @@ -552,11 +555,13 @@ __be32 nfs4_callback_sequence(void *argp, void *resp, * related callback was received before the response to the original * call. */ - if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists, - &tbl->slot_tbl_lock) < 0) { + ret = referring_call_exists(clp, args->csa_nrclists, args->csa_rclists, + &tbl->slot_tbl_lock); + if (ret < 0) { status = htonl(NFS4ERR_DELAY); goto out_unlock; } + cps->referring_calls = ret; /* * RFC5661 20.9.3 -- cgit v1.2.3 From dce72920c81b7e2ee6e0c9b2fde8762160b9992a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 Nov 2023 13:55:28 -0500 Subject: NFSv4.1: if referring calls are complete, trust the stateid argument If the server is recalling a layout, and sends us a list of referring calls that we can see are complete, then we should just trust that the stateid argument is correct, even if the sequence id doesn't match the one we hold. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback_proc.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 5d7fe37d226c..76cea34477ae 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -207,7 +207,8 @@ static struct inode *nfs_layout_find_inode(struct nfs_client *clp, * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing) */ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo, - const nfs4_stateid *new) + const nfs4_stateid *new, + struct cb_process_state *cps) { u32 oldseq, newseq; @@ -221,28 +222,29 @@ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo, newseq = be32_to_cpu(new->seqid); /* Are we already in a layout recall situation? */ - if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && - lo->plh_return_seq != 0) { - if (newseq < lo->plh_return_seq) - return NFS4ERR_OLD_STATEID; - if (newseq > lo->plh_return_seq) - return NFS4ERR_DELAY; - goto out; - } + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return NFS4ERR_DELAY; - /* Check that the stateid matches what we think it should be. */ + /* + * Check that the stateid matches what we think it should be. + * Note that if the server sent us a list of referring calls, + * and we know that those have completed, then we trust the + * stateid argument is correct. + */ oldseq = be32_to_cpu(lo->plh_stateid.seqid); - if (newseq > oldseq + 1) + if (newseq > oldseq + 1 && !cps->referring_calls) return NFS4ERR_DELAY; + /* Crazy server! */ if (newseq <= oldseq) return NFS4ERR_OLD_STATEID; -out: + return NFS_OK; } static u32 initiate_file_draining(struct nfs_client *clp, - struct cb_layoutrecallargs *args) + struct cb_layoutrecallargs *args, + struct cb_process_state *cps) { struct inode *ino; struct pnfs_layout_hdr *lo; @@ -266,7 +268,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, goto out; } pnfs_get_layout_hdr(lo); - rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid); + rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid, cps); if (rv != NFS_OK) goto unlock; @@ -326,10 +328,11 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, } static u32 do_callback_layoutrecall(struct nfs_client *clp, - struct cb_layoutrecallargs *args) + struct cb_layoutrecallargs *args, + struct cb_process_state *cps) { if (args->cbl_recall_type == RETURN_FILE) - return initiate_file_draining(clp, args); + return initiate_file_draining(clp, args, cps); return initiate_bulk_draining(clp, args); } @@ -340,11 +343,12 @@ __be32 nfs4_callback_layoutrecall(void *argp, void *resp, u32 res = NFS4ERR_OP_NOT_IN_SESSION; if (cps->clp) - res = do_callback_layoutrecall(cps->clp, args); + res = do_callback_layoutrecall(cps->clp, args, cps); return cpu_to_be32(res); } -static void pnfs_recall_all_layouts(struct nfs_client *clp) +static void pnfs_recall_all_layouts(struct nfs_client *clp, + struct cb_process_state *cps) { struct cb_layoutrecallargs args; @@ -352,7 +356,7 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp) memset(&args, 0, sizeof(args)); args.cbl_recall_type = RETURN_ALL; /* FIXME we ignore errors, what should we do? */ - do_callback_layoutrecall(clp, &args); + do_callback_layoutrecall(clp, &args, cps); } __be32 nfs4_callback_devicenotify(void *argp, void *resp, @@ -622,7 +626,7 @@ __be32 nfs4_callback_recallany(void *argp, void *resp, nfs_expire_unused_delegation_types(cps->clp, flags); if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT)) - pnfs_recall_all_layouts(cps->clp); + pnfs_recall_all_layouts(cps->clp, cps); if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) { set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state); -- cgit v1.2.3 From 037e56a22ff37f9a9c2330b66cff55d3d1ff9b90 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 Nov 2023 13:55:29 -0500 Subject: NFSv4.1/pnfs: Ensure we handle the error NFS4ERR_RETURNCONFLICT Once the client has processed the CB_LAYOUTRECALL, but has not yet successfully returned the layout, the server is supposed to switch to returning NFS4ERR_RETURNCONFLICT. This patch ensures that we handle that return value correctly. Fixes: 183d9e7b112a ("pnfs: rework LAYOUTGET retry handling") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8a943fffaad5..23819a756508 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -170,6 +170,7 @@ static int nfs4_map_errors(int err) case -NFS4ERR_RESOURCE: case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: return -EREMOTEIO; case -NFS4ERR_WRONGSEC: case -NFS4ERR_WRONG_CRED: @@ -558,6 +559,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, case -NFS4ERR_GRACE: case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: exception->delay = 1; return 0; @@ -9691,6 +9693,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, status = -EBUSY; break; case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: status = -ERECALLCONFLICT; break; case -NFS4ERR_DELEG_REVOKED: -- cgit v1.2.3 From 98b4e5137504a5bd9346562b1310cdc13486603b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 1 Dec 2023 14:42:03 -0500 Subject: SUNRPC: fix _xprt_switch_find_current_entry logic Fix the logic for picking current transport entry. Fixes: 95d0d30c66b8 ("SUNRPC create an iterator to list only OFFLINE xprts") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- net/sunrpc/xprtmultipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 0706575d9392..720d3ba742ec 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -284,7 +284,7 @@ struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head, if (cur == pos) found = true; if (found && ((find_active && xprt_is_active(pos)) || - (!find_active && xprt_is_active(pos)))) + (!find_active && !xprt_is_active(pos)))) return pos; } return NULL; -- cgit v1.2.3 From 283064fca3e8c6a2f3d1d70bb56f5c01ee01118c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Nov 2023 06:10:49 -0400 Subject: nfs: add new tracepoint at nfs4 revalidate entry point Add a call to the v4 d_revalidate entrypoint, just like the v3 one. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 273c0b68abf4..c8ecbe999059 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2194,6 +2194,8 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, { struct inode *inode; + trace_nfs_lookup_revalidate_enter(dir, dentry, flags); + if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) goto full_reval; if (d_mountpoint(dentry)) -- cgit v1.2.3 From 310b1f89ea81ff753f55cba7f6ec457b6bbf6549 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Nov 2023 06:10:50 -0400 Subject: nfs: rename the nfs_async_rename_done tracepoint We do async renames in other cases besides sillyrenames now. This tracepoint name is now misleading. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 2 +- fs/nfs/unlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 4e90ca531176..8ad1bed09295 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -893,7 +893,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, DEFINE_NFS_RENAME_EVENT(nfs_rename_enter); DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit); -DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename); +DEFINE_NFS_RENAME_EVENT_DONE(nfs_async_rename_done); TRACE_EVENT(nfs_sillyrename_unlink, TP_PROTO( diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 150a953a8be9..0110299643a2 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -267,7 +267,7 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct inode *new_dir = data->new_dir; struct dentry *old_dentry = data->old_dentry; - trace_nfs_sillyrename_rename(old_dir, old_dentry, + trace_nfs_async_rename_done(old_dir, old_dentry, new_dir, data->new_dentry, task->tk_status); if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { rpc_restart_call_prepare(task); -- cgit v1.2.3 From f6e70c59edee4203e9541e790e8c416a9250f190 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Nov 2023 06:10:51 -0400 Subject: nfs: print fileid in lookup tracepoints With this we can see the dentry -> inode linkage that's being revalidated. A fileid of 0 means "negative dentry". Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 8ad1bed09295..de3adc4a2ce0 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -400,6 +400,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) + __field(u64, fileid) __string(name, dentry->d_name.name) ), @@ -407,16 +408,18 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; + __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); __assign_str(name, dentry->d_name.name); ), TP_printk( - "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", + "flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, - __get_str(name) + __get_str(name), + __entry->fileid ) ); @@ -444,6 +447,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) + __field(u64, fileid) __string(name, dentry->d_name.name) ), @@ -452,17 +456,19 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; __entry->flags = flags; + __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); __assign_str(name, dentry->d_name.name); ), TP_printk( - "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", + "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, - __get_str(name) + __get_str(name), + __entry->fileid ) ); -- cgit v1.2.3 From 8a6291bf3b0eae1bf26621e6419a91682f2d6227 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Nov 2023 06:25:13 -0500 Subject: pNFS: Fix the pnfs block driver's calculation of layoutget size Instead of relying on the value of the 'bytes_left' field, we should calculate the layout size based on the offset of the request that is being written out. Reported-by: Benjamin Coddington Signed-off-by: Trond Myklebust Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write scheduling") Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/blocklayout.c | 5 ++--- fs/nfs/direct.c | 5 +++-- fs/nfs/internal.h | 2 +- fs/nfs/pnfs.c | 3 ++- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 1d1d7abc3205..6be13e0ec170 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -895,10 +895,9 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) } if (pgio->pg_dreq == NULL) - wb_size = pnfs_num_cont_bytes(pgio->pg_inode, - req->wb_index); + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); else - wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq, req_offset(req)); pnfs_generic_pg_init_write(pgio, req, wb_size); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f6c74f424691..5918c67dae0d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -205,9 +205,10 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) kref_put(&dreq->kref, nfs_direct_req_free); } -ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset) { - return dreq->bytes_left; + loff_t start = offset - dreq->io_start; + return dreq->max_count - start; } EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9c9cf764f600..b1fa81c9dff6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -655,7 +655,7 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); /* direct.c */ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); -extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset); /* nfs4proc.c */ extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 21a365357629..0c0fed1ecd0b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2733,7 +2733,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); else - rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); + rd_size = nfs_dreq_bytes_left(pgio->pg_dreq, + req_offset(req)); pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), -- cgit v1.2.3 From 1fd5394e6ab8b11465a5d0867f188fad1835a762 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 17 Nov 2023 06:25:14 -0500 Subject: NFS: drop unused nfs_direct_req bytes_left Now that we're calculating how large a remaining IO should be based on the current request's offset, we no longer need to track bytes_left on each struct nfs_direct_req. Drop the field, and clean up the direct request tracepoints. Signed-off-by: Benjamin Coddington Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 6 ++---- fs/nfs/internal.h | 1 - fs/nfs/nfstrace.h | 6 ++---- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 5918c67dae0d..c03926a1cc73 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -369,7 +369,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, bytes -= req_len; requested_bytes += req_len; pos += req_len; - dreq->bytes_left -= req_len; } nfs_direct_release_pages(pagevec, npages); kvfree(pagevec); @@ -441,7 +440,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, goto out; dreq->inode = inode; - dreq->bytes_left = dreq->max_count = count; + dreq->max_count = count; dreq->io_start = iocb->ki_pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); @@ -874,7 +873,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, bytes -= req_len; requested_bytes += req_len; pos += req_len; - dreq->bytes_left -= req_len; if (defer) { nfs_mark_request_commit(req, NULL, &cinfo, 0); @@ -981,7 +979,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, goto out; dreq->inode = inode; - dreq->bytes_left = dreq->max_count = count; + dreq->max_count = count; dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b1fa81c9dff6..e3722ce6722e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -936,7 +936,6 @@ struct nfs_direct_req { loff_t io_start; /* Start offset for I/O */ ssize_t count, /* bytes actually processed */ max_count, /* max expected count */ - bytes_left, /* bytes left to be sent */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index de3adc4a2ce0..afedb449b54f 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -1545,7 +1545,6 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class, __field(u32, fhandle) __field(loff_t, offset) __field(ssize_t, count) - __field(ssize_t, bytes_left) __field(ssize_t, error) __field(int, flags) ), @@ -1560,19 +1559,18 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class, __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = dreq->io_start; __entry->count = dreq->count; - __entry->bytes_left = dreq->bytes_left; __entry->error = dreq->error; __entry->flags = dreq->flags; ), TP_printk( "error=%zd fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zd bytes_left=%zd flags=%s", + "offset=%lld count=%zd flags=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, - __entry->count, __entry->bytes_left, + __entry->count, nfs_show_direct_req_flags(__entry->flags) ) ); -- cgit v1.2.3 From 12fc0a963128b54b82e98b9909f463e784b90b07 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Dec 2023 20:47:07 +0000 Subject: nfs: Remove writepage NFS already has writepages and migrate_folio, so it does not need to implement writepage. The writepage operation is deprecated as it leads to worse performance under high memory pressure due to folios being written out in LRU order rather than sequentially within a file. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 1 - fs/nfs/write.c | 11 ----------- include/linux/nfs_fs.h | 1 - 3 files changed, 13 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3f9768810427..e437de2a5ad4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -558,7 +558,6 @@ const struct address_space_operations nfs_file_aops = { .read_folio = nfs_read_folio, .readahead = nfs_readahead, .dirty_folio = filemap_dirty_folio, - .writepage = nfs_writepage, .writepages = nfs_writepages, .write_begin = nfs_write_begin, .write_end = nfs_write_end, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b664caea8b4e..0b4fdad511d9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -680,17 +680,6 @@ static int nfs_writepage_locked(struct folio *folio, return err; } -int nfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct folio *folio = page_folio(page); - int ret; - - ret = nfs_writepage_locked(folio, wbc); - if (ret != AOP_WRITEPAGE_ACTIVATE) - unlock_page(page); - return ret; -} - static int nfs_writepages_callback(struct folio *folio, struct writeback_control *wbc, void *data) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 279262057a92..f5ce7b101146 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -595,7 +595,6 @@ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); * linux/fs/nfs/write.c */ extern int nfs_congestion_kb; -extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct folio *folio); extern int nfs_update_folio(struct file *file, struct folio *folio, -- cgit v1.2.3 From 597a421798033cd150586b5f2607389fa4052550 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 29 Dec 2023 13:18:56 +0100 Subject: rpc_pipefs: Replace one label in bl_resolve_deviceid() The kfree() function was called in one case by the bl_resolve_deviceid() function during error handling even if the passed data structure member contained a null pointer. This issue was detected by using the Coccinelle software. Thus use an other label. Signed-off-by: Markus Elfring Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/rpc_pipefs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index 6c977288cc28..d8d50a88de04 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -75,7 +75,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, msg->len = sizeof(*bl_msg) + b->simple.len; msg->data = kzalloc(msg->len, gfp_mask); if (!msg->data) - goto out_free_data; + goto out_unlock; bl_msg = msg->data; bl_msg->type = BL_DEVICE_MOUNT; -- cgit v1.2.3 From e6f533b615971afcaa1141573a1a1714d9d4f31a Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 4 Jan 2024 09:58:45 -0500 Subject: SUNRPC: Fixup v4.1 backchannel request timeouts After commit 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on the sending list"), any 4.1 backchannel tasks placed on the sending queue would immediately return with -ETIMEDOUT since their req timers are zero. Initialize the backchannel's rpc_rqst timeout parameters from the xprt's default timeout settings. Fixes: 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on the sending list") Signed-off-by: Benjamin Coddington Tested-by: Chuck Lever Tested-by: Jeff Layton Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2364c485540c..6cc9ffac962d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -651,9 +651,9 @@ static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime) jiffies + nsecs_to_jiffies(-delta); } -static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req) +static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req, + const struct rpc_timeout *to) { - const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; unsigned long majortimeo = req->rq_timeout; if (to->to_exponential) @@ -665,9 +665,10 @@ static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req) return majortimeo; } -static void xprt_reset_majortimeo(struct rpc_rqst *req) +static void xprt_reset_majortimeo(struct rpc_rqst *req, + const struct rpc_timeout *to) { - req->rq_majortimeo += xprt_calc_majortimeo(req); + req->rq_majortimeo += xprt_calc_majortimeo(req, to); } static void xprt_reset_minortimeo(struct rpc_rqst *req) @@ -675,7 +676,8 @@ static void xprt_reset_minortimeo(struct rpc_rqst *req) req->rq_minortimeo += req->rq_timeout; } -static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) +static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req, + const struct rpc_timeout *to) { unsigned long time_init; struct rpc_xprt *xprt = req->rq_xprt; @@ -684,8 +686,9 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) time_init = jiffies; else time_init = xprt_abs_ktime_to_jiffies(task->tk_start); - req->rq_timeout = task->tk_client->cl_timeout->to_initval; - req->rq_majortimeo = time_init + xprt_calc_majortimeo(req); + + req->rq_timeout = to->to_initval; + req->rq_majortimeo = time_init + xprt_calc_majortimeo(req, to); req->rq_minortimeo = time_init + req->rq_timeout; } @@ -713,7 +716,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req) } else { req->rq_timeout = to->to_initval; req->rq_retries = 0; - xprt_reset_majortimeo(req); + xprt_reset_majortimeo(req, to); /* Reset the RTT counters == "slow start" */ spin_lock(&xprt->transport_lock); rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); @@ -1886,7 +1889,7 @@ xprt_request_init(struct rpc_task *task) req->rq_snd_buf.bvec = NULL; req->rq_rcv_buf.bvec = NULL; req->rq_release_snd_buf = NULL; - xprt_init_majortimeo(task, req); + xprt_init_majortimeo(task, req, task->tk_client->cl_timeout); trace_xprt_reserve(req); } @@ -1996,6 +1999,8 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task) */ xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + xbufp->tail[0].iov_len; + + xprt_init_majortimeo(task, req, req->rq_xprt->timeout); } #endif -- cgit v1.2.3 From 57331a59ac0d680f606403eb24edd3c35aecba31 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 4 Jan 2024 09:58:46 -0500 Subject: NFSv4.1: Use the nfs_client's rpc timeouts for backchannel For backchannel requests that lookup the appropriate nfs_client, use the state-management rpc_clnt's rpc_timeout parameters for the backchannel's response. When the nfs_client cannot be found, fall back to using the xprt's default timeout parameters. Signed-off-by: Benjamin Coddington Tested-by: Chuck Lever Tested-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 5 +++++ include/linux/sunrpc/bc_xprt.h | 3 ++- include/linux/sunrpc/sched.h | 14 +++++++++++++- include/linux/sunrpc/svc.h | 2 ++ include/linux/sunrpc/xprt.h | 11 ----------- net/sunrpc/clnt.c | 6 ++++-- net/sunrpc/svc.c | 11 ++++++++++- net/sunrpc/xprt.c | 12 +++++++++--- 8 files changed, 45 insertions(+), 19 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 321af81c456e..9369488f2ed4 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -967,6 +967,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) nops--; } + if (svc_is_backchannel(rqstp) && cps.clp) { + rqstp->bc_to_initval = cps.clp->cl_rpcclient->cl_timeout->to_initval; + rqstp->bc_to_retries = cps.clp->cl_rpcclient->cl_timeout->to_retries; + } + *hdr_res.status = status; *hdr_res.nops = htonl(nops); nfs4_cb_free_slot(&cps); diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index db30a159f9d5..f22bf915dcf6 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -20,7 +20,8 @@ #ifdef CONFIG_SUNRPC_BACKCHANNEL struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); -void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task); +void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task, + const struct rpc_timeout *to); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8ada7dc802d3..2d61987b3545 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -37,6 +37,17 @@ struct rpc_wait { struct list_head timer_list; /* Timer list */ }; +/* + * This describes a timeout strategy + */ +struct rpc_timeout { + unsigned long to_initval, /* initial timeout */ + to_maxval, /* max timeout */ + to_increment; /* if !exponential */ + unsigned int to_retries; /* max # of retries */ + unsigned char to_exponential; +}; + /* * This is the RPC task struct */ @@ -205,7 +216,8 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + struct rpc_timeout *timeout); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index b10f987509cc..3331a1c2b47e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -250,6 +250,8 @@ struct svc_rqst { struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + unsigned long bc_to_initval; + unsigned int bc_to_retries; void ** rq_lease_breaker; /* The v4 client breaking a lease */ unsigned int rq_status_counter; /* RPC processing counter */ }; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index f85d3a0daca2..464f6a9492ab 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -30,17 +30,6 @@ #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) -/* - * This describes a timeout strategy - */ -struct rpc_timeout { - unsigned long to_initval, /* initial timeout */ - to_maxval, /* max timeout */ - to_increment; /* if !exponential */ - unsigned int to_retries; /* max # of retries */ - unsigned char to_exponential; -}; - enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, RPC_DISPLAY_PORT, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0b2c4b5484f5..19437b15ffc7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1311,8 +1311,10 @@ static void call_bc_encode(struct rpc_task *task); * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it * @req: RPC request + * @timeout: timeout values to use for this task */ -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + struct rpc_timeout *timeout) { struct rpc_task *task; struct rpc_task_setup task_setup_data = { @@ -1331,7 +1333,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) return task; } - xprt_init_bc_request(req, task); + xprt_init_bc_request(req, task, timeout); task->tk_action = call_bc_encode; atomic_inc(&task->tk_count); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 3f2ea7a0496f..3f714d33624b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1557,6 +1557,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) { struct rpc_task *task; int proc_error; + struct rpc_timeout timeout; /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xid = req->rq_xid; @@ -1602,8 +1603,16 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) return; } /* Finally, send the reply synchronously */ + if (rqstp->bc_to_initval > 0) { + timeout.to_initval = rqstp->bc_to_initval; + timeout.to_retries = rqstp->bc_to_initval; + } else { + timeout.to_initval = req->rq_xprt->timeout->to_initval; + timeout.to_initval = req->rq_xprt->timeout->to_retries; + } memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); - task = rpc_run_bc_task(req); + task = rpc_run_bc_task(req, &timeout); + if (IS_ERR(task)) return; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6cc9ffac962d..af13fdfa6672 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1986,7 +1986,8 @@ void xprt_release(struct rpc_task *task) #ifdef CONFIG_SUNRPC_BACKCHANNEL void -xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task) +xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task, + const struct rpc_timeout *to) { struct xdr_buf *xbufp = &req->rq_snd_buf; @@ -1999,8 +2000,13 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task) */ xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + xbufp->tail[0].iov_len; - - xprt_init_majortimeo(task, req, req->rq_xprt->timeout); + /* + * Backchannel Replies are sent with !RPC_TASK_SOFT and + * RPC_TASK_NO_RETRANS_TIMEOUT. The major timeout setting + * affects only how long each Reply waits to be sent when + * a transport connection cannot be established. + */ + xprt_init_majortimeo(task, req, to); } #endif -- cgit v1.2.3