From 479219218fbe0a74bc36310fefc5ac6439c7cece Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 May 2018 10:08:36 -0400 Subject: NFS: Optimise away the close-to-open GETATTR when we have NFSv4 OPEN NFSv4 should not need to perform an extra close-to-open GETATTR as part of the process of looking up a regular file, since the OPEN call will do that for us. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 73f8b43d988c..4a73bd8b4517 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1012,13 +1012,25 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) if (IS_AUTOMOUNT(inode)) return 0; + + if (flags & LOOKUP_OPEN) { + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + /* A NFSv4 OPEN will revalidate later */ + if (server->caps & NFS_CAP_ATOMIC_OPEN) + goto out; + /* Fallthrough */ + case S_IFDIR: + if (server->flags & NFS_MOUNT_NOCTO) + break; + /* NFS close-to-open cache consistency validation */ + goto out_force; + } + } + /* VFS wants an on-the-wire revalidation */ if (flags & LOOKUP_REVAL) goto out_force; - /* This is an open(2) */ - if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) && - (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) - goto out_force; out: return (inode->i_nlink == 0) ? -ENOENT : 0; out_force: -- cgit v1.2.3 From 73dd684a4dcee8c55882a01aeb8b5cbf203de955 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 May 2018 10:13:09 -0400 Subject: NFS: If the VFS sets LOOKUP_REVAL then force a lookup of the dentry If nfs_lookup_revalidate() is called with LOOKUP_REVAL because a previous path lookup failed, then we ought to force a full lookup of the component name. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4a73bd8b4517..9dde88334c9c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1118,7 +1118,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, flags) && + if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) && nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { error = nfs_lookup_verify_inode(inode, flags); if (error) { -- cgit v1.2.3 From 9f6d44d418b1f47298a92cd2dc42b8dba8b04816 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 May 2018 10:34:21 -0400 Subject: NFS: Optimise away lookups for rename targets We can optimise away any lookup for a rename target, unless we're being asked to revalidate a dentry that might be in use. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9dde88334c9c..b315f53b3aec 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1051,13 +1051,15 @@ out_force: * * If LOOKUP_RCU prevents us from performing a full check, return 1 * suggesting a reval is needed. + * + * Note that when creating a new file, or looking up a rename target, + * then it shouldn't be necessary to revalidate a negative dentry. */ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, unsigned int flags) { - /* Don't revalidate a negative dentry if we're creating a new file */ - if (flags & LOOKUP_CREATE) + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; @@ -1347,7 +1349,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. */ - if (nfs_is_exclusive_create(dir, flags)) + if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET) return NULL; res = ERR_PTR(-ENOMEM); -- cgit v1.2.3 From f3893491427ff426047d83ef20be3aaa562e3044 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 31 May 2018 15:23:22 +1000 Subject: NFS: slight optimization for walking list for delegations There are 3 places where we walk the list of delegations for an nfs_client. In each case there are two nested loops, one for nfs_servers and one for nfs_delegations. When we find an interesting delegation we try to get an active reference to the server. If that fails, it is pointless to continue to look at the other delegation for the server as we will never be able to get an active reference. So instead of continuing in the inner loop, break out and continue in the outer loop. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 1819d0d0ba4b..b9cd3864335b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -495,7 +495,7 @@ restart: if (!nfs_delegation_need_return(delegation)) continue; if (!nfs_sb_active(server->super)) - continue; + break; /* continue in outer loop */ inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) { rcu_read_unlock(); @@ -887,7 +887,7 @@ restart: &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) - continue; + break; /* continue in outer loop */ inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) { rcu_read_unlock(); @@ -995,7 +995,7 @@ restart: &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) - continue; + break; /* continue in outer loop */ inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) { rcu_read_unlock(); -- cgit v1.2.3 From 3ca951b618386321d9396e9e26be7f4235b5cdb2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Apr 2018 14:31:30 +1000 Subject: NFS: use cond_resched() when restarting walk of delegation list. In three places we walk the list of delegations for an nfs_client until an interesting one is found, then we act of that delegation and restart the walk. New delegations are added to the end of a list and the interesting delegations are usually old, so in many case we won't repeat a long walk over and over again, but it is possible - particularly if the first server in the list has a large number of uninteresting delegations. In each cache the work done on interesting delegations will often complete without sleeping, so this could loop many times without giving up the CPU. So add a cond_resched() at an appropriate point to avoid hogging the CPU for too long. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index b9cd3864335b..c454532b6f03 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -508,6 +508,7 @@ restart: err = nfs_end_delegation_return(inode, delegation, 0); iput(inode); nfs_sb_deactive(server->super); + cond_resched(); if (!err) goto restart; set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); @@ -904,6 +905,7 @@ restart: } iput(inode); nfs_sb_deactive(server->super); + cond_resched(); goto restart; } } @@ -1020,6 +1022,7 @@ restart: } iput(inode); nfs_sb_deactive(server->super); + cond_resched(); goto restart; } } -- cgit v1.2.3 From e04bbf6b1bbe34e1baa2fa34c8774ba152f0426d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Apr 2018 14:31:30 +1000 Subject: NFS: Avoid quadratic search when freeing delegations. There are three places that walk all delegation for an nfs_client and restart whenever they find something interesting - potentially resulting in a quadratic search: If there are 10,000 uninteresting delegations followed by 10,000 interesting one, then the code skips over 100,000,000 delegations, which can take a noticeable amount of time. Of these nfs_delegation_reap_unclaimed() and nfs_reap_expired_delegations() are only called during unusual events: a server reboots or reports expired delegations, probably due to a network partition. Optimizing these is not particularly important. The third, nfs_client_return_marked_delegations(), is called periodically via nfs_expire_unreferenced_delegations(). It could cause periodic problems on a busy server. New delegations are added to the end of the list, so if there are 10,000 open files with delegations, and 10,000 more recently opened files that received delegations but are now closed, then nfs_client_return_marked_delegations() can take seconds to skip over the 10,000 open files 10,000 times. That is a waste of time. The avoid this waste a place-holder (an inode) is kept when locks are dropped, so that the place can usually be found again after taking rcu_readlock(). This place holder ensure that we find the right starting point in the list of nfs_servers, and makes is probable that we find the right starting point in the list of delegations. We might need to occasionally restart at the head of that list. It might be possible that the place_holder inode could lose its delegation separately, and then get a new one using the same (freed and then reallocated) 'struct nfs_delegation'. Were this to happen, the new delegation would be at the end of the list and we would miss returning some other delegations. This would have the effect of unnecessarily delaying the return of some unused delegations until the next time this function is called - typically 90 seconds later. As this is not a correctness issue and is vanishingly unlikely to happen, it does not seem worth addressing. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c454532b6f03..105b0f39a5b2 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -483,28 +483,73 @@ out: int nfs_client_return_marked_delegations(struct nfs_client *clp) { struct nfs_delegation *delegation; + struct nfs_delegation *prev; struct nfs_server *server; struct inode *inode; + struct inode *place_holder = NULL; + struct nfs_delegation *place_holder_deleg = NULL; int err = 0; restart: + /* + * To avoid quadratic looping we hold a reference + * to an inode place_holder. Each time we restart, we + * list nfs_servers from the server of that inode, and + * delegation in the server from the delegations of that + * inode. + * prev is an RCU-protected pointer to a delegation which + * wasn't marked for return and might be a good choice for + * the next place_holder. + */ rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - list_for_each_entry_rcu(delegation, &server->delegations, - super_list) { - if (!nfs_delegation_need_return(delegation)) + prev = NULL; + if (place_holder) + server = NFS_SERVER(place_holder); + else + server = list_entry_rcu(clp->cl_superblocks.next, + struct nfs_server, client_link); + list_for_each_entry_from_rcu(server, &clp->cl_superblocks, client_link) { + delegation = NULL; + if (place_holder && server == NFS_SERVER(place_holder)) + delegation = rcu_dereference(NFS_I(place_holder)->delegation); + if (!delegation || delegation != place_holder_deleg) + delegation = list_entry_rcu(server->delegations.next, + struct nfs_delegation, super_list); + list_for_each_entry_from_rcu(delegation, &server->delegations, super_list) { + struct inode *to_put = NULL; + + if (!nfs_delegation_need_return(delegation)) { + prev = delegation; continue; + } if (!nfs_sb_active(server->super)) break; /* continue in outer loop */ + + if (prev) { + struct inode *tmp; + + tmp = nfs_delegation_grab_inode(prev); + if (tmp) { + to_put = place_holder; + place_holder = tmp; + place_holder_deleg = prev; + } + } + inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) { rcu_read_unlock(); + if (to_put) + iput(to_put); nfs_sb_deactive(server->super); goto restart; } delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); + if (to_put) + iput(to_put); + err = nfs_end_delegation_return(inode, delegation, 0); iput(inode); nfs_sb_deactive(server->super); @@ -512,10 +557,14 @@ restart: if (!err) goto restart; set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + if (place_holder) + iput(place_holder); return err; } } rcu_read_unlock(); + if (place_holder) + iput(place_holder); return 0; } -- cgit v1.2.3 From fb91fb0ee7b266ed0344515c048f57ac65cdb4b4 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 4 May 2018 16:22:48 -0400 Subject: NFS: Move call to nfs4_state_protect_write() to nfs4_write_setup() This doesn't really need to be in the generic NFS client code, and I think it makes more sense to keep the v4 code in one place. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 3 ++- fs/nfs/nfs4proc.c | 4 +++- fs/nfs/proc.c | 3 ++- fs/nfs/write.c | 5 +---- include/linux/nfs_xdr.h | 3 ++- 5 files changed, 10 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index eadf1ab31d16..3ffaff471592 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -823,7 +823,8 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) } static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr, - struct rpc_message *msg) + struct rpc_message *msg, + struct rpc_clnt **clnt) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b71757e85066..09be575e9194 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4979,7 +4979,8 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) } static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, - struct rpc_message *msg) + struct rpc_message *msg, + struct rpc_clnt **clnt) { struct nfs_server *server = NFS_SERVER(hdr->inode); @@ -4996,6 +4997,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1); + nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4e93d6308733..aaad337962d0 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -618,7 +618,8 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) } static void nfs_proc_write_setup(struct nfs_pgio_header *hdr, - struct rpc_message *msg) + struct rpc_message *msg, + struct rpc_clnt **clnt) { /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ hdr->args.stable = NFS_FILE_SYNC; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0193053bc139..b633583ca268 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1375,12 +1375,9 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, int priority = flush_task_priority(how); task_setup_data->priority = priority; - rpc_ops->write_setup(hdr, msg); + rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes, hdr->args.stable); - - nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, - &task_setup_data->rpc_client, msg, hdr); } /* If a nfs_flush_* function fails, it should remove reqs from @head and diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 34d28564ecf3..1009269e1a49 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1620,7 +1620,8 @@ struct nfs_rpc_ops { struct nfs_pgio_header *); void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *); int (*read_done)(struct rpc_task *, struct nfs_pgio_header *); - void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *); + void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *, + struct rpc_clnt **); int (*write_done)(struct rpc_task *, struct nfs_pgio_header *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); -- cgit v1.2.3 From e9ae1ee2b2ace7dcc37bc4e486b4410e7702ae57 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 4 May 2018 16:22:49 -0400 Subject: NFS: Move call to nfs4_state_protect() to nfs4_commit_setup() Rather than doing this in the generic NFS client code. Let's put this with the other v4 stuff so it's all in one place. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 3 ++- fs/nfs/nfs4proc.c | 4 +++- fs/nfs/proc.c | 3 ++- fs/nfs/write.c | 5 +---- include/linux/nfs_xdr.h | 3 ++- 5 files changed, 10 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3ffaff471592..6ed6a4195906 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -845,7 +845,8 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) return 0; } -static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) +static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, + struct rpc_clnt **clnt) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 09be575e9194..50a067018a60 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5028,7 +5028,8 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data) return data->commit_done_cb(task, data); } -static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) +static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, + struct rpc_clnt **clnt) { struct nfs_server *server = NFS_SERVER(data->inode); @@ -5037,6 +5038,7 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg); } struct nfs4_renewdata { diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index aaad337962d0..80c350b6232a 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -632,7 +632,8 @@ static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit } static void -nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) +nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, + struct rpc_clnt **clnt) { BUG(); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b633583ca268..a057b4f45a46 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1666,14 +1666,11 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, .priority = priority, }; /* Set up the initial task struct. */ - nfs_ops->commit_setup(data, &msg); + nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client); trace_nfs_initiate_commit(data); dprintk("NFS: initiated commit call\n"); - nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, - NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 1009269e1a49..52b481dfd61e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1623,7 +1623,8 @@ struct nfs_rpc_ops { void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *, struct rpc_clnt **); int (*write_done)(struct rpc_task *, struct nfs_pgio_header *); - void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); + void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *, + struct rpc_clnt **); void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); int (*lock)(struct file *, int, struct file_lock *); -- cgit v1.2.3 From fba83f34119a74a8ad3f6649d9c697b951562592 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 4 May 2018 16:22:50 -0400 Subject: NFS: Pass "privileged" value to nfs4_init_sequence() We currently have a separate function just to set this, but I think it makes more sense to set it at the same time as the other values in nfs4_init_sequence() Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 2 +- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 81 +++++++++++++++++++++--------------------------------- 3 files changed, 33 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 9c374441f660..4d4a3df28779 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -462,7 +462,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, nfs42_layoutstat_release(data); return -EAGAIN; } - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0); task = rpc_run_task(&task_setup); if (IS_ERR(task)) return PTR_ERR(task); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b374f680830c..06a41aa2fdb2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -251,7 +251,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); -extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); +extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 50a067018a60..6c749ce7587e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -608,20 +608,16 @@ struct nfs4_call_sync_data { }; void nfs4_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) + struct nfs4_sequence_res *res, int cache_reply, + int privileged) { args->sa_slot = NULL; args->sa_cache_this = cache_reply; - args->sa_privileged = 0; + args->sa_privileged = privileged; res->sr_slot = NULL; } -static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) -{ - args->sa_privileged = 1; -} - static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) { struct nfs4_slot *slot = res->sr_slot; @@ -1035,7 +1031,7 @@ int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - nfs4_init_sequence(args, res, cache_reply); + nfs4_init_sequence(args, res, cache_reply, 0); return nfs4_call_sync_sequence(clnt, server, msg, args, res); } @@ -2187,13 +2183,12 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) }; int status; - nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1); + nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1, + data->is_recover); kref_get(&data->kref); data->rpc_done = false; data->rpc_status = 0; data->timestamp = jiffies; - if (data->is_recover) - nfs4_set_sequence_privileged(&data->c_arg.seq_args); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -2350,16 +2345,14 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) }; int status; - nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, isrecover); kref_get(&data->kref); data->rpc_done = false; data->rpc_status = 0; data->cancelled = false; data->is_recover = false; - if (isrecover) { - nfs4_set_sequence_privileged(&o_arg->seq_args); + if (isrecover) data->is_recover = true; - } task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -3393,7 +3386,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; - nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1, 0); calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); @@ -4273,7 +4266,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentr res->server = NFS_SB(dentry->d_sb); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; - nfs4_init_sequence(&args->seq_args, &res->seq_res, 1); + nfs4_init_sequence(&args->seq_args, &res->seq_res, 1, 0); nfs_fattr_init(res->dir_attr); @@ -4319,7 +4312,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, nfs4_inode_return_delegation(new_inode); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; res->server = NFS_SB(old_dentry->d_sb); - nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1); + nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1, 0); } static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) @@ -4895,7 +4888,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); } static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, @@ -4996,7 +4989,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, hdr->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1, 0); nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); } @@ -5037,7 +5030,7 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->commit_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg); } @@ -5976,7 +5969,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data = kzalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, @@ -6251,7 +6244,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1, 0); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -6513,14 +6506,14 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f return -ENOMEM; if (IS_SETLKW(cmd)) data->arg.block = 1; - nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1, + recovery_type > NFS_LOCK_NEW); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; if (recovery_type > NFS_LOCK_NEW) { if (recovery_type == NFS_LOCK_RECLAIM) data->arg.reclaim = NFS_LOCK_RECLAIM; - nfs4_set_sequence_privileged(&data->arg.seq_args); } else data->arg.new_lock = 1; task = rpc_run_task(&task_setup_data); @@ -6915,7 +6908,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); } @@ -7111,8 +7104,7 @@ static int _nfs40_proc_get_locations(struct inode *inode, locations->server = server; locations->nlocations = 0; - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(clnt, server, &msg, &args.seq_args, &res.seq_res); if (status) @@ -7165,8 +7157,7 @@ static int _nfs41_proc_get_locations(struct inode *inode, locations->server = server; locations->nlocations = 0; - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(clnt, server, &msg, &args.seq_args, &res.seq_res); if (status == NFS4_OK && @@ -7253,8 +7244,7 @@ static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) if (res.fh == NULL) return -ENOMEM; - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(clnt, server, &msg, &args.seq_args, &res.seq_res); nfs_free_fhandle(res.fh); @@ -7295,8 +7285,7 @@ static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) if (res.fh == NULL) return -ENOMEM; - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(clnt, server, &msg, &args.seq_args, &res.seq_res); nfs_free_fhandle(res.fh); @@ -8074,8 +8063,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) }; int status; - nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); - nfs4_set_sequence_privileged(&args.la_seq_args); + nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0, 1); task = rpc_run_task(&task_setup); if (IS_ERR(task)) @@ -8412,10 +8400,8 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, calldata = kzalloc(sizeof(*calldata), GFP_NOFS); if (calldata == NULL) goto out_put_clp; - nfs4_init_sequence(&calldata->args, &calldata->res, 0); + nfs4_init_sequence(&calldata->args, &calldata->res, 0, is_privileged); nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot); - if (is_privileged) - nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -8567,8 +8553,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, calldata->clp = clp; calldata->arg.one_fs = 0; - nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); - nfs4_set_sequence_privileged(&calldata->arg.seq_args); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0, 1); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -8804,7 +8789,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; - nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); + nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) @@ -8931,7 +8916,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) } task_setup_data.flags |= RPC_TASK_ASYNC; } - nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); + nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -9078,7 +9063,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) } task_setup_data.flags = RPC_TASK_ASYNC; } - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -9258,8 +9243,7 @@ static int _nfs41_test_stateid(struct nfs_server *server, &rpc_client, &msg); dprintk("NFS call test_stateid %p\n", stateid); - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); status = nfs4_call_sync_sequence(rpc_client, server, &msg, &args.seq_args, &res.seq_res); if (status != NFS_OK) { @@ -9382,10 +9366,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); - if (privileged) - nfs4_set_sequence_privileged(&data->args.seq_args); - + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, privileged); return rpc_run_task(&task_setup); } -- cgit v1.2.3 From 2f261020b6ddfa49dc8f1017fdc53e56276ba8b1 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 15 May 2018 13:03:39 -0400 Subject: NFS: Merge nfs41_free_stateid() with _nfs41_free_stateid() Having these exist as two functions doesn't seem to add anything useful, and I think merging them together makes this easier to follow. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6c749ce7587e..553cb8f98194 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9335,7 +9335,17 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = { .rpc_release = nfs41_free_stateid_release, }; -static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, +/** + * nfs41_free_stateid - perform a FREE_STATEID operation + * + * @server: server / transport on which to perform the operation + * @stateid: state ID to release + * @cred: credential + * @is_recovery: set to true if this call needs to be privileged + * + * Note: this function is always asynchronous. + */ +static int nfs41_free_stateid(struct nfs_server *server, const nfs4_stateid *stateid, struct rpc_cred *cred, bool privileged) @@ -9351,6 +9361,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, .flags = RPC_TASK_ASYNC, }; struct nfs_free_stateid_data *data; + struct rpc_task *task; nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, &task_setup.rpc_client, &msg); @@ -9358,7 +9369,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, dprintk("NFS call free_stateid %p\n", stateid); data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) - return ERR_PTR(-ENOMEM); + return -ENOMEM; data->server = server; nfs4_stateid_copy(&data->args.stateid, stateid); @@ -9367,27 +9378,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, privileged); - return rpc_run_task(&task_setup); -} - -/** - * nfs41_free_stateid - perform a FREE_STATEID operation - * - * @server: server / transport on which to perform the operation - * @stateid: state ID to release - * @cred: credential - * @is_recovery: set to true if this call needs to be privileged - * - * Note: this function is always asynchronous. - */ -static int nfs41_free_stateid(struct nfs_server *server, - const nfs4_stateid *stateid, - struct rpc_cred *cred, - bool is_recovery) -{ - struct rpc_task *task; - - task = _nfs41_free_stateid(server, stateid, cred, is_recovery); + task = rpc_run_task(&task_setup); if (IS_ERR(task)) return PTR_ERR(task); rpc_put_task(task); -- cgit v1.2.3 From 991eedb1371dc09b0f9848f59c8898fe63d198c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Apr 2018 11:15:30 -0400 Subject: NFSv4: Only pass the delegation to setattr if we're sending a truncate Even then it isn't really necessary. The reason why we may not want to pass in a stateid in other cases is that we cannot use the delegation credential. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 553cb8f98194..844eafbf1564 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3032,7 +3032,6 @@ static int _nfs4_do_setattr(struct inode *inode, }; struct rpc_cred *delegation_cred = NULL; unsigned long timestamp = jiffies; - fmode_t fmode; bool truncate; int status; @@ -3040,11 +3039,12 @@ static int _nfs4_do_setattr(struct inode *inode, /* Servers should only apply open mode checks for file size changes */ truncate = (arg->iap->ia_valid & ATTR_SIZE) ? true : false; - fmode = truncate ? FMODE_WRITE : FMODE_READ; + if (!truncate) + goto zero_stateid; - if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) { + if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) { /* Use that stateid */ - } else if (truncate && ctx != NULL) { + } else if (ctx != NULL) { struct nfs_lock_context *l_ctx; if (!nfs4_valid_open_stateid(ctx->state)) return -EBADF; @@ -3056,8 +3056,10 @@ static int _nfs4_do_setattr(struct inode *inode, nfs_put_lock_context(l_ctx); if (status == -EIO) return -EBADF; - } else + } else { +zero_stateid: nfs4_stateid_copy(&arg->stateid, &zero_stateid); + } if (delegation_cred) msg.rpc_cred = delegation_cred; -- cgit v1.2.3 From ed7e9ad0908a8c2a502f49ceed940d0ce122fe8b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 May 2018 16:11:52 -0400 Subject: NFSv4: Fix sillyrename to return the delegation when appropriate Ensure that we pass down the inode of the file being deleted so that we can return any delegation being held. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 4 +++- fs/nfs/nfs4proc.c | 5 +++-- fs/nfs/proc.c | 4 +++- fs/nfs/unlink.c | 10 +++++----- include/linux/nfs_xdr.h | 2 +- 5 files changed, 15 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 6ed6a4195906..5645ef4c5259 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -414,7 +414,9 @@ out: } static void -nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) +nfs3_proc_unlink_setup(struct rpc_message *msg, + struct dentry *dentry, + struct inode *inode) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 844eafbf1564..d21c5e4220a0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4260,11 +4260,12 @@ static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name) return err; } -static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) +static void nfs4_proc_unlink_setup(struct rpc_message *msg, + struct dentry *dentry, + struct inode *inode) { struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeres *res = msg->rpc_resp; - struct inode *inode = d_inode(dentry); res->server = NFS_SB(dentry->d_sb); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 80c350b6232a..763f77e7f1f1 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -321,7 +321,9 @@ nfs_proc_remove(struct inode *dir, struct dentry *dentry) } static void -nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) +nfs_proc_unlink_setup(struct rpc_message *msg, + struct dentry *dentry, + struct inode *inode) { msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; } diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index bf54fc9ae135..6a73b8c808ea 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -85,7 +85,7 @@ static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_prepare = nfs_unlink_prepare, }; -static void nfs_do_call_unlink(struct nfs_unlinkdata *data) +static void nfs_do_call_unlink(struct inode *inode, struct nfs_unlinkdata *data) { struct rpc_message msg = { .rpc_argp = &data->args, @@ -105,7 +105,7 @@ static void nfs_do_call_unlink(struct nfs_unlinkdata *data) data->args.fh = NFS_FH(dir); nfs_fattr_init(data->res.dir_attr); - NFS_PROTO(dir)->unlink_setup(&msg, data->dentry); + NFS_PROTO(dir)->unlink_setup(&msg, data->dentry, inode); task_setup_data.rpc_client = NFS_CLIENT(dir); task = rpc_run_task(&task_setup_data); @@ -113,7 +113,7 @@ static void nfs_do_call_unlink(struct nfs_unlinkdata *data) rpc_put_task_async(task); } -static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) +static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nfs_unlinkdata *data) { struct inode *dir = d_inode(dentry->d_parent); struct dentry *alias; @@ -153,7 +153,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) return ret; } data->dentry = alias; - nfs_do_call_unlink(data); + nfs_do_call_unlink(inode, data); return 1; } @@ -231,7 +231,7 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); - if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)) + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data)) nfs_free_unlinkdata(data); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52b481dfd61e..d3cefe57c2a3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1591,7 +1591,7 @@ struct nfs_rpc_ops { int (*create) (struct inode *, struct dentry *, struct iattr *, int); int (*remove) (struct inode *, struct dentry *); - void (*unlink_setup) (struct rpc_message *, struct dentry *); + void (*unlink_setup) (struct rpc_message *, struct dentry *, struct inode *); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); int (*unlink_done) (struct rpc_task *, struct inode *); void (*rename_setup) (struct rpc_message *msg, -- cgit v1.2.3 From 3cb3fd6da4e9c810f7d9322fd36a5b39941ce409 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Apr 2018 14:11:32 -0400 Subject: NFS: Fix up sillyrename() Ensure that we register the fact that the inode ctime has changed. Signed-off-by: Trond Myklebust --- fs/nfs/unlink.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 6a73b8c808ea..fd61bf0fce63 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -448,6 +448,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) unsigned char silly[SILLYNAME_LEN + 1]; unsigned long long fileid; struct dentry *sdentry; + struct inode *inode = d_inode(dentry); struct rpc_task *task; int error = -EBUSY; @@ -485,6 +486,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) goto out; } while (d_inode(sdentry) != NULL); /* need negative lookup */ + ihold(inode); + /* queue unlink first. Can't do this from rpc_release as it * has to allocate memory */ @@ -509,6 +512,12 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) case 0: /* The rename succeeded */ nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + spin_lock(&inode->i_lock); + NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME + | NFS_INO_REVAL_FORCED; + spin_unlock(&inode->i_lock); d_move(dentry, sdentry); break; case -ERESTARTSYS: @@ -519,6 +528,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) } rpc_put_task(task); out_dput: + iput(inode); dput(sdentry); out: return error; -- cgit v1.2.3 From 821a868a2316de95250d3069a7b8025ec24154a6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 27 Mar 2018 18:30:42 -0400 Subject: NFS: Set the force revalidate flag if the inode is not completely initialised Ensure that a delegation doesn't cause us to skip initialising the inode if it was incomplete when we exited nfs_fhget() Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bd15d0b57626..40a6cf6db4e8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -448,6 +448,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; + nfsi->cache_validity = 0; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 && nfs_server_capable(inode, NFS_CAP_MODE)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); @@ -534,6 +535,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); } + if (nfsi->cache_validity != 0) + nfsi->cache_validity |= NFS_INO_REVAL_FORCED; + nfs_setsecurity(inode, fattr, label); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); -- cgit v1.2.3 From 59a707b0d42ecf6326ed3846af629602bdf2ff08 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Apr 2018 18:11:18 -0400 Subject: NFS: Ensure we revalidate the inode correctly after remove or rename We may need to revalidate the change attribute, ctime and the nlinks count. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b315f53b3aec..978a22ea962c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1284,11 +1284,13 @@ static void nfs_drop_nlink(struct inode *inode) { spin_lock(&inode->i_lock); /* drop the inode if we're reasonably sure this is the last link */ - if (inode->i_nlink == 1) - clear_nlink(inode); + if (inode->i_nlink > 0) + drop_nlink(inode); + NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_OTHER + | NFS_INO_REVAL_FORCED; spin_unlock(&inode->i_lock); } @@ -2050,7 +2052,15 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } else error = task->tk_status; rpc_put_task(task); - nfs_mark_for_revalidate(old_inode); + /* Ensure the inode attributes are revalidated */ + if (error == 0) { + spin_lock(&old_inode->i_lock); + NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter(); + NFS_I(old_inode)->cache_validity |= NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME + | NFS_INO_REVAL_FORCED; + spin_unlock(&old_inode->i_lock); + } out: if (rehash) d_rehash(rehash); -- cgit v1.2.3 From 472f761e118c233f13cd24b3a872ad3dcfa0b0eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Apr 2018 18:14:43 -0400 Subject: NFS: Ensure we revalidate the inode correctly after setacl Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d21c5e4220a0..36e36f176c75 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5391,7 +5391,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl */ spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME; + | NFS_INO_INVALID_CTIME + | NFS_INO_REVAL_FORCED; spin_unlock(&inode->i_lock); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); -- cgit v1.2.3 From d554168f87a55b35b7c59921a0dc45b6ba17d08d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 May 2018 11:15:49 -0400 Subject: NFS: Fix up nfs_post_op_update_inode() to force ctime updates We do not want to ignore ctime updates that originate from functions such as link(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 40a6cf6db4e8..5de724b1b90c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1633,7 +1633,8 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_fattr_set_barrier(fattr); status = nfs_post_op_update_inode_locked(inode, fattr, NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME); + | NFS_INO_INVALID_CTIME + | NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); return status; -- cgit v1.2.3 From d68894800ec5712d7ddf042356f11e36f87d7f78 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 29 May 2018 17:47:30 -0400 Subject: NFSv4: Fix possible 1-byte stack overflow in nfs_idmap_read_and_verify_message In nfs_idmap_read_and_verify_message there is an incorrect sprintf '%d' that converts the __u32 'im_id' from struct idmap_msg to 'id_str', which is a stack char array variable of length NFS_UINT_MAXLEN == 11. If a uid or gid value is > 2147483647 = 0x7fffffff, the conversion overflows into a negative value, for example: crash> p (unsigned) (0x80000000) $1 = 2147483648 crash> p (signed) (0x80000000) $2 = -2147483648 The '-' sign is written to the buffer and this causes a 1 byte overflow when the NULL byte is written, which corrupts kernel stack memory. If CONFIG_CC_STACKPROTECTOR_STRONG is set we see a stack-protector panic: [11558053.616565] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffffa05b8a8c [11558053.639063] CPU: 6 PID: 9423 Comm: rpc.idmapd Tainted: G W ------------ T 3.10.0-514.el7.x86_64 #1 [11558053.641990] Hardware name: Red Hat OpenStack Compute, BIOS 1.10.2-3.el7_4.1 04/01/2014 [11558053.644462] ffffffff818c7bc0 00000000b1f3aec1 ffff880de0f9bd48 ffffffff81685eac [11558053.646430] ffff880de0f9bdc8 ffffffff8167f2b3 ffffffff00000010 ffff880de0f9bdd8 [11558053.648313] ffff880de0f9bd78 00000000b1f3aec1 ffffffff811dcb03 ffffffffa05b8a8c [11558053.650107] Call Trace: [11558053.651347] [] dump_stack+0x19/0x1b [11558053.653013] [] panic+0xe3/0x1f2 [11558053.666240] [] ? kfree+0x103/0x140 [11558053.682589] [] ? idmap_pipe_downcall+0x1cc/0x1e0 [nfsv4] [11558053.689710] [] __stack_chk_fail+0x1b/0x30 [11558053.691619] [] idmap_pipe_downcall+0x1cc/0x1e0 [nfsv4] [11558053.693867] [] rpc_pipe_write+0x56/0x70 [sunrpc] [11558053.695763] [] vfs_write+0xbd/0x1e0 [11558053.702236] [] ? task_work_run+0xac/0xe0 [11558053.704215] [] SyS_write+0x7f/0xe0 [11558053.709674] [] system_call_fastpath+0x16/0x1b Fix this by calling the internally defined nfs_map_numeric_to_string() function which properly uses '%u' to convert this __u32. For consistency, also replace the one other place where snprintf is called. Signed-off-by: Dave Wysochanski Reported-by: Stephen Johnston Fixes: cf4ab538f1516 ("NFSv4: Fix the string length returned by the idmapper") Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4idmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 22dc30a679a0..b6f9d84ba19b 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -343,7 +343,7 @@ static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, int id_len; ssize_t ret; - id_len = snprintf(id_str, sizeof(id_str), "%u", id); + id_len = nfs_map_numeric_to_string(id, id_str, sizeof(id_str)); ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap); if (ret < 0) return -EINVAL; @@ -627,7 +627,8 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, if (strcmp(upcall->im_name, im->im_name) != 0) break; /* Note: here we store the NUL terminator too */ - len = sprintf(id_str, "%d", im->im_id) + 1; + len = 1 + nfs_map_numeric_to_string(im->im_id, id_str, + sizeof(id_str)); ret = nfs_idmap_instantiate(key, authkey, id_str, len); break; case IDMAP_CONV_IDTONAME: -- cgit v1.2.3 From cf61eb268678c83eef812f937a3a9aeee67c460b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 May 2018 22:06:08 -0400 Subject: NFSv4: Always clear the pNFS layout when handling ESTALE If we get an ESTALE error in response to an RPC call operating on the file on the MDS, we should immediately cancel the layout for that file. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 4 ++++ fs/nfs/nfs4proc.c | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 4d4a3df28779..5f59b6f65a42 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -370,6 +370,10 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: break; + case -NFS4ERR_BADHANDLE: + case -ESTALE: + pnfs_destroy_layout(NFS_I(inode)); + break; case -NFS4ERR_EXPIRED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 36e36f176c75..a0f16c8c5ebd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -407,6 +407,11 @@ static int nfs4_do_handle_exception(struct nfs_server *server, switch(errorcode) { case 0: return 0; + case -NFS4ERR_BADHANDLE: + case -ESTALE: + if (inode != NULL && S_ISREG(inode->i_mode)) + pnfs_destroy_layout(NFS_I(inode)); + break; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_EXPIRED: -- cgit v1.2.3 From a3cf9bca2ace0351c4a4c17fbca4d652c323d5e5 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 3 May 2018 07:12:57 -0400 Subject: NFSv4: Don't add a new lock on an interrupted wait for LOCK If the wait for a LOCK operation is interrupted, and then the file is closed, the locks cleanup code will assume that no new locks will be added to the inode after it has completed. We already have a mechanism to detect if there was signal, so let's use that to avoid recreating the local lock once the RPC completes. Also skip re-sending the LOCK operation for the various error cases if we were signaled. Signed-off-by: Benjamin Coddington [Trond: Fix inverted test of locks_lock_inode_wait()] Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a0f16c8c5ebd..9945b36ea863 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6417,32 +6417,36 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) case 0: renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), data->timestamp); - if (data->arg.new_lock) { + if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); - if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) { - rpc_restart_call_prepare(task); + if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) break; - } } + if (data->arg.new_lock_owner != 0) { nfs_confirm_seqid(&lsp->ls_seqid, 0); nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) - rpc_restart_call_prepare(task); + goto out_done; + } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid)) + goto out_done; + break; case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: if (data->arg.new_lock_owner != 0) { - if (!nfs4_stateid_match(&data->arg.open_stateid, + if (nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) - rpc_restart_call_prepare(task); - } else if (!nfs4_stateid_match(&data->arg.lock_stateid, + goto out_done; + } else if (nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) - rpc_restart_call_prepare(task); + goto out_done; } + if (!data->cancelled) + rpc_restart_call_prepare(task); +out_done: dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); } -- cgit v1.2.3 From 34ec9aac7dbf199e0d676ff70724f496e4271579 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 20 Sep 2016 13:00:27 -0400 Subject: pnfs: Remove redundant assignment from nfs4_proc_layoutget(). nfs_init_sequence() will clear this for us. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9945b36ea863..4ff8f18ffc5c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8801,7 +8801,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->res.layoutp = &lgp->args.layout; - lgp->res.seq_res.sr_slot = NULL; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); -- cgit v1.2.3 From 808ba32abe84b74abef5eb7507b8031f65b8221d Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 4 Oct 2016 15:02:21 -0400 Subject: pnfs: Store return value of decode_layoutget for later processing This will be needed to seperate return value of OPEN and LAYOUTGET when they are combined into a single RPC. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 15 ++++++++++----- include/linux/nfs_xdr.h | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9b7392032321..6024980dfc9e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6024,7 +6024,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, status = decode_op_hdr(xdr, OP_LAYOUTGET); if (status) - return status; + goto out; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; @@ -6037,7 +6037,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, if (!layout_count) { dprintk("%s: server responded with empty layout array\n", __func__); - return -EINVAL; + status = -EINVAL; + goto out; } p = xdr_inline_decode(xdr, 28); @@ -6062,7 +6063,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, dprintk("NFS: server cheating in layoutget reply: " "layout len %u > recvd %u\n", res->layoutp->len, recvd); - return -EINVAL; + status = -EINVAL; + goto out; } if (layout_count > 1) { @@ -6075,10 +6077,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, __func__, layout_count); } - return 0; +out: + res->status = status; + return status; out_overflow: print_overflow_msg(__func__, xdr); - return -EIO; + status = -EIO; + goto out; } static int decode_layoutreturn(struct xdr_stream *xdr, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d3cefe57c2a3..d3aa5eaf99a7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -259,6 +259,7 @@ struct nfs4_layoutget_args { struct nfs4_layoutget_res { struct nfs4_sequence_res seq_res; + int status; __u32 return_on_close; struct pnfs_layout_range range; __u32 type; -- cgit v1.2.3 From 3b65a30df9b3f1cb336d9421892e7860c0f999c0 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Mon, 19 Sep 2016 10:06:49 -0400 Subject: NFS4: move ctx into nfs4_run_open_task Preparing to add conditional LAYOUTGET to OPEN rpc, the LAYOUTGET will need the ctx info. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4ff8f18ffc5c..556c1263999a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -86,7 +86,6 @@ | ATTR_MTIME_SET) struct nfs4_opendata; -static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); @@ -2327,7 +2326,8 @@ static const struct rpc_call_ops nfs4_open_ops = { .rpc_release = nfs4_open_release, }; -static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) +static int nfs4_run_open_task(struct nfs4_opendata *data, + struct nfs_open_context *ctx) { struct inode *dir = d_inode(data->dir); struct nfs_server *server = NFS_SERVER(dir); @@ -2350,14 +2350,16 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) }; int status; - nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, isrecover); kref_get(&data->kref); data->rpc_done = false; data->rpc_status = 0; data->cancelled = false; data->is_recover = false; - if (isrecover) + if (!ctx) { + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1); data->is_recover = true; + } else + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -2378,7 +2380,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) struct nfs_openres *o_res = &data->o_res; int status; - status = nfs4_run_open_task(data, 1); + status = nfs4_run_open_task(data, NULL); if (status != 0 || !data->rpc_done) return status; @@ -2439,7 +2441,8 @@ static int nfs4_opendata_access(struct rpc_cred *cred, /* * Note: On error, nfs4_proc_open will free the struct nfs4_opendata */ -static int _nfs4_proc_open(struct nfs4_opendata *data) +static int _nfs4_proc_open(struct nfs4_opendata *data, + struct nfs_open_context *ctx) { struct inode *dir = d_inode(data->dir); struct nfs_server *server = NFS_SERVER(dir); @@ -2447,7 +2450,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) struct nfs_openres *o_res = &data->o_res; int status; - status = nfs4_run_open_task(data, 0); + status = nfs4_run_open_task(data, ctx); if (!data->rpc_done) return status; if (status != 0) { @@ -2798,7 +2801,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); - ret = _nfs4_proc_open(opendata); + ret = _nfs4_proc_open(opendata, ctx); if (ret != 0) goto out; -- cgit v1.2.3 From f86c3ac50276b6b9d6246e0fcb4781c4eaeb04eb Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 20 Sep 2016 06:56:02 -0400 Subject: pnfs: Add layout driver flag PNFS_LAYOUTGET_ON_OPEN Driver can set flag to allow LAYOUTGET to be sent with OPEN. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 1 + fs/nfs/pnfs.h | 1 + 2 files changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index c75ad982bcfc..3ae038d9c292 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2347,6 +2347,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", .owner = THIS_MODULE, + .flags = PNFS_LAYOUTGET_ON_OPEN, .set_layoutdriver = ff_layout_set_layoutdriver, .alloc_layout_hdr = ff_layout_alloc_layout_hdr, .free_layout_hdr = ff_layout_free_layout_hdr, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index daf6cbf5c15f..f71a55f11b97 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -110,6 +110,7 @@ enum layoutdriver_policy_flags { PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, PNFS_LAYOUTRET_ON_ERROR = 1 << 1, PNFS_READ_WHOLE_PAGE = 1 << 2, + PNFS_LAYOUTGET_ON_OPEN = 1 << 3, }; struct nfs4_deviceid_node; -- cgit v1.2.3 From 587f03deb69bdbf2c8f5f06c5cdcebb42196faff Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 21 Sep 2016 05:14:28 -0400 Subject: pnfs: refactor send_layoutget Pull out the alloc/init part for eventual reuse by OPEN. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ee723aa153a3..2c955c389ecf 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -915,18 +915,12 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } -/* - * Get layout from server. - * for now, assume that whole file layouts are requested. - * arg->offset: 0 - * arg->length: all ones - */ -static struct pnfs_layout_segment * -send_layoutget(struct pnfs_layout_hdr *lo, +static struct nfs4_layoutget * +pnfs_alloc_init_layoutget_args(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, nfs4_stateid *stateid, const struct pnfs_layout_range *range, - long *timeout, gfp_t gfp_flags) + gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); @@ -935,14 +929,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); - /* - * Synchronously retrieve layout information from server and - * store in lseg. If we race with a concurrent seqid morphing - * op, then re-send the LAYOUTGET. - */ lgp = kzalloc(sizeof(*lgp), gfp_flags); if (lgp == NULL) - return ERR_PTR(-ENOMEM); + return NULL; i_size = i_size_read(ino); @@ -963,8 +952,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, nfs4_stateid_copy(&lgp->args.stateid, stateid); lgp->gfp_flags = gfp_flags; lgp->cred = lo->plh_lc_cred; - - return nfs4_proc_layoutget(lgp, timeout, gfp_flags); + return lgp; } static void pnfs_clear_layoutcommit(struct inode *inode, @@ -1694,6 +1682,7 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo = NULL; struct pnfs_layout_segment *lseg = NULL; + struct nfs4_layoutget *lgp; nfs4_stateid stateid; long timeout = 0; unsigned long giveup = jiffies + (clp->cl_lease_time << 1); @@ -1838,7 +1827,15 @@ lookup_again: if (arg.length != NFS4_MAX_UINT64) arg.length = PAGE_ALIGN(arg.length); - lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); + lgp = pnfs_alloc_init_layoutget_args(lo, ctx, &stateid, &arg, gfp_flags); + if (!lgp) { + trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, + PNFS_UPDATE_LAYOUT_NOMEM); + atomic_dec(&lo->plh_outstanding); + goto out_put_layout_hdr; + } + + lseg = nfs4_proc_layoutget(lgp, &timeout, gfp_flags); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); atomic_dec(&lo->plh_outstanding); -- cgit v1.2.3 From dacb452db8733474c0317d499244c3c1ac769ef5 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Mon, 19 Sep 2016 17:47:09 -0400 Subject: pnfs: move allocations out of nfs4_proc_layoutget They work better in the new alloc_init function. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 +++------------ fs/nfs/pnfs.c | 12 +++++++++++- fs/nfs/pnfs.h | 4 +++- 3 files changed, 17 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 556c1263999a..16b14aa99b79 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8698,7 +8698,7 @@ out: return status; } -static size_t max_response_pages(struct nfs_server *server) +size_t max_response_pages(struct nfs_server *server) { u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; return nfs_page_array_len(0, max_resp_sz); @@ -8719,7 +8719,7 @@ static void nfs4_free_pages(struct page **pages, size_t size) kfree(pages); } -static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) { struct page **pages; int i; @@ -8765,11 +8765,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { }; struct pnfs_layout_segment * -nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) { struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); - size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -8796,14 +8795,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ pnfs_get_layout_hdr(NFS_I(inode)->layout); - lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); - if (!lgp->args.layout.pages) { - nfs4_layoutget_release(lgp); - return ERR_PTR(-ENOMEM); - } - lgp->args.layout.pglen = max_pages * PAGE_SIZE; - - lgp->res.layoutp = &lgp->args.layout; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2c955c389ecf..363420d0f3a0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -924,6 +924,7 @@ pnfs_alloc_init_layoutget_args(struct pnfs_layout_hdr *lo, { struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); + size_t max_pages = max_response_pages(server); struct nfs4_layoutget *lgp; loff_t i_size; @@ -933,6 +934,15 @@ pnfs_alloc_init_layoutget_args(struct pnfs_layout_hdr *lo, if (lgp == NULL) return NULL; + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); + if (!lgp->args.layout.pages) { + kfree(lgp); + return NULL; + } + lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->res.layoutp = &lgp->args.layout; + + i_size = i_size_read(ino); lgp->args.minlength = PAGE_SIZE; @@ -1835,7 +1845,7 @@ lookup_again: goto out_put_layout_hdr; } - lseg = nfs4_proc_layoutget(lgp, &timeout, gfp_flags); + lseg = nfs4_proc_layoutget(lgp, &timeout); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); atomic_dec(&lo->plh_outstanding); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f71a55f11b97..964a7227ea97 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -224,10 +224,12 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); /* nfs4proc.c */ +extern size_t max_response_pages(struct nfs_server *server); +extern struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, struct rpc_cred *cred); -extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags); +extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); /* pnfs.c */ -- cgit v1.2.3 From 56f487f8c8fc5d6e582b79a86fc132d050129e15 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 21 Sep 2016 11:43:41 -0400 Subject: pnfs: Add conditional encode/decode of LAYOUTGET within OPEN compound Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 50 +++++++++++++++++++++++++++++++++++++++++++++---- include/linux/nfs_xdr.h | 2 ++ 3 files changed, 49 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 16b14aa99b79..b3f200208295 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1080,6 +1080,7 @@ struct nfs4_opendata { struct nfs4_state_owner *owner; struct nfs4_state *state; struct iattr attrs; + struct nfs4_layoutget *lgp; unsigned long timestamp; bool rpc_done; bool file_created; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6024980dfc9e..738a7be019d2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -65,7 +65,13 @@ /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO +struct compound_hdr; static int nfs4_stat_to_errno(int); +static void encode_layoutget(struct xdr_stream *xdr, + const struct nfs4_layoutget_args *args, + struct compound_hdr *hdr); +static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, + struct nfs4_layoutget_res *res); /* NFSv4 COMPOUND tags are only wanted for debugging purposes */ #ifdef DEBUG @@ -424,6 +430,8 @@ static int nfs4_stat_to_errno(int); #define decode_sequence_maxsz 0 #define encode_layoutreturn_maxsz 0 #define decode_layoutreturn_maxsz 0 +#define encode_layoutget_maxsz 0 +#define decode_layoutget_maxsz 0 #endif /* CONFIG_NFS_V4_1 */ #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ @@ -476,14 +484,16 @@ static int nfs4_stat_to_errno(int); encode_open_maxsz + \ encode_access_maxsz + \ encode_getfh_maxsz + \ - encode_getattr_maxsz) + encode_getattr_maxsz + \ + encode_layoutget_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ decode_access_maxsz + \ decode_getfh_maxsz + \ - decode_getattr_maxsz) + decode_getattr_maxsz + \ + decode_layoutget_maxsz) #define NFS4_enc_open_confirm_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -497,13 +507,15 @@ static int nfs4_stat_to_errno(int); encode_putfh_maxsz + \ encode_open_maxsz + \ encode_access_maxsz + \ - encode_getattr_maxsz) + encode_getattr_maxsz + \ + encode_layoutget_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ decode_access_maxsz + \ - decode_getattr_maxsz) + decode_getattr_maxsz + \ + decode_layoutget_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ @@ -2070,6 +2082,13 @@ encode_layoutreturn(struct xdr_stream *xdr, struct compound_hdr *hdr) { } + +static void +encode_layoutget(struct xdr_stream *xdr, + const struct nfs4_layoutget_args *args, + struct compound_hdr *hdr) +{ +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2316,6 +2335,12 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, if (args->access) encode_access(xdr, args->access, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); + if (args->lg_args) { + encode_layoutget(xdr, args->lg_args, &hdr); + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, + args->lg_args->layout.pages, + 0, args->lg_args->layout.pglen); + } encode_nops(&hdr); } @@ -2356,6 +2381,12 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, if (args->access) encode_access(xdr, args->access, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); + if (args->lg_args) { + encode_layoutget(xdr, args->lg_args, &hdr); + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, + args->lg_args->layout.pages, + 0, args->lg_args->layout.pglen); + } encode_nops(&hdr); } @@ -6182,6 +6213,13 @@ int decode_layoutreturn(struct xdr_stream *xdr, { return 0; } + +static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, + struct nfs4_layoutget_res *res) +{ + return 0; +} + #endif /* CONFIG_NFS_V4_1 */ /* @@ -6628,6 +6666,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (res->access_request) decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server); + if (res->lg_res) + decode_layoutget(xdr, rqstp, res->lg_res); out: return status; } @@ -6680,6 +6720,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, if (res->access_request) decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); + if (res->lg_res) + decode_layoutget(xdr, rqstp, res->lg_res); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d3aa5eaf99a7..bc235e50415f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -436,6 +436,7 @@ struct nfs_openargs { enum createmode4 createmode; const struct nfs4_label *label; umode_t umask; + struct nfs4_layoutget_args *lg_args; }; struct nfs_openres { @@ -458,6 +459,7 @@ struct nfs_openres { __u32 access_request; __u32 access_supported; __u32 access_result; + struct nfs4_layoutget_res *lg_res; }; /* -- cgit v1.2.3 From 1b146fcff7301c578d6e4a4bff48f1988cc12b4b Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 21 Sep 2016 15:24:26 -0400 Subject: pnfs: Move nfs4_opendata into nfs4_fs.h It will be needed now by the pnfs code. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 25 +++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 25 ------------------------- fs/nfs/pnfs.c | 1 + 3 files changed, 26 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 06a41aa2fdb2..137e18abb7e7 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -212,6 +212,31 @@ struct nfs4_state_recovery_ops { struct rpc_cred *); }; +struct nfs4_opendata { + struct kref kref; + struct nfs_openargs o_arg; + struct nfs_openres o_res; + struct nfs_open_confirmargs c_arg; + struct nfs_open_confirmres c_res; + struct nfs4_string owner_name; + struct nfs4_string group_name; + struct nfs4_label *a_label; + struct nfs_fattr f_attr; + struct nfs4_label *f_label; + struct dentry *dir; + struct dentry *dentry; + struct nfs4_state_owner *owner; + struct nfs4_state *state; + struct iattr attrs; + struct nfs4_layoutget *lgp; + unsigned long timestamp; + bool rpc_done; + bool file_created; + bool is_recover; + bool cancelled; + int rpc_status; +}; + struct nfs4_add_xprt_data { struct nfs_client *clp; struct rpc_cred *cred; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b3f200208295..c7f982cdcbdd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1064,31 +1064,6 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, spin_unlock(&dir->i_lock); } -struct nfs4_opendata { - struct kref kref; - struct nfs_openargs o_arg; - struct nfs_openres o_res; - struct nfs_open_confirmargs c_arg; - struct nfs_open_confirmres c_res; - struct nfs4_string owner_name; - struct nfs4_string group_name; - struct nfs4_label *a_label; - struct nfs_fattr f_attr; - struct nfs4_label *f_label; - struct dentry *dir; - struct dentry *dentry; - struct nfs4_state_owner *owner; - struct nfs4_state *state; - struct iattr attrs; - struct nfs4_layoutget *lgp; - unsigned long timestamp; - bool rpc_done; - bool file_created; - bool is_recover; - bool cancelled; - int rpc_status; -}; - struct nfs4_open_createattrs { struct nfs4_label *label; struct iattr *sattr; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 363420d0f3a0..11148414b9b6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -37,6 +37,7 @@ #include "nfs4trace.h" #include "delegation.h" #include "nfs42.h" +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) -- cgit v1.2.3 From 5e36e2a9411210b1f81982af02504cd20f5c91d5 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Oct 2016 12:08:51 -0400 Subject: pnfs: Change pnfs_alloc_init_layoutget_args call signature Don't send in a layout, instead use the (possibly NULL) inode. This is needed for LAYOUTGET attached to an OPEN where the inode is not yet set. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 11148414b9b6..07f1bbd9100c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -916,18 +916,31 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } +static struct nfs_server * +pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx) +{ + struct nfs_server *server; + + if (inode) + server = NFS_SERVER(inode); + else { + struct dentry *parent_dir = dget_parent(ctx->dentry); + server = NFS_SERVER(parent_dir->d_inode); + dput(parent_dir); + } + return server; +} + static struct nfs4_layoutget * -pnfs_alloc_init_layoutget_args(struct pnfs_layout_hdr *lo, +pnfs_alloc_init_layoutget_args(struct inode *ino, struct nfs_open_context *ctx, nfs4_stateid *stateid, const struct pnfs_layout_range *range, gfp_t gfp_flags) { - struct inode *ino = lo->plh_inode; - struct nfs_server *server = NFS_SERVER(ino); + struct nfs_server *server = pnfs_find_server(ino, ctx); size_t max_pages = max_response_pages(server); struct nfs4_layoutget *lgp; - loff_t i_size; dprintk("--> %s\n", __func__); @@ -944,16 +957,19 @@ pnfs_alloc_init_layoutget_args(struct pnfs_layout_hdr *lo, lgp->res.layoutp = &lgp->args.layout; - i_size = i_size_read(ino); lgp->args.minlength = PAGE_SIZE; if (lgp->args.minlength > range->length) lgp->args.minlength = range->length; - if (range->iomode == IOMODE_READ) { - if (range->offset >= i_size) - lgp->args.minlength = 0; - else if (i_size - range->offset < lgp->args.minlength) - lgp->args.minlength = i_size - range->offset; + if (ino) { + loff_t i_size = i_size_read(ino); + + if (range->iomode == IOMODE_READ) { + if (range->offset >= i_size) + lgp->args.minlength = 0; + else if (i_size - range->offset < lgp->args.minlength) + lgp->args.minlength = i_size - range->offset; + } } lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; pnfs_copy_range(&lgp->args.range, range); @@ -962,7 +978,7 @@ pnfs_alloc_init_layoutget_args(struct pnfs_layout_hdr *lo, lgp->args.ctx = get_nfs_open_context(ctx); nfs4_stateid_copy(&lgp->args.stateid, stateid); lgp->gfp_flags = gfp_flags; - lgp->cred = lo->plh_lc_cred; + lgp->cred = ctx->cred; return lgp; } @@ -1838,7 +1854,7 @@ lookup_again: if (arg.length != NFS4_MAX_UINT64) arg.length = PAGE_ALIGN(arg.length); - lgp = pnfs_alloc_init_layoutget_args(lo, ctx, &stateid, &arg, gfp_flags); + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); if (!lgp) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, PNFS_UPDATE_LAYOUT_NOMEM); -- cgit v1.2.3 From 2409a976a2990ee1712c0945a75d75eeb3c60c08 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Oct 2016 12:11:21 -0400 Subject: pnfs: Add LAYOUTGET to OPEN of a new file This triggers when have no pre-existing inode to attach to. The preexisting case is saved for later. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 21 +++++++++++--- fs/nfs/nfs4state.c | 8 ++++++ fs/nfs/pnfs.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/nfs/pnfs.h | 17 ++++++++++++ 4 files changed, 121 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c7f982cdcbdd..062a9c753b7e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -86,6 +86,7 @@ | ATTR_MTIME_SET) struct nfs4_opendata; +static void nfs4_layoutget_release(void *calldata); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); @@ -877,6 +878,10 @@ nfs4_sequence_process_interrupted(struct nfs_client *client, #else /* !CONFIG_NFS_V4_1 */ +static void nfs4_layoutget_release(void *calldata) +{ +} + static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { return nfs40_sequence_done(task, res); @@ -1244,6 +1249,8 @@ static void nfs4_opendata_free(struct kref *kref) struct nfs4_opendata, kref); struct super_block *sb = p->dentry->d_sb; + if (p->lgp) + nfs4_layoutget_release(p->lgp); nfs_free_seqid(p->o_arg.seqid); nfs4_sequence_free_slot(&p->o_res.seq_res); if (p->state != NULL) @@ -2334,8 +2341,10 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, if (!ctx) { nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1); data->is_recover = true; - } else + } else { nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0); + pnfs_lgopen_prepare(data, ctx); + } task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -2815,7 +2824,10 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, nfs_inode_attach_open_context(ctx); if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) nfs4_schedule_stateid_recovery(server, state); + else + pnfs_parse_lgopen(state->inode, opendata->lgp, ctx); } + out: return ret; } @@ -8722,13 +8734,14 @@ static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; struct inode *inode = lgp->args.inode; - struct nfs_server *server = NFS_SERVER(inode); - size_t max_pages = max_response_pages(server); + size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE; dprintk("--> %s\n", __func__); nfs4_sequence_free_slot(&lgp->res.seq_res); nfs4_free_pages(lgp->args.layout.pages, max_pages); - pnfs_put_layout_hdr(NFS_I(inode)->layout); + if (inode) + pnfs_put_layout_hdr(NFS_I(inode)->layout); + put_rpccred(lgp->cred); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c10a422efe6f..2bf2eaa08ca7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -77,6 +77,14 @@ const nfs4_stateid invalid_stateid = { .type = NFS4_INVALID_STATEID_TYPE, }; +const nfs4_stateid current_stateid = { + { + /* Funky initialiser keeps older gcc versions happy */ + .data = { 0x0, 0x0, 0x0, 0x1, 0 }, + }, + .type = NFS4_SPECIAL_STATEID_TYPE, +}; + static DEFINE_MUTEX(nfs_clid_init_mutex); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 07f1bbd9100c..a0a2484c3aed 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -934,7 +934,7 @@ pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx) static struct nfs4_layoutget * pnfs_alloc_init_layoutget_args(struct inode *ino, struct nfs_open_context *ctx, - nfs4_stateid *stateid, + const nfs4_stateid *stateid, const struct pnfs_layout_range *range, gfp_t gfp_flags) { @@ -978,7 +978,7 @@ pnfs_alloc_init_layoutget_args(struct inode *ino, lgp->args.ctx = get_nfs_open_context(ctx); nfs4_stateid_copy(&lgp->args.stateid, stateid); lgp->gfp_flags = gfp_flags; - lgp->cred = ctx->cred; + lgp->cred = get_rpccred(ctx->cred); return lgp; } @@ -1943,6 +1943,83 @@ pnfs_sanity_check_layout_range(struct pnfs_layout_range *range) return true; } +extern const nfs4_stateid current_stateid; + +static void _lgopen_prepare_attached(struct nfs4_opendata *data, + struct nfs_open_context *ctx) +{ + /* STUB */ +} + +static void _lgopen_prepare_floating(struct nfs4_opendata *data, + struct nfs_open_context *ctx) +{ + struct pnfs_layout_range rng = { + .iomode = (data->o_arg.fmode & FMODE_WRITE) ? + IOMODE_RW: IOMODE_READ, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + struct nfs4_layoutget *lgp; + + lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, ¤t_stateid, + &rng, GFP_KERNEL); + if (!lgp) + return; + data->lgp = lgp; + data->o_arg.lg_args = &lgp->args; + data->o_res.lg_res = &lgp->res; +} + +void pnfs_lgopen_prepare(struct nfs4_opendata *data, + struct nfs_open_context *ctx) +{ + struct nfs_server *server = NFS_SERVER(data->dir->d_inode); + + if (!(pnfs_enabled_sb(server) && + server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN)) + return; + /* Could check on max_ops, but currently hardcoded high enough */ + if (data->state) + _lgopen_prepare_attached(data, ctx); + else + _lgopen_prepare_floating(data, ctx); +} + +void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, + struct nfs_open_context *ctx) +{ + struct pnfs_layout_hdr *lo; + struct pnfs_layout_segment *lseg; + u32 iomode; + + if (!lgp || lgp->res.layoutp->len == 0) + return; + if (!lgp->args.inode) { + /* Need to grab lo */ + spin_lock(&ino->i_lock); + lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL); + atomic_inc(&lo->plh_outstanding); + spin_unlock(&ino->i_lock); + lgp->args.inode = ino; + } else + lo = NFS_I(lgp->args.inode)->layout; + pnfs_get_layout_hdr(lo); + + lseg = pnfs_layout_process(lgp); + atomic_dec(&lo->plh_outstanding); + if (IS_ERR(lseg)) { + /* ignore lseg, but would like to mark not to try lgopen */ + /* clear some lo flags - first and fail ???? */ + } else { + iomode = lgp->args.range.iomode; + pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); + pnfs_put_lseg(lseg); + } + pnfs_clear_first_layoutget(lo); + pnfs_put_layout_hdr(lo); +} + struct pnfs_layout_segment * pnfs_layout_process(struct nfs4_layoutget *lgp) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 964a7227ea97..b110c09ea89c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -35,6 +35,8 @@ #include #include +struct nfs4_opendata; + enum { NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ NFS_LSEG_ROC, /* roc bit received from server */ @@ -378,6 +380,10 @@ void pnfs_layout_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, u32 ds_commit_idx); +void pnfs_lgopen_prepare(struct nfs4_opendata *data, + struct nfs_open_context *ctx); +void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, + struct nfs_open_context *ctx); static inline bool nfs_have_layout(struct inode *inode) { @@ -778,6 +784,17 @@ static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, { return false; } + +static inline void pnfs_lgopen_prepare(struct nfs4_opendata *data, + struct nfs_open_context *ctx) +{ +} + +static inline void pnfs_parse_lgopen(struct inode *ino, + struct nfs4_layoutget *lgp, + struct nfs_open_context *ctx) +{ +} #endif /* CONFIG_NFS_V4_1 */ #if IS_ENABLED(CONFIG_NFS_V4_2) -- cgit v1.2.3 From 29a8bfe52d1c38bde482971250af0ba9637ddaf2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 May 2018 17:16:20 -0400 Subject: pNFS: Refactor nfs4_layoutget_release() Move the actual freeing of the struct nfs4_layoutget into fs/nfs/pnfs.c where it can be reused by the layoutget on open code. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 47 +---------------------------------------------- fs/nfs/pnfs.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 2 +- 3 files changed, 52 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 062a9c753b7e..0e6db190a874 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8692,58 +8692,13 @@ size_t max_response_pages(struct nfs_server *server) return nfs_page_array_len(0, max_resp_sz); } -static void nfs4_free_pages(struct page **pages, size_t size) -{ - int i; - - if (!pages) - return; - - for (i = 0; i < size; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); -} - -struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) -{ - struct page **pages; - int i; - - pages = kcalloc(size, sizeof(struct page *), gfp_flags); - if (!pages) { - dprintk("%s: can't alloc array of %zu pages\n", __func__, size); - return NULL; - } - - for (i = 0; i < size; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) { - dprintk("%s: failed to allocate page\n", __func__); - nfs4_free_pages(pages, size); - return NULL; - } - } - - return pages; -} - static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; - struct inode *inode = lgp->args.inode; - size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE; dprintk("--> %s\n", __func__); nfs4_sequence_free_slot(&lgp->res.seq_res); - nfs4_free_pages(lgp->args.layout.pages, max_pages); - if (inode) - pnfs_put_layout_hdr(NFS_I(inode)->layout); - put_rpccred(lgp->cred); - put_nfs_open_context(lgp->args.ctx); - kfree(calldata); + pnfs_layoutget_free(lgp); dprintk("<-- %s\n", __func__); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a0a2484c3aed..f568a1de5ec5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -931,6 +931,44 @@ pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx) return server; } +static void nfs4_free_pages(struct page **pages, size_t size) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < size; i++) { + if (!pages[i]) + break; + __free_page(pages[i]); + } + kfree(pages); +} + +static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +{ + struct page **pages; + int i; + + pages = kcalloc(size, sizeof(struct page *), gfp_flags); + if (!pages) { + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); + return NULL; + } + + for (i = 0; i < size; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) { + dprintk("%s: failed to allocate page\n", __func__); + nfs4_free_pages(pages, size); + return NULL; + } + } + + return pages; +} + static struct nfs4_layoutget * pnfs_alloc_init_layoutget_args(struct inode *ino, struct nfs_open_context *ctx, @@ -982,6 +1020,18 @@ pnfs_alloc_init_layoutget_args(struct inode *ino, return lgp; } +void pnfs_layoutget_free(struct nfs4_layoutget *lgp) +{ + size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE; + + nfs4_free_pages(lgp->args.layout.pages, max_pages); + if (lgp->args.inode) + pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout); + put_rpccred(lgp->cred); + put_nfs_open_context(lgp->args.ctx); + kfree(lgp); +} + static void pnfs_clear_layoutcommit(struct inode *inode, struct list_head *head) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index b110c09ea89c..9941df824ca9 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -227,7 +227,6 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); /* nfs4proc.c */ extern size_t max_response_pages(struct nfs_server *server); -extern struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, struct rpc_cred *cred); @@ -251,6 +250,7 @@ size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); +void pnfs_layoutget_free(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); -- cgit v1.2.3 From 78746a384c88c6405594cd07cae11b3b3caffb9b Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 22 Sep 2016 12:30:20 -0400 Subject: pnfs: Add LAYOUTGET to OPEN of an existing file Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f568a1de5ec5..a6fe8132e944 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -921,9 +921,9 @@ pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx) { struct nfs_server *server; - if (inode) + if (inode) { server = NFS_SERVER(inode); - else { + } else { struct dentry *parent_dir = dget_parent(ctx->dentry); server = NFS_SERVER(parent_dir->d_inode); dput(parent_dir); @@ -1736,6 +1736,22 @@ static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); } +static void _add_to_server_list(struct pnfs_layout_hdr *lo, + struct nfs_server *server) +{ + if (list_empty(&lo->plh_layouts)) { + struct nfs_client *clp = server->nfs_client; + + /* The lo must be on the clp list if there is any + * chance of a CB_LAYOUTRECALL(FILE) coming in. + */ + spin_lock(&clp->cl_lock); + if (list_empty(&lo->plh_layouts)) + list_add_tail(&lo->plh_layouts, &server->layouts); + spin_unlock(&clp->cl_lock); + } +} + /* * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. @@ -1886,15 +1902,7 @@ lookup_again: atomic_inc(&lo->plh_outstanding); spin_unlock(&ino->i_lock); - if (list_empty(&lo->plh_layouts)) { - /* The lo must be on the clp list if there is any - * chance of a CB_LAYOUTRECALL(FILE) coming in. - */ - spin_lock(&clp->cl_lock); - if (list_empty(&lo->plh_layouts)) - list_add_tail(&lo->plh_layouts, &server->layouts); - spin_unlock(&clp->cl_lock); - } + _add_to_server_list(lo, server); pg_offset = arg.offset & ~PAGE_MASK; if (pg_offset) { @@ -1993,12 +2001,62 @@ pnfs_sanity_check_layout_range(struct pnfs_layout_range *range) return true; } +static struct pnfs_layout_hdr * +_pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx) +{ + struct pnfs_layout_hdr *lo; + + spin_lock(&ino->i_lock); + lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL); + if (!lo) + goto out_unlock; + if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + goto out_unlock; + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + goto out_unlock; + if (pnfs_layoutgets_blocked(lo)) + goto out_unlock; + if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags)) + goto out_unlock; + atomic_inc(&lo->plh_outstanding); + spin_unlock(&ino->i_lock); + _add_to_server_list(lo, NFS_SERVER(ino)); + return lo; + +out_unlock: + spin_unlock(&ino->i_lock); + pnfs_put_layout_hdr(lo); + return NULL; +} + extern const nfs4_stateid current_stateid; static void _lgopen_prepare_attached(struct nfs4_opendata *data, struct nfs_open_context *ctx) { - /* STUB */ + struct inode *ino = data->dentry->d_inode; + struct pnfs_layout_range rng = { + .iomode = (data->o_arg.fmode & FMODE_WRITE) ? + IOMODE_RW: IOMODE_READ, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + struct nfs4_layoutget *lgp; + struct pnfs_layout_hdr *lo; + + lo = _pnfs_grab_empty_layout(ino, ctx); + if (!lo) + return; + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, ¤t_stateid, + &rng, GFP_KERNEL); + if (!lgp) { + pnfs_clear_first_layoutget(lo); + pnfs_put_layout_hdr(lo); + return; + } + data->lgp = lgp; + data->o_arg.lg_args = &lgp->args; + data->o_res.lg_res = &lgp->res; } static void _lgopen_prepare_floating(struct nfs4_opendata *data, @@ -2046,11 +2104,9 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, if (!lgp || lgp->res.layoutp->len == 0) return; if (!lgp->args.inode) { - /* Need to grab lo */ - spin_lock(&ino->i_lock); - lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL); - atomic_inc(&lo->plh_outstanding); - spin_unlock(&ino->i_lock); + lo = _pnfs_grab_empty_layout(ino, ctx); + if (!lo) + return; lgp->args.inode = ino; } else lo = NFS_I(lgp->args.inode)->layout; -- cgit v1.2.3 From 6e01260ceeca299b82d466660935534c5c909d54 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 4 Oct 2016 15:26:41 -0400 Subject: pnfs: Stop attempting LAYOUTGET on OPEN on failure Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +++- fs/nfs/pnfs.c | 19 ++++++++++++++++++- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0e6db190a874..05454cbc473d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9465,7 +9465,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_ATOMIC_OPEN | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1, + | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_LGOPEN, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, @@ -9490,6 +9491,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_LGOPEN | NFS_CAP_ALLOCATE | NFS_CAP_COPY | NFS_CAP_DEALLOCATE diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a6fe8132e944..3dfe9fa264a5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2088,6 +2088,8 @@ void pnfs_lgopen_prepare(struct nfs4_opendata *data, server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN)) return; /* Could check on max_ops, but currently hardcoded high enough */ + if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN)) + return; if (data->state) _lgopen_prepare_attached(data, ctx); else @@ -2101,8 +2103,23 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, struct pnfs_layout_segment *lseg; u32 iomode; - if (!lgp || lgp->res.layoutp->len == 0) + if (!lgp) return; + dprintk("%s: entered with status %i\n", __func__, lgp->res.status); + if (lgp->res.status) { + switch (lgp->res.status) { + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + case -NFS4ERR_LAYOUTTRYLATER: + break; + default: + /* FIXME - Any error not listed above permanently + * halts lgopen attempts. + */ + NFS_SERVER(ino)->caps &= ~NFS_CAP_LGOPEN; + } + return; + } if (!lgp->args.inode) { lo = _pnfs_grab_empty_layout(ino, ctx); if (!lo) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4e735be53e70..2c18d618604e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -235,6 +235,7 @@ struct nfs_server { #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) /* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ +#define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_FILEID (1U << 6) #define NFS_CAP_MODE (1U << 7) #define NFS_CAP_NLINK (1U << 8) -- cgit v1.2.3 From c49b5209f99abe082d3d4cd94f0ad924baea34ed Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 5 Oct 2016 09:37:12 -0400 Subject: pnfs: Add barrier to prevent lgopen using LAYOUTGET during recall Since the LAYOUTGET on OPEN can be sent without prior inode information, existing methods to prevent LAYOUTGET from being sent while processing CB_LAYOUTRECALL don't work. Track if a recall occurred while LAYOUTGET was being sent, and if so ignore the results. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 ++ fs/nfs/pnfs.c | 8 +++++++- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 4 files changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index a50d7813e3ea..d561161b7c3e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -322,6 +322,8 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, static u32 do_callback_layoutrecall(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + write_seqcount_begin(&clp->cl_callback_count); + write_seqcount_end(&clp->cl_callback_count); if (args->cbl_recall_type == RETURN_FILE) return initiate_file_draining(clp, args); return initiate_bulk_draining(clp, args); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3dfe9fa264a5..a29fdea2db91 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1017,6 +1017,7 @@ pnfs_alloc_init_layoutget_args(struct inode *ino, nfs4_stateid_copy(&lgp->args.stateid, stateid); lgp->gfp_flags = gfp_flags; lgp->cred = get_rpccred(ctx->cred); + lgp->callback_count = raw_seqcount_begin(&server->nfs_client->cl_callback_count); return lgp; } @@ -2101,6 +2102,7 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, { struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg; + struct nfs_server *srv = NFS_SERVER(ino); u32 iomode; if (!lgp) @@ -2116,7 +2118,7 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, /* FIXME - Any error not listed above permanently * halts lgopen attempts. */ - NFS_SERVER(ino)->caps &= ~NFS_CAP_LGOPEN; + srv->caps &= ~NFS_CAP_LGOPEN; } return; } @@ -2129,6 +2131,9 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, lo = NFS_I(lgp->args.inode)->layout; pnfs_get_layout_hdr(lo); + if (read_seqcount_retry(&srv->nfs_client->cl_callback_count, + lgp->callback_count)) + goto out; lseg = pnfs_layout_process(lgp); atomic_dec(&lo->plh_outstanding); if (IS_ERR(lseg)) { @@ -2139,6 +2144,7 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); pnfs_put_lseg(lseg); } +out: pnfs_clear_first_layoutget(lo); pnfs_put_layout_hdr(lo); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2c18d618604e..74ae3e1d19a0 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -28,6 +28,7 @@ struct nfs41_impl_id; struct nfs_client { refcount_t cl_count; atomic_t cl_mds_count; + seqcount_t cl_callback_count; int cl_cons_state; /* current construction state (-ve: init error) */ #define NFS_CS_READY 0 /* ready to be used */ #define NFS_CS_INITING 1 /* busy initialising */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bc235e50415f..09dc14ac5804 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -271,6 +271,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_args args; struct nfs4_layoutget_res res; struct rpc_cred *cred; + unsigned callback_count; gfp_t gfp_flags; }; -- cgit v1.2.3 From 30ae2412e90f0ae177da631e36537392d89a2ccd Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 18 Oct 2016 13:39:51 -0400 Subject: pnfs: Fix manipulation of NFS_LAYOUT_FIRST_LAYOUTGET The flag was not always being cleared after LAYOUTGET on OPEN. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +-- fs/nfs/pnfs.c | 20 ++++++++++++++------ fs/nfs/pnfs.h | 6 ++++++ 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 05454cbc473d..d18447d11b06 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1249,8 +1249,7 @@ static void nfs4_opendata_free(struct kref *kref) struct nfs4_opendata, kref); struct super_block *sb = p->dentry->d_sb; - if (p->lgp) - nfs4_layoutget_release(p->lgp); + nfs4_lgopen_release(p->lgp); nfs_free_seqid(p->o_arg.seqid); nfs4_sequence_free_slot(&p->o_res.seq_res); if (p->state != NULL) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a29fdea2db91..b0e42fd07cb1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2129,13 +2129,11 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, lgp->args.inode = ino; } else lo = NFS_I(lgp->args.inode)->layout; - pnfs_get_layout_hdr(lo); if (read_seqcount_retry(&srv->nfs_client->cl_callback_count, lgp->callback_count)) - goto out; + return; lseg = pnfs_layout_process(lgp); - atomic_dec(&lo->plh_outstanding); if (IS_ERR(lseg)) { /* ignore lseg, but would like to mark not to try lgopen */ /* clear some lo flags - first and fail ???? */ @@ -2144,9 +2142,19 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); pnfs_put_lseg(lseg); } -out: - pnfs_clear_first_layoutget(lo); - pnfs_put_layout_hdr(lo); +} + +void nfs4_lgopen_release(struct nfs4_layoutget *lgp) +{ + if (lgp != NULL) { + struct inode *inode = lgp->args.inode; + if (inode) { + struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; + atomic_dec(&lo->plh_outstanding); + pnfs_clear_first_layoutget(lo); + } + pnfs_layoutget_free(lgp); + } } struct pnfs_layout_segment * diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9941df824ca9..a8f5e6b16749 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -384,6 +384,7 @@ void pnfs_lgopen_prepare(struct nfs4_opendata *data, struct nfs_open_context *ctx); void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, struct nfs_open_context *ctx); +void nfs4_lgopen_release(struct nfs4_layoutget *lgp); static inline bool nfs_have_layout(struct inode *inode) { @@ -795,6 +796,11 @@ static inline void pnfs_parse_lgopen(struct inode *ino, struct nfs_open_context *ctx) { } + +static inline void nfs4_lgopen_release(struct nfs4_layoutget *lgp) +{ +} + #endif /* CONFIG_NFS_V4_1 */ #if IS_ENABLED(CONFIG_NFS_V4_2) -- cgit v1.2.3 From d49e0d5b999621412ffb177f08a1c1746dfb6071 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Feb 2017 20:42:44 -0500 Subject: NFSv4/pnfs: Ensure pnfs_parse_lgopen() won't try to parse uninitialised data We need to ensure that pnfs_parse_lgopen() doesn't try to parse a struct nfs4_layoutget_res that was not filled by a successful call to decode_layoutget(). This can happen if we performed a cached open, or if either the OP_ACCESS or OP_GETATTR operations preceding the OP_LAYOUTGET in the compound returned an error. By initialising the 'status' field to NFS4ERR_DELAY, we ensure that pnfs_parse_lgopen() won't try to interpret the structure. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b0e42fd07cb1..4d3f04d55a65 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -994,7 +994,8 @@ pnfs_alloc_init_layoutget_args(struct inode *ino, lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->res.layoutp = &lgp->args.layout; - + /* Don't confuse uninitialised result and success */ + lgp->res.status = -NFS4ERR_DELAY; lgp->args.minlength = PAGE_SIZE; if (lgp->args.minlength > range->length) -- cgit v1.2.3 From 8dc96566c0c34b8c2632bc6071dad208d69dd5b0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Feb 2017 21:02:07 -0500 Subject: NFSv4/pnfs: Don't switch off layoutget-on-open for transient errors Ensure that we only switch off the LAYOUTGET operation in the OPEN compound when the server is truly broken, and/or it is complaining that the compound is too large. Currently, we end up turning off the functionality permanently, even for transient errors such as EACCES or ENOSPC. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4d3f04d55a65..d5dc97f72b30 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2111,14 +2111,22 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, dprintk("%s: entered with status %i\n", __func__, lgp->res.status); if (lgp->res.status) { switch (lgp->res.status) { - case -NFS4ERR_DELAY: - case -NFS4ERR_GRACE: - case -NFS4ERR_LAYOUTTRYLATER: - break; default: - /* FIXME - Any error not listed above permanently - * halts lgopen attempts. - */ + break; + /* + * Halt lgopen attempts if the server doesn't recognise + * the "current stateid" value, the layout type, or the + * layoutget operation as being valid. + * Also if it complains about too many ops in the compound + * or of the request/reply being too big. + */ + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_NOTSUPP: + case -NFS4ERR_REP_TOO_BIG: + case -NFS4ERR_REP_TOO_BIG_TO_CACHE: + case -NFS4ERR_REQ_TOO_BIG: + case -NFS4ERR_TOO_MANY_OPS: + case -NFS4ERR_UNKNOWN_LAYOUTTYPE: srv->caps &= ~NFS_CAP_LGOPEN; } return; -- cgit v1.2.3 From 64294b08f9d27f83cbb0975250614864fa733bda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 2 Feb 2017 12:26:38 -0500 Subject: pNFS: Don't send LAYOUTGET on OPEN for read, if we already have cached data If we're only opening the file for reading, and the file is empty and/or we already have cached data, then heuristically optimise away the LAYOUTGET. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d5dc97f72b30..717cd95a0306 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2046,6 +2046,11 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data, struct nfs4_layoutget *lgp; struct pnfs_layout_hdr *lo; + /* Heuristic: don't send layoutget if we have cached data */ + if (rng.iomode == IOMODE_READ && + (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0)) + return; + lo = _pnfs_grab_empty_layout(ino, ctx); if (!lo) return; -- cgit v1.2.3 From 32f1c28f3d453f4652948ab9298078874d1a56b6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 May 2018 11:15:32 -0400 Subject: pnfs: Don't call commit on failed layoutget-on-open If the layoutget on open call failed, we can't really commit the inode, so don't bother calling it. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 717cd95a0306..d93942fa3817 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2148,10 +2148,7 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, lgp->callback_count)) return; lseg = pnfs_layout_process(lgp); - if (IS_ERR(lseg)) { - /* ignore lseg, but would like to mark not to try lgopen */ - /* clear some lo flags - first and fail ???? */ - } else { + if (!IS_ERR(lseg)) { iomode = lgp->args.range.iomode; pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); pnfs_put_lseg(lseg); @@ -2236,8 +2233,6 @@ out_forget: spin_unlock(&ino->i_lock); lseg->pls_layout = lo; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); - if (!pnfs_layout_is_valid(lo)) - nfs_commit_inode(ino, 0); return ERR_PTR(-EAGAIN); } -- cgit v1.2.3 From ae55e59da0e401893b3c52b575fc18a00623d0a1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 May 2018 11:17:16 -0400 Subject: pnfs: Don't release the sequence slot until we've processed layoutget on open If the server recalls the layout that was just handed out, we risk hitting a race as described in RFC5661 Section 2.10.6.3 unless we ensure that we release the sequence slot after processing the LAYOUTGET operation that was sent as part of the OPEN compound. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d18447d11b06..bb1141c48281 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2789,7 +2789,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - state = nfs4_opendata_to_nfs4_state(opendata); + state = _nfs4_opendata_to_nfs4_state(opendata); ret = PTR_ERR(state); if (IS_ERR(state)) goto out; @@ -2828,6 +2828,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, } out: + nfs4_sequence_free_slot(&opendata->o_res.seq_res); return ret; } -- cgit v1.2.3 From 30846df06f937b692ea658aaf7e28acf56a255f8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Apr 2018 13:44:28 -0400 Subject: NFSv4: Don't request size+change attribute if they are delegated to us When we hold a delegation, we should not need to request attributes such as the file size or the change attribute. For some servers, avoiding asking for these unneeded attributes can improve the overall system performance. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bb1141c48281..c44cfa6be8ff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -71,6 +71,8 @@ #define NFSDBG_FACILITY NFSDBG_PROC +#define NFS4_BITMASK_SZ 3 + #define NFS4_POLL_RETRY_MIN (HZ/10) #define NFS4_POLL_RETRY_MAX (15*HZ) @@ -274,6 +276,33 @@ const u32 nfs4_fs_locations_bitmap[3] = { | FATTR4_WORD1_MOUNTED_ON_FILEID, }; +static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, + struct inode *inode) +{ + unsigned long cache_validity; + + memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst)); + if (!inode || !nfs4_have_delegation(inode, FMODE_READ)) + return; + + cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + if (!(cache_validity & NFS_INO_REVAL_FORCED)) + cache_validity &= ~(NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_SIZE); + + if (!(cache_validity & NFS_INO_INVALID_SIZE)) + dst[0] &= ~FATTR4_WORD0_SIZE; + + if (!(cache_validity & NFS_INO_INVALID_CHANGE)) + dst[0] &= ~FATTR4_WORD0_CHANGE; +} + +static void nfs4_bitmap_copy_adjust_setattr(__u32 *dst, + const __u32 *src, struct inode *inode) +{ + nfs4_bitmap_copy_adjust(dst, src, inode); +} + static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir) { @@ -3074,12 +3103,13 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs4_label *olabel) { struct nfs_server *server = NFS_SERVER(inode); + __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_state *state = ctx ? ctx->state : NULL; struct nfs_setattrargs arg = { .fh = NFS_FH(inode), .iap = sattr, .server = server, - .bitmask = server->attr_bitmask, + .bitmask = bitmask, .label = ilabel, }; struct nfs_setattrres res = { @@ -3094,11 +3124,11 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, }; int err; - arg.bitmask = nfs4_bitmask(server, ilabel); - if (ilabel) - arg.bitmask = nfs4_bitmask(server, olabel); - do { + nfs4_bitmap_copy_adjust_setattr(bitmask, + nfs4_bitmask(server, olabel), + inode); + err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx); switch (err) { case -NFS4ERR_OPENMODE: -- cgit v1.2.3 From a841b54dbd65421726caf7129f8951910c7a8ea6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Apr 2018 13:50:59 -0400 Subject: NFS: Pass the inode down to the getattr() callback Allow the getattr() callback to check things like whether or not we hold a delegation so that it can adjust the attributes that it is asking for. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 ++- fs/nfs/dir.c | 3 ++- fs/nfs/export.c | 2 +- fs/nfs/inode.c | 3 ++- fs/nfs/nfs3proc.c | 3 ++- fs/nfs/nfs4proc.c | 17 ++++++++++------- fs/nfs/proc.c | 3 ++- include/linux/nfs_xdr.h | 3 ++- 8 files changed, 23 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b9129e2befea..02e97c29af0c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -969,7 +969,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr, NULL); + error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, + fattr, NULL, NULL); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 978a22ea962c..7a9c14426855 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1656,7 +1656,8 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); - error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr, NULL); + error = server->nfs_client->rpc_ops->getattr(server, fhandle, + fattr, NULL, NULL); if (error < 0) goto out_error; } diff --git a/fs/nfs/export.c b/fs/nfs/export.c index ab5de3246c5c..deecb67638aa 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -102,7 +102,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, } rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops; - ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label); + ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL); if (ret) { dprintk("%s: getattr failed %d\n", __func__, ret); dentry = ERR_PTR(ret); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5de724b1b90c..a720427e5aa3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1101,7 +1101,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out; } - status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label); + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, + label, inode); if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 5645ef4c5259..ec8a9efa268f 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -101,7 +101,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr, struct nfs4_label *label, + struct inode *inode) { struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c44cfa6be8ff..cd60e8360ef2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -92,8 +92,8 @@ static void nfs4_layoutget_release(void *calldata); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); -static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); -static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); +static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode); +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs_open_context *ctx, struct nfs4_label *ilabel, @@ -2494,7 +2494,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data, } if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { nfs4_sequence_free_slot(&o_res->seq_res); - nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); + nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, + o_res->f_label, NULL); } return 0; } @@ -3763,7 +3764,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, if (IS_ERR(label)) return PTR_ERR(label); - error = nfs4_proc_getattr(server, mntfh, fattr, label); + error = nfs4_proc_getattr(server, mntfh, fattr, label, NULL); if (error < 0) { dprintk("nfs4_get_root: getattr error = %d\n", -error); goto err_free_label; @@ -3828,7 +3829,8 @@ out: } static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr, struct nfs4_label *label, + struct inode *inode) { struct nfs4_getattr_arg args = { .fh = fhandle, @@ -3852,12 +3854,13 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, } static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr, struct nfs4_label *label, + struct inode *inode) { struct nfs4_exception exception = { }; int err; do { - err = _nfs4_proc_getattr(server, fhandle, fattr, label); + err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode); trace_nfs4_getattr(server, fhandle, fattr, err); err = nfs4_handle_exception(server, err, &exception); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 763f77e7f1f1..e0c257bd62b9 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -99,7 +99,8 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr, struct nfs4_label *label, + struct inode *inode) { struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 09dc14ac5804..9dee3c23895d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1581,7 +1581,8 @@ struct nfs_rpc_ops { struct dentry *(*try_mount) (int, const char *, struct nfs_mount_info *, struct nfs_subversion *); int (*getattr) (struct nfs_server *, struct nfs_fh *, - struct nfs_fattr *, struct nfs4_label *); + struct nfs_fattr *, struct nfs4_label *, + struct inode *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); int (*lookup) (struct inode *, const struct qstr *, -- cgit v1.2.3 From 771734f291404285e40cb75713c60635ed5c61ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Apr 2018 13:54:23 -0400 Subject: NFSv4: Don't ask for delegated attributes when revalidating the inode Again, when revalidating the inode, we don't need to ask for attributes for which we are authoritative. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cd60e8360ef2..744a6367618d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3832,9 +3832,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode) { + __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_getattr_arg args = { .fh = fhandle, - .bitmask = server->attr_bitmask, + .bitmask = bitmask, }; struct nfs4_getattr_res res = { .fattr = fattr, @@ -3847,7 +3848,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; - args.bitmask = nfs4_bitmask(server, label); + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); nfs_fattr_init(fattr); return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); -- cgit v1.2.3 From 2f28dc385afd59f04cd42f0ced16050aa8db85e9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Apr 2018 21:06:40 -0400 Subject: NFSv4: Don't ask for delegated attributes when adding a hard link Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 744a6367618d..544cdcb79b4f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4378,11 +4378,12 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) { struct nfs_server *server = NFS_SERVER(inode); + __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_link_arg arg = { .fh = NFS_FH(inode), .dir_fh = NFS_FH(dir), .name = name, - .bitmask = server->attr_bitmask, + .bitmask = bitmask, }; struct nfs4_link_res res = { .server = server, @@ -4404,9 +4405,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct status = PTR_ERR(res.label); goto out; } - arg.bitmask = nfs4_bitmask(server, res.label); nfs4_inode_make_writeable(inode); + nfs4_bitmap_copy_adjust_setattr(bitmask, nfs4_bitmask(server, res.label), inode); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { -- cgit v1.2.3 From 7c6726546c9d9a1203beaa5fc90625871448986d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jun 2018 15:00:53 -0400 Subject: NFSv4: Ignore NFS_INO_REVAL_FORCED in nfs4_proc_access If we hold a delegation, we don't need to care about whether or not the inode attributes are up to date. We know we can cache the results of this call regardless. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 544cdcb79b4f..f413d0c8c837 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4114,7 +4114,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry }; int status = 0; - if (!nfs_have_delegated_attributes(inode)) { + if (!nfs4_have_delegation(inode, FMODE_READ)) { res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) return -ENOMEM; -- cgit v1.2.3 From 97c2c17af9bd17c2cf4bc1e17f088793996cbba2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Apr 2018 18:43:17 -0400 Subject: NFSv4: Ensure the inode is clean when we set a delegation If there are attributes that are still invalid when we set a delegation, then we need to set the NFS_INO_REVAL_FORCED flag. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 105b0f39a5b2..91c3737d69df 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -404,6 +404,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, trace_nfs4_set_delegation(inode, type); + spin_lock(&inode->i_lock); + if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) + NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED; + spin_unlock(&inode->i_lock); out: spin_unlock(&clp->cl_lock); if (delegation != NULL) -- cgit v1.2.3 From 6a97d02dfe7fca29a3acec66ff51a1ceecacaa20 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Apr 2018 17:51:11 -0400 Subject: NFS: fix up nfs_setattr_update_inode Always try to set the attributes, even if we don't have a valid struct nfs_fattr. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a720427e5aa3..2b7edd011d86 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -671,9 +671,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; - nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME); + if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); + nfs_vmtruncate(inode, attr->ia_size); + } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME; if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; @@ -683,13 +687,45 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode->i_ctime = fattr->ctime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL); } - if ((attr->ia_valid & ATTR_SIZE) != 0) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); - nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); - nfs_vmtruncate(inode, attr->ia_size); + if (attr->ia_valid & (ATTR_ATIME_SET|ATTR_ATIME)) { + NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME + | NFS_INO_INVALID_CTIME); + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + inode->i_atime = fattr->atime; + else if (attr->ia_valid & ATTR_ATIME_SET) + inode->i_atime = attr->ia_atime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); + + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode->i_ctime = fattr->ctime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); + } + if (attr->ia_valid & (ATTR_MTIME_SET|ATTR_MTIME)) { + NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_CTIME); + if (fattr->valid & NFS_ATTR_FATTR_MTIME) + inode->i_mtime = fattr->mtime; + else if (attr->ia_valid & ATTR_MTIME_SET) + inode->i_mtime = attr->ia_mtime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode->i_ctime = fattr->ctime; + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); } if (fattr->valid) nfs_update_inode(inode, fattr); -- cgit v1.2.3 From 0b467264d0db011cd48f5adf24dd40543bceaccd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Jun 2018 11:56:05 -0400 Subject: NFS: Fix attribute revalidation Don't mark attributes as invalid just because they have changed. Instead, for the purposes of adjusting the attribute cache timeout, keep a separate variable that tracks whether or not a change occurred. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2b7edd011d86..f35c5eb09cc9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1788,6 +1788,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long save_cache_validity; bool have_writers = nfs_file_has_buffered_writers(nfsi); bool cache_revalidated = true; + bool attr_changed = false; dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1848,8 +1849,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_sb->s_id, inode->i_ino); /* Could it be a race with writeback? */ if (!have_writers) { - invalid |= NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_DATA + invalid |= NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Force revalidate of all attributes */ @@ -1861,6 +1861,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); } inode_set_iversion_raw(inode, fattr->change_attr); + attr_changed = true; } } else { nfsi->cache_validity |= save_cache_validity & @@ -1899,6 +1900,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_DATA; + attr_changed = true; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", @@ -1931,14 +1933,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) newmode |= fattr->mode & S_IALLUGO; inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_ACL; + attr_changed = true; } } else if (server->caps & NFS_CAP_MODE) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER + (NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1946,15 +1946,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; + attr_changed = true; } } else if (server->caps & NFS_CAP_OWNER) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER + (NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1962,25 +1960,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; + attr_changed = true; } } else if (server->caps & NFS_CAP_OWNER_GROUP) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER + (NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { - invalid |= NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; set_nlink(inode, fattr->nlink); + attr_changed = true; } } else if (server->caps & NFS_CAP_NLINK) { nfsi->cache_validity |= save_cache_validity & @@ -2000,7 +1996,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) cache_revalidated = false; /* Update attrtimeo value if we're out of the unstable period */ - if (invalid & NFS_INO_INVALID_ATTR) { + if (attr_changed) { invalid &= ~NFS_INO_INVALID_ATTR; nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); -- cgit v1.2.3 From c80d17c55d1171f3d3ead096a7fdc7d48e14725f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Jun 2018 11:38:38 -0400 Subject: NFS: Improve caching while holding a delegation Make sure that the client completely ignores change attribute and size changes on the server when it holds a delegation. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f35c5eb09cc9..90d9fbfd82db 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1789,6 +1789,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) bool have_writers = nfs_file_has_buffered_writers(nfsi); bool cache_revalidated = true; bool attr_changed = false; + bool have_delegation; dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1823,6 +1824,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) !IS_AUTOMOUNT(inode)) server->fsid = fattr->fsid; + /* Save the delegation state before clearing cache_validity */ + have_delegation = nfs_have_delegated_attributes(inode); + /* * Update the read time so we don't revalidate too often. */ @@ -1845,10 +1849,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); /* Could it be a race with writeback? */ - if (!have_writers) { + if (!(have_writers || have_delegation)) { invalid |= NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; @@ -1859,6 +1861,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, + inode->i_ino); } inode_set_iversion_raw(inode, fattr->change_attr); attr_changed = true; @@ -1893,7 +1898,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_SIZE) { new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); - if (new_isize != cur_isize) { + if (new_isize != cur_isize && !have_delegation) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { @@ -2022,9 +2027,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || - (save_cache_validity & NFS_INO_REVAL_FORCED)) - nfs_set_cache_invalid(inode, invalid); + nfs_set_cache_invalid(inode, invalid); return 0; out_err: -- cgit v1.2.3 From 4ebe83af202284b5ff8c65cb12902fd5518c5c58 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Jun 2018 12:12:52 -0400 Subject: NFS: Ignore NFS_INO_REVAL_FORCED in nfs_check_inode_attributes() If we hold a delegation, we should not need to call nfs_check_inode_attributes() since we already know which attributes are valid, and which ones may still need revalidation. The state of the NFS_INO_REVAL_FORCED flag is therefore irrelevant. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 90d9fbfd82db..bc84ecaae886 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1390,8 +1390,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat unsigned long invalid = 0; - if (nfs_have_delegated_attributes(inode)) + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) return 0; + /* Has the inode gone and changed behind our back? */ if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) return -ESTALE; @@ -1441,7 +1442,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) - nfs_set_cache_invalid(inode, invalid | NFS_INO_REVAL_FORCED); + nfs_set_cache_invalid(inode, invalid); nfsi->read_cache_jiffies = fattr->time_start; return 0; -- cgit v1.2.3 From 3f0b3cf46e0542ac4b4241c579b944b755d11b67 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Jun 2018 13:05:21 -0400 Subject: NFS: Filter cache invalidation when holding a delegation If the client holds a delegation, then ensure we filter out attempts to invalidate the size, owner, group owner, or mode unless we made the change, in which case, check that NFS_INO_REVAL_FORCED is set by the caller. Always filter out attempts to invalidate the change attribute and size, since we are authoritative for those. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bc84ecaae886..73473d9bdfa4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -195,10 +195,16 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); - bool have_delegation = nfs_have_delegated_attributes(inode); + bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); + + if (have_delegation) { + if (!(flags & NFS_INO_REVAL_FORCED)) + flags &= ~NFS_INO_INVALID_OTHER; + flags &= ~(NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_SIZE + | NFS_INO_REVAL_PAGECACHE); + } - if (have_delegation) - flags &= ~(NFS_INO_INVALID_CHANGE|NFS_INO_REVAL_PAGECACHE); if (inode->i_mapping->nrpages == 0) flags &= ~NFS_INO_INVALID_DATA; nfsi->cache_validity |= flags; -- cgit v1.2.3 From 977294c7199ed84d4878a63781a67014e1f0efe8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2018 10:38:39 -0400 Subject: NFSv4: Fix a compiler warning when CONFIG_NFS_V4_1 is undefined Fix a compiler warning: fs/nfs/nfs4proc.c:910:13: warning: 'nfs4_layoutget_release' defined but not used [-Wunused-function] static void nfs4_layoutget_release(void *calldata) ^~~~~~~~~~~~~~~~~~~~~~ Reported-by: Stephen Rothwell Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f413d0c8c837..e7998772bc51 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -88,7 +88,6 @@ | ATTR_MTIME_SET) struct nfs4_opendata; -static void nfs4_layoutget_release(void *calldata); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); @@ -907,10 +906,6 @@ nfs4_sequence_process_interrupted(struct nfs_client *client, #else /* !CONFIG_NFS_V4_1 */ -static void nfs4_layoutget_release(void *calldata) -{ -} - static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { return nfs40_sequence_done(task, res); -- cgit v1.2.3 From 848a4eb2e3972400b683bf525a84fdaff6f6dbcc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Jun 2018 10:53:29 -0400 Subject: NFSv4.0: Remove cl_ipaddr from non-UCS client ID It is possible for two distinct clients to have the same cl_ipaddr: - if the client admin disables callback with clientaddr=0.0.0.0 on more than one client - if two clients behind separate NATs use the same private subnet number - if the client admin specifies the same address via clientaddr= mount option (pointing the server at the same NAT box, for example) Because of the way the Linux NFSv4.0 client constructs its client ID string by default, such clients could interfere with each others' lease state when mounting the same server: scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", clp->cl_ipaddr, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); cl_ipaddr is set to the value of the clientaddr= mount option. Two clients whose addresses are 192.168.3.77 that mount the same server (whose public IP address is, say, 3.4.5.6) would both generate the same client ID string when sending a SETCLIENTID: Linux NFSv4.0 192.168.3.77/3.4.5.6 tcp and thus the server would not be able to distinguish the clients' leases. If both clients are using AUTH_SYS when sending SETCLIENTID then the server could possibly permit the two clients to interfere with or purge each others' leases. To better ensure that Linux's NFSv4.0 client ID strings are distinct in these cases, remove cl_ipaddr from the client ID string and replace it with something more likely to be unique. Note that the replacement looks a lot like the uniform client ID string. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e7998772bc51..fda45ff7a653 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5618,13 +5618,16 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) return 0; rcu_read_lock(); - len = 14 + strlen(clp->cl_ipaddr) + 1 + + len = 14 + + strlen(clp->cl_rpcclient->cl_nodename) + + 1 + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + 1 + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) + 1; rcu_read_unlock(); - + if (nfs4_client_id_uniquifier[0] != '\0') + len += strlen(nfs4_client_id_uniquifier) + 1; if (len > NFS4_OPAQUE_LIMIT + 1) return -EINVAL; @@ -5638,10 +5641,21 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) return -ENOMEM; rcu_read_lock(); - scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); + if (nfs4_client_id_uniquifier[0] != '\0') + scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s %s", + clp->cl_rpcclient->cl_nodename, + nfs4_client_id_uniquifier, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_PROTO)); + else + scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", + clp->cl_rpcclient->cl_nodename, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_PROTO)); rcu_read_unlock(); clp->cl_owner_id = str; -- cgit v1.2.3 From 025bb9f8720577b3815263dd961cdebe0c823080 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Jun 2018 10:53:34 -0400 Subject: NFSv4.0: Remove transport protocol name from non-UCS client ID Commit 69dd716c5ffd ("NFSv4: Add socket proto argument to setclientid") (2007) added the transport protocol name to the client ID string, but the patch description doesn't explain why this was necessary. At that time, the only transport protocol name that would have been used is "tcp" (for both IPv4 and IPv6), resulting in no additional distinctiveness of the client ID string. Since there is one client instance, the server should recognize it's state whether the client is connecting via TCP or RDMA. Same client, same lease. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fda45ff7a653..e39f59b5e6d2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5622,8 +5622,6 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) strlen(clp->cl_rpcclient->cl_nodename) + 1 + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + - 1 + - strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) + 1; rcu_read_unlock(); if (nfs4_client_id_uniquifier[0] != '\0') @@ -5642,20 +5640,16 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) rcu_read_lock(); if (nfs4_client_id_uniquifier[0] != '\0') - scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s %s", + scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s", clp->cl_rpcclient->cl_nodename, nfs4_client_id_uniquifier, rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO)); + RPC_DISPLAY_ADDR)); else - scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", + scnprintf(str, len, "Linux NFSv4.0 %s/%s", clp->cl_rpcclient->cl_nodename, rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO)); + RPC_DISPLAY_ADDR)); rcu_read_unlock(); clp->cl_owner_id = str; -- cgit v1.2.3 From 6c342655022d5189c45e4f7ed0cc8048c9ad9815 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2018 14:22:00 -0400 Subject: NFSv4: Return NFS4ERR_DELAY when a delegation recall fails due to igrab() If the attempt to recall the delegation fails because the inode is in the process of being evicted from cache, then use NFS4ERR_DELAY to ask the server to retry later. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 8 ++++++-- fs/nfs/delegation.c | 16 +++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index d561161b7c3e..eacd09dcdad1 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -40,7 +40,9 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); inode = nfs_delegation_find_inode(cps->clp, &args->fh); - if (inode == NULL) { + if (IS_ERR(inode)) { + if (inode == ERR_PTR(-EAGAIN)) + res->status = htonl(NFS4ERR_DELAY); trace_nfs4_cb_getattr(cps->clp, &args->fh, NULL, -ntohl(res->status)); goto out; @@ -86,7 +88,9 @@ __be32 nfs4_callback_recall(void *argp, void *resp, res = htonl(NFS4ERR_BADHANDLE); inode = nfs_delegation_find_inode(cps->clp, &args->fh); - if (inode == NULL) { + if (IS_ERR(inode)) { + if (inode == ERR_PTR(-EAGAIN)) + res = htonl(NFS4ERR_DELAY); trace_nfs4_cb_recall(cps->clp, &args->fh, NULL, &args->stateid, -ntohl(res)); goto out; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 91c3737d69df..bbd0465535eb 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -856,12 +856,14 @@ nfs_delegation_find_inode_server(struct nfs_server *server, if (delegation->inode != NULL && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { res = igrab(delegation->inode); + spin_unlock(&delegation->lock); + if (res != NULL) + return res; + return ERR_PTR(-EAGAIN); } spin_unlock(&delegation->lock); - if (res != NULL) - break; } - return res; + return ERR_PTR(-ENOENT); } /** @@ -876,16 +878,16 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle) { struct nfs_server *server; - struct inode *res = NULL; + struct inode *res; rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { res = nfs_delegation_find_inode_server(server, fhandle); - if (res != NULL) - break; + if (res != ERR_PTR(-ENOENT)) + return res; } rcu_read_unlock(); - return res; + return ERR_PTR(-ENOENT); } static void nfs_delegation_mark_reclaim_server(struct nfs_server *server) -- cgit v1.2.3 From ce5624f7e6675ae0425f0041bbad703ea41c784c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2018 14:31:25 -0400 Subject: NFSv4: Return NFS4ERR_DELAY when a layout recall fails due to igrab() Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index eacd09dcdad1..3a49bb19ef07 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -128,7 +128,6 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp, struct inode *inode; struct pnfs_layout_hdr *lo; -restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { if (stateid != NULL && @@ -136,20 +135,20 @@ restart: continue; inode = igrab(lo->plh_inode); if (!inode) - continue; + return ERR_PTR(-EAGAIN); if (!nfs_sb_active(inode->i_sb)) { rcu_read_unlock(); spin_unlock(&clp->cl_lock); iput(inode); spin_lock(&clp->cl_lock); rcu_read_lock(); - goto restart; + return ERR_PTR(-EAGAIN); } return inode; } } - return NULL; + return ERR_PTR(-ENOENT); } /* @@ -166,7 +165,6 @@ static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp, struct inode *inode; struct pnfs_layout_hdr *lo; -restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { nfsi = NFS_I(lo->plh_inode); @@ -176,20 +174,20 @@ restart: continue; inode = igrab(lo->plh_inode); if (!inode) - continue; + return ERR_PTR(-EAGAIN); if (!nfs_sb_active(inode->i_sb)) { rcu_read_unlock(); spin_unlock(&clp->cl_lock); iput(inode); spin_lock(&clp->cl_lock); rcu_read_lock(); - goto restart; + return ERR_PTR(-EAGAIN); } return inode; } } - return NULL; + return ERR_PTR(-ENOENT); } static struct inode *nfs_layout_find_inode(struct nfs_client *clp, @@ -201,7 +199,7 @@ static struct inode *nfs_layout_find_inode(struct nfs_client *clp, spin_lock(&clp->cl_lock); rcu_read_lock(); inode = nfs_layout_find_inode_by_stateid(clp, stateid); - if (!inode) + if (inode == ERR_PTR(-ENOENT)) inode = nfs_layout_find_inode_by_fh(clp, fh); rcu_read_unlock(); spin_unlock(&clp->cl_lock); @@ -256,8 +254,11 @@ static u32 initiate_file_draining(struct nfs_client *clp, LIST_HEAD(free_me_list); ino = nfs_layout_find_inode(clp, &args->cbl_fh, &args->cbl_stateid); - if (!ino) - goto out; + if (IS_ERR(ino)) { + if (ino == ERR_PTR(-EAGAIN)) + rv = NFS4ERR_DELAY; + goto out_noput; + } pnfs_layoutcommit_inode(ino, false); @@ -303,9 +304,10 @@ unlock: nfs_commit_inode(ino, 0); pnfs_put_layout_hdr(lo); out: + nfs_iput_and_deactive(ino); +out_noput: trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, &args->cbl_stateid, -rv); - nfs_iput_and_deactive(ino); return rv; } -- cgit v1.2.3 From fc40724fc6731d90cc7fb6d62d66135f85a33dd2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2018 12:43:06 -0400 Subject: NFSv4: Revert commit 5f83d86cf531d ("NFSv4.x: Fix wraparound issues..") The correct behaviour for NFSv4 sequence IDs is to wrap around to the value 0 after 0xffffffff. See https://tools.ietf.org/html/rfc5661#section-2.10.6.1 Fixes: 5f83d86cf531d ("NFSv4.x: Fix wraparound issues when validing...") Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 3a49bb19ef07..ee81031cab29 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -428,11 +428,8 @@ validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot, return htonl(NFS4ERR_SEQ_FALSE_RETRY); } - /* Wraparound */ - if (unlikely(slot->seq_nr == 0xFFFFFFFFU)) { - if (args->csa_sequenceid == 1) - return htonl(NFS4_OK); - } else if (likely(args->csa_sequenceid == slot->seq_nr + 1)) + /* Note: wraparound relies on seq_nr being of type u32 */ + if (likely(args->csa_sequenceid == slot->seq_nr + 1)) return htonl(NFS4_OK); /* Misordered request */ -- cgit v1.2.3 From 995891006ccbb73c0c9c3923cf9d25c4d07ec16b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2018 12:50:50 -0400 Subject: NFSv4: Fix a typo in nfs41_sequence_process We want to compare the slot_id to the highest slot number advertised by the server. Fixes: 3be0f80b5fe9c ("NFSv4.1: Fix up replays of interrupted requests") Cc: stable@vger.kernel.org # 4.15+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e39f59b5e6d2..c5c5d6c9af25 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -780,7 +780,7 @@ static int nfs41_sequence_process(struct rpc_task *task, * The slot id we used was probably retired. Try again * using a different slot id. */ - if (slot->seq_nr < slot->table->target_highest_slotid) + if (slot->slot_nr < slot->table->target_highest_slotid) goto session_recover; goto retry_nowait; case -NFS4ERR_SEQ_MISORDERED: -- cgit v1.2.3 From f9312a541050007ec59eb0106273a0a10718cd83 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2018 19:10:31 -0400 Subject: NFSv4.1: Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY If the server returns NFS4ERR_SEQ_FALSE_RETRY or NFS4ERR_RETRY_UNCACHED_REP, then it thinks we're trying to replay an existing request. If so, then let's just bump the sequence ID and retry the operation. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c5c5d6c9af25..ed45090e4df6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -775,6 +775,13 @@ static int nfs41_sequence_process(struct rpc_task *task, slot->slot_nr, slot->seq_nr); goto out_retry; + case -NFS4ERR_RETRY_UNCACHED_REP: + case -NFS4ERR_SEQ_FALSE_RETRY: + /* + * The server thinks we tried to replay a request. + * Retry the call after bumping the sequence ID. + */ + goto retry_new_seq; case -NFS4ERR_BADSLOT: /* * The slot id we used was probably retired. Try again @@ -799,10 +806,6 @@ static int nfs41_sequence_process(struct rpc_task *task, goto retry_nowait; } goto session_recover; - case -NFS4ERR_SEQ_FALSE_RETRY: - if (interrupted) - goto retry_new_seq; - goto session_recover; default: /* Just update the slot sequence no. */ slot->seq_done = 1; -- cgit v1.2.3 From 93b7f7ad2018d2037559b1d0892417864c78b371 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 11 Jun 2018 15:32:06 -0400 Subject: skip LAYOUTRETURN if layout is invalid Currently, when IO to DS fails, client returns the layout and retries against the MDS. However, then on umounting (inode eviction) it returns the layout again. This is because pnfs_return_layout() was changed in commit d78471d32bb6 ("pnfs/blocklayout: set PNFS_LAYOUTRETURN_ON_ERROR") to always set NFS_LAYOUT_RETURN_REQUESTED so even if we returned the layout, it will be returned again. Instead, let's also check if we have already marked the layout invalid. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d93942fa3817..bcc3addec3c5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1211,7 +1211,7 @@ _pnfs_return_layout(struct inode *ino) LIST_HEAD(tmp_list); nfs4_stateid stateid; int status = 0; - bool send; + bool send, valid_layout; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -1232,6 +1232,7 @@ _pnfs_return_layout(struct inode *ino) goto out_put_layout_hdr; spin_lock(&ino->i_lock); } + valid_layout = pnfs_layout_is_valid(lo); pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); @@ -1245,7 +1246,8 @@ _pnfs_return_layout(struct inode *ino) } /* Don't send a LAYOUTRETURN if list was initially empty */ - if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { + if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) || + !valid_layout) { spin_unlock(&ino->i_lock); dprintk("NFS: %s no layout segments to return\n", __func__); goto out_put_layout_hdr; -- cgit v1.2.3