Diffstat (limited to 'fs/nfsd')
-rw-r--r-- | fs/nfsd/Kconfig        |  13
-rw-r--r-- | fs/nfsd/Makefile       |  16
-rw-r--r-- | fs/nfsd/auth.c         |   3
-rw-r--r-- | fs/nfsd/export.c       |  28
-rw-r--r-- | fs/nfsd/filecache.c    | 176
-rw-r--r-- | fs/nfsd/filecache.h    |  10
-rw-r--r-- | fs/nfsd/localio.c      |  72
-rw-r--r-- | fs/nfsd/netns.h        |  18
-rw-r--r-- | fs/nfsd/nfs4callback.c | 204
-rw-r--r-- | fs/nfsd/nfs4layouts.c  |   7
-rw-r--r-- | fs/nfsd/nfs4proc.c     |  36
-rw-r--r-- | fs/nfsd/nfs4recover.c  |  11
-rw-r--r-- | fs/nfsd/nfs4state.c    | 637
-rw-r--r-- | fs/nfsd/nfs4xdr.c      | 359
-rw-r--r-- | fs/nfsd/nfs4xdr_gen.c  | 256
-rw-r--r-- | fs/nfsd/nfs4xdr_gen.h  |  25
-rw-r--r-- | fs/nfsd/nfsctl.c       | 127
-rw-r--r-- | fs/nfsd/nfsd.h         |  13
-rw-r--r-- | fs/nfsd/nfsfh.c        |   4
-rw-r--r-- | fs/nfsd/nfsfh.h        |   7
-rw-r--r-- | fs/nfsd/nfssvc.c       |  83
-rw-r--r-- | fs/nfsd/state.h        |  56
-rw-r--r-- | fs/nfsd/stats.c        |   4
-rw-r--r-- | fs/nfsd/stats.h        |   2
-rw-r--r-- | fs/nfsd/trace.h        |  25
-rw-r--r-- | fs/nfsd/vfs.c          | 150
-rw-r--r-- | fs/nfsd/xdr4.h         |   2
-rw-r--r-- | fs/nfsd/xdr4cb.h       |  10
28 files changed, 1616 insertions, 738 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index c0bd1509ccd4..731a88f6313e 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -4,6 +4,7 @@ config NFSD depends on INET depends on FILE_LOCKING depends on FSNOTIFY + select CRC32 select LOCKD select SUNRPC select EXPORTFS @@ -172,6 +173,16 @@ config NFSD_LEGACY_CLIENT_TRACKING recoverydir, or spawn a process directly using a usermodehelper upcall. - These legacy client tracking methods have proven to be probelmatic + These legacy client tracking methods have proven to be problematic and will be removed in the future. Say Y here if you need support for them in the interim. + +config NFSD_V4_DELEG_TIMESTAMPS + bool "Support delegated timestamps" + depends on NFSD_V4 + default n + help + NFSD implements delegated timestamps according to + draft-ietf-nfsv4-delstid-08 "Extending the Opening of Files". This + is currently an experimental feature and is therefore left disabled + by default. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 18cbd3fa7691..2f687619f65b 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -18,9 +18,23 @@ nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ - nfs4acl.o nfs4callback.o nfs4recover.o + nfs4acl.o nfs4callback.o nfs4recover.o nfs4xdr_gen.o nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o nfsd-$(CONFIG_NFS_LOCALIO) += localio.o + + +.PHONY: xdrgen + +xdrgen: ../../include/linux/sunrpc/xdrgen/nfs4_1.h nfs4xdr_gen.h nfs4xdr_gen.c + +../../include/linux/sunrpc/xdrgen/nfs4_1.h: ../../Documentation/sunrpc/xdr/nfs4_1.x + ../../tools/net/sunrpc/xdrgen/xdrgen definitions $< > $@ + +nfs4xdr_gen.h: ../../Documentation/sunrpc/xdr/nfs4_1.x + ../../tools/net/sunrpc/xdrgen/xdrgen declarations $< > $@ + +nfs4xdr_gen.c: ../../Documentation/sunrpc/xdr/nfs4_1.x + ../../tools/net/sunrpc/xdrgen/xdrgen source $< > $@ diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 93e33d1ee891..4dc327e02456 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -27,7 +27,7 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp) int flags = nfsexp_flags(cred, exp); /* discard any old override before preparing the new set */ - revert_creds(get_cred(current_real_cred())); + put_cred(revert_creds(get_cred(current_real_cred()))); new = prepare_creds(); if (!new) return -ENOMEM; @@ -80,7 +80,6 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp) new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); put_cred(override_creds(new)); - put_cred(new); return 0; oom: diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index aa4712362b3b..88ae410b4113 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -355,16 +355,25 @@ static void export_stats_destroy(struct export_stats *stats) EXP_STATS_COUNTERS_NUM); } -static void svc_export_put(struct kref *ref) +static void svc_export_release(struct rcu_head *rcu_head) { - struct svc_export *exp = container_of(ref, struct svc_export, h.ref); - path_put(&exp->ex_path); - auth_domain_put(exp->ex_client); + struct svc_export *exp = container_of(rcu_head, struct svc_export, + ex_rcu); + nfsd4_fslocs_free(&exp->ex_fslocs); export_stats_destroy(exp->ex_stats); kfree(exp->ex_stats); kfree(exp->ex_uuid); - kfree_rcu(exp, 
ex_rcu); + kfree(exp); +} + +static void svc_export_put(struct kref *ref) +{ + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + + path_put(&exp->ex_path); + auth_domain_put(exp->ex_client); + call_rcu(&exp->ex_rcu, svc_export_release); } static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) @@ -1115,7 +1124,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, test_bit(XPT_PEER_AUTH, &xprt->xpt_flags)) goto ok; } - goto denied; + if (!may_bypass_gss) + goto denied; ok: /* legacy gss-only clients are always OK: */ @@ -1425,13 +1435,9 @@ static int e_show(struct seq_file *m, void *p) return 0; } - if (!cache_get_rcu(&exp->h)) - return 0; - - if (cache_check(cd, &exp->h, NULL)) + if (cache_check_rcu(cd, &exp->h, NULL)) return 0; - exp_put(exp); return svc_export_show(m, cd, cp); } diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 78f4b5573b90..e108b6c705b4 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -39,6 +39,7 @@ #include <linux/fsnotify.h> #include <linux/seq_file.h> #include <linux/rhashtable.h> +#include <linux/nfslocalio.h> #include "vfs.h" #include "nfsd.h" @@ -318,15 +319,14 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } - -static bool nfsd_file_lru_add(struct nfsd_file *nf) +static void nfsd_file_lru_add(struct nfsd_file *nf) { - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) { + refcount_inc(&nf->nf_ref); + if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) trace_nfsd_file_lru_add(nf); - return true; - } - return false; + else + WARN_ON(1); + nfsd_file_schedule_laundrette(); } static bool nfsd_file_lru_remove(struct nfsd_file *nf) @@ -362,51 +362,57 @@ nfsd_file_put(struct nfsd_file *nf) if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - /* - * If this is the last reference (nf_ref == 1), then try to - * transfer it to the LRU. - */ - if (refcount_dec_not_one(&nf->nf_ref)) - return; - - /* Try to add it to the LRU. If that fails, decrement. */ - if (nfsd_file_lru_add(nf)) { - /* If it's still hashed, we're done */ - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_schedule_laundrette(); - return; - } - - /* - * We're racing with unhashing, so try to remove it from - * the LRU. If removal fails, then someone else already - * has our reference. - */ - if (!nfsd_file_lru_remove(nf)) - return; - } + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + set_bit(NFSD_FILE_RECENT, &nf->nf_flags); } + if (refcount_dec_and_test(&nf->nf_ref)) nfsd_file_free(nf); } /** - * nfsd_file_put_local - put nfsd_file reference and arm nfsd_serv_put in caller + * nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller * @nf: nfsd_file of which to put the reference * * First save the associated net to return to caller, then put * the reference of the nfsd_file. */ struct net * -nfsd_file_put_local(struct nfsd_file *nf) +nfsd_file_put_local(struct nfsd_file __rcu **pnf) { - struct net *net = nf->nf_net; + struct nfsd_file *nf; + struct net *net = NULL; - nfsd_file_put(nf); + nf = unrcu_pointer(xchg(pnf, NULL)); + if (nf) { + net = nf->nf_net; + nfsd_file_put(nf); + } return net; } /** + * nfsd_file_get_local - get nfsd_file reference and reference to net + * @nf: nfsd_file of which to put the reference + * + * Get reference to both the nfsd_file and nf->nf_net. 
+ */ +struct nfsd_file * +nfsd_file_get_local(struct nfsd_file *nf) +{ + struct net *net = nf->nf_net; + + if (nfsd_net_try_get(net)) { + nf = nfsd_file_get(nf); + if (!nf) + nfsd_net_put(net); + } else { + nf = NULL; + } + return nf; +} + +/** * nfsd_file_file - get the backing file of an nfsd_file * @nf: nfsd_file of which to access the backing file. * @@ -529,13 +535,12 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, } /* - * Put the reference held on behalf of the LRU. If it wasn't the last - * one, then just remove it from the LRU and ignore it. + * Put the reference held on behalf of the LRU if it is the last + * reference, else rotate. */ - if (!refcount_dec_and_test(&nf->nf_ref)) { + if (!refcount_dec_if_one(&nf->nf_ref)) { trace_nfsd_file_gc_in_use(nf); - list_lru_isolate(lru, &nf->nf_lru); - return LRU_REMOVED; + return LRU_ROTATE; } /* Refcount went to zero. Unhash it and queue it to the dispose list */ @@ -547,14 +552,54 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_REMOVED; } +static enum lru_status +nfsd_file_gc_cb(struct list_head *item, struct list_lru_one *lru, + void *arg) +{ + struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); + + if (test_and_clear_bit(NFSD_FILE_RECENT, &nf->nf_flags)) { + /* + * "REFERENCED" really means "should be at the end of the + * LRU. As we are putting it there we can clear the flag. + */ + clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + trace_nfsd_file_gc_aged(nf); + return LRU_ROTATE; + } + return nfsd_file_lru_cb(item, lru, arg); +} + +/* If the shrinker runs between calls to list_lru_walk_node() in + * nfsd_file_gc(), the "remaining" count will be wrong. This could + * result in premature freeing of some files. This may not matter much + * but is easy to fix with this spinlock which temporarily disables + * the shrinker. 
+ */ +static DEFINE_SPINLOCK(nfsd_gc_lock); static void nfsd_file_gc(void) { + unsigned long ret = 0; LIST_HEAD(dispose); - unsigned long ret; + int nid; + + spin_lock(&nfsd_gc_lock); + for_each_node_state(nid, N_NORMAL_MEMORY) { + unsigned long remaining = list_lru_count_node(&nfsd_file_lru, nid); - ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, - &dispose, list_lru_count(&nfsd_file_lru)); + while (remaining > 0) { + unsigned long nr = min(remaining, NFSD_FILE_GC_BATCH); + + remaining -= nr; + ret += list_lru_walk_node(&nfsd_file_lru, nid, nfsd_file_gc_cb, + &dispose, &nr); + if (nr) + /* walk aborted early */ + remaining = 0; + } + } + spin_unlock(&nfsd_gc_lock); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_dispose_list_delayed(&dispose); } @@ -562,9 +607,9 @@ nfsd_file_gc(void) static void nfsd_file_gc_worker(struct work_struct *work) { - nfsd_file_gc(); if (list_lru_count(&nfsd_file_lru)) - nfsd_file_schedule_laundrette(); + nfsd_file_gc(); + nfsd_file_schedule_laundrette(); } static unsigned long @@ -579,8 +624,12 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) LIST_HEAD(dispose); unsigned long ret; + if (!spin_trylock(&nfsd_gc_lock)) + return SHRINK_STOP; + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); + spin_unlock(&nfsd_gc_lock); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_dispose_list_delayed(&dispose); return ret; @@ -685,17 +734,12 @@ nfsd_file_close_inode(struct inode *inode) void nfsd_file_close_inode_sync(struct inode *inode) { - struct nfsd_file *nf; LIST_HEAD(dispose); trace_nfsd_file_close(inode); nfsd_file_queue_for_close(inode, &dispose); - while (!list_empty(&dispose)) { - nf = list_first_entry(&dispose, struct nfsd_file, nf_gc); - list_del_init(&nf->nf_gc); - nfsd_file_free(nf); - } + nfsd_file_dispose_list(&dispose); } static int @@ -842,6 +886,14 @@ __nfsd_file_cache_purge(struct net *net) struct nfsd_file *nf; LIST_HEAD(dispose); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + if (net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + nfs_localio_invalidate_clients(&nn->local_clients, + &nn->local_clients_lock); + } +#endif + rhltable_walk_enter(&nfsd_file_rhltable, &iter); do { rhashtable_walk_start(&iter); @@ -1049,16 +1101,8 @@ retry: nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc); rcu_read_unlock(); - if (nf) { - /* - * If the nf is on the LRU then it holds an extra reference - * that must be put if it's removed. It had better not be - * the last one however, since we should hold another. - */ - if (nfsd_file_lru_remove(nf)) - refcount_dec(&nf->nf_ref); + if (nf) goto wait_for_construction; - } new = nfsd_file_alloc(net, inode, need, want_gc); if (!new) { @@ -1152,6 +1196,9 @@ open_file: */ if (status != nfs_ok || inode->i_nlink == 0) nfsd_file_unhash(nf); + else if (want_gc) + nfsd_file_lru_add(nf); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); if (status == nfs_ok) goto out; @@ -1231,10 +1278,9 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * a file. The security implications of this should be carefully * considered before use. * - * The nfsd_file object returned by this API is reference-counted - * and garbage-collected. The object is retained for a few - * seconds after the final nfsd_file_put() in case the caller - * wants to re-use it. + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. 
The object is unhashed after the + * final nfsd_file_put(). * * Return values: * %nfs_ok - @pnf points to an nfsd_file with its reference @@ -1256,8 +1302,8 @@ nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, __be32 beres; beres = nfsd_file_do_acquire(NULL, net, cred, client, - fhp, may_flags, NULL, pnf, true); - revert_creds(save_cred); + fhp, may_flags, NULL, pnf, false); + put_cred(revert_creds(save_cred)); return beres; } diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index d5db6b34ba30..722b26c71e45 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -4,6 +4,12 @@ #include <linux/fsnotify_backend.h> /* + * Limit the time that the list_lru_one lock is held during + * an LRU scan. + */ +#define NFSD_FILE_GC_BATCH (16UL) + +/* * This is the fsnotify_mark container that nfsd attaches to the files that it * is holding open. Note that we have a separate refcount here aside from the * one in the fsnotify_mark. We only want a single fsnotify_mark attached to @@ -38,6 +44,7 @@ struct nfsd_file { #define NFSD_FILE_PENDING (1) #define NFSD_FILE_REFERENCED (2) #define NFSD_FILE_GC (3) +#define NFSD_FILE_RECENT (4) unsigned long nf_flags; refcount_t nf_ref; unsigned char nf_may; @@ -55,7 +62,8 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); -struct net *nfsd_file_put_local(struct nfsd_file *nf); +struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf); +struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); struct file *nfsd_file_file(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index f441cb9f74d5..519bbdedcb11 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -24,19 +24,6 @@ #include "filecache.h" #include "cache.h" -static const struct nfsd_localio_operations nfsd_localio_ops = { - .nfsd_serv_try_get = nfsd_serv_try_get, - .nfsd_serv_put = nfsd_serv_put, - .nfsd_open_local_fh = nfsd_open_local_fh, - .nfsd_file_put_local = nfsd_file_put_local, - .nfsd_file_file = nfsd_file_file, -}; - -void nfsd_localio_ops_init(void) -{ - nfs_to = &nfsd_localio_ops; -} - /** * nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file * @@ -45,6 +32,7 @@ void nfsd_localio_ops_init(void) * @rpc_clnt: rpc_clnt that the client established * @cred: cred that the client established * @nfs_fh: filehandle to lookup + * @nfp: place to find the nfsd_file, or store it if it was non-NULL * @fmode: fmode_t to use for open * * This function maps a local fh to a path on a local filesystem. @@ -52,13 +40,14 @@ void nfsd_localio_ops_init(void) * avoid all the NFS overhead with reads, writes and commits. * * On successful return, returned nfsd_file will have its nf_net member - * set. Caller (NFS client) is responsible for calling nfsd_serv_put and + * set. Caller (NFS client) is responsible for calling nfsd_net_put and * nfsd_file_put (via nfs_to_nfsd_file_put_local). 
*/ -struct nfsd_file * +static struct nfsd_file * nfsd_open_local_fh(struct net *net, struct auth_domain *dom, struct rpc_clnt *rpc_clnt, const struct cred *cred, - const struct nfs_fh *nfs_fh, const fmode_t fmode) + const struct nfs_fh *nfs_fh, struct nfsd_file __rcu **pnf, + const fmode_t fmode) { int mayflags = NFSD_MAY_LOCALIO; struct svc_cred rq_cred; @@ -69,6 +58,15 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (nfs_fh->size > NFS4_FHSIZE) return ERR_PTR(-EINVAL); + if (!nfsd_net_try_get(net)) + return ERR_PTR(-ENXIO); + + rcu_read_lock(); + localio = nfsd_file_get(rcu_dereference(*pnf)); + rcu_read_unlock(); + if (localio) + return localio; + /* nfs_fh -> svc_fh */ fh_init(&fh, NFS4_FHSIZE); fh.fh_handle.fh_size = nfs_fh->size; @@ -90,9 +88,48 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (rq_cred.cr_group_info) put_group_info(rq_cred.cr_group_info); + if (!IS_ERR(localio)) { + struct nfsd_file *new; + if (!nfsd_net_try_get(net)) { + nfsd_file_put(localio); + nfsd_net_put(net); + return ERR_PTR(-ENXIO); + } + nfsd_file_get(localio); + again: + new = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(localio))); + if (new) { + /* Some other thread installed an nfsd_file */ + if (nfsd_file_get(new) == NULL) + goto again; + /* + * Drop the ref we were going to install (both file and + * net) and the one we were going to return (only file). + */ + nfsd_file_put(localio); + nfsd_net_put(net); + nfsd_file_put(localio); + localio = new; + } + } else + nfsd_net_put(net); + return localio; } -EXPORT_SYMBOL_GPL(nfsd_open_local_fh); + +static const struct nfsd_localio_operations nfsd_localio_ops = { + .nfsd_net_try_get = nfsd_net_try_get, + .nfsd_net_put = nfsd_net_put, + .nfsd_open_local_fh = nfsd_open_local_fh, + .nfsd_file_put_local = nfsd_file_put_local, + .nfsd_file_get_local = nfsd_file_get_local, + .nfsd_file_file = nfsd_file_file, +}; + +void nfsd_localio_ops_init(void) +{ + nfs_to = &nfsd_localio_ops; +} /* * UUID_IS_LOCAL XDR functions @@ -114,6 +151,7 @@ static __be32 localio_proc_uuid_is_local(struct svc_rqst *rqstp) struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs_uuid_is_local(&argp->uuid, &nn->local_clients, + &nn->local_clients_lock, net, rqstp->rq_client, THIS_MODULE); return rpc_success; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 26f7b34d1a03..3e2d0fde80a7 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -128,21 +128,16 @@ struct nfsd_net { seqlock_t writeverf_lock; unsigned char writeverf[8]; - /* - * Max number of connections this nfsd container will allow. Defaults - * to '0' which is means that it bases this on the number of threads. 
- */ - unsigned int max_connections; - u32 clientid_base; u32 clientid_counter; u32 clverifier_counter; struct svc_info nfsd_info; #define nfsd_serv nfsd_info.serv - struct percpu_ref nfsd_serv_ref; - struct completion nfsd_serv_confirm_done; - struct completion nfsd_serv_free_done; + + struct percpu_ref nfsd_net_ref; + struct completion nfsd_net_confirm_done; + struct completion nfsd_net_free_done; /* * clientid and stateid data for construction of net unique COPY @@ -219,6 +214,7 @@ struct nfsd_net { #if IS_ENABLED(CONFIG_NFS_LOCALIO) /* Local clients to be invalidated when net is shut down */ + spinlock_t local_clients_lock; struct list_head local_clients; #endif }; @@ -229,8 +225,8 @@ struct nfsd_net { extern bool nfsd_support_version(int vers); extern unsigned int nfsd_net_id; -bool nfsd_serv_try_get(struct net *net); -void nfsd_serv_put(struct net *net); +bool nfsd_net_try_get(struct net *net); +void nfsd_net_put(struct net *net); void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn); void nfsd_reset_write_verifier(struct nfsd_net *nn); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index b7b70ab962f8..ec6539cec0fe 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -42,11 +42,10 @@ #include "trace.h" #include "xdr4cb.h" #include "xdr4.h" +#include "nfs4xdr_gen.h" #define NFSDDBG_FACILITY NFSDDBG_PROC -static void nfsd4_mark_cb_fault(struct nfs4_client *clp); - #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 @@ -93,12 +92,35 @@ static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap, { fattr->ncf_cb_change = 0; fattr->ncf_cb_fsize = 0; + fattr->ncf_cb_atime.tv_sec = 0; + fattr->ncf_cb_atime.tv_nsec = 0; + fattr->ncf_cb_mtime.tv_sec = 0; + fattr->ncf_cb_mtime.tv_nsec = 0; + if (bitmap[0] & FATTR4_WORD0_CHANGE) if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0) - return -NFSERR_BAD_XDR; + return -EIO; if (bitmap[0] & FATTR4_WORD0_SIZE) if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0) - return -NFSERR_BAD_XDR; + return -EIO; + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) { + fattr4_time_deleg_access access; + + if (!xdrgen_decode_fattr4_time_deleg_access(xdr, &access)) + return -EIO; + fattr->ncf_cb_atime.tv_sec = access.seconds; + fattr->ncf_cb_atime.tv_nsec = access.nseconds; + + } + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) { + fattr4_time_deleg_modify modify; + + if (!xdrgen_decode_fattr4_time_deleg_modify(xdr, &modify)) + return -EIO; + fattr->ncf_cb_mtime.tv_sec = modify.seconds; + fattr->ncf_cb_mtime.tv_nsec = modify.nseconds; + + } return 0; } @@ -361,16 +383,24 @@ static void encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr, struct nfs4_cb_fattr *fattr) { - struct nfs4_delegation *dp = - container_of(fattr, struct nfs4_delegation, dl_cb_fattr); + struct nfs4_delegation *dp = container_of(fattr, struct nfs4_delegation, dl_cb_fattr); struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle; - u32 bmap[1]; - - bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE; - + struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; + u32 bmap_size = 1; + u32 bmap[3]; + + bmap[0] = FATTR4_WORD0_SIZE; + if (!ncf->ncf_file_modified) + bmap[0] |= FATTR4_WORD0_CHANGE; + + if (deleg_attrs_deleg(dp->dl_type)) { + bmap[1] = 0; + bmap[2] = FATTR4_WORD2_TIME_DELEG_ACCESS | FATTR4_WORD2_TIME_DELEG_MODIFY; + bmap_size = 3; + } encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR); encode_nfs_fh4(xdr, fh); - encode_bitmap4(xdr, bmap, ARRAY_SIZE(bmap)); + encode_bitmap4(xdr, bmap, bmap_size); hdr->nops++; } @@ 
-634,7 +664,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, struct nfs4_cb_compound_hdr hdr; int status; u32 bitmap[3] = {0}; - u32 attrlen; + u32 attrlen, maxlen; struct nfs4_cb_fattr *ncf = container_of(cb, struct nfs4_cb_fattr, ncf_getattr); @@ -647,14 +677,18 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, return status; status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status); - if (status) + if (unlikely(status || cb->cb_status)) return status; if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0) - return -NFSERR_BAD_XDR; + return -EIO; if (xdr_stream_decode_u32(xdr, &attrlen) < 0) - return -NFSERR_BAD_XDR; - if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize))) - return -NFSERR_BAD_XDR; + return -EIO; + maxlen = sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize); + if (bitmap[2] != 0) + maxlen += (sizeof(ncf->ncf_cb_mtime.tv_sec) + + sizeof(ncf->ncf_cb_mtime.tv_nsec)) * 2; + if (attrlen > maxlen) + return -EIO; status = decode_cb_fattr4(xdr, bitmap, ncf); return status; } @@ -1028,6 +1062,17 @@ static bool nfsd4_queue_cb(struct nfsd4_callback *cb) return queue_work(clp->cl_callback_wq, &cb->cb_work); } +static void nfsd4_requeue_cb(struct rpc_task *task, struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + + if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { + trace_nfsd_cb_restart(clp, cb); + task->tk_status = 0; + set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags); + } +} + static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) { atomic_inc(&clp->cl_cb_inflight); @@ -1036,8 +1081,7 @@ static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) static void nfsd41_cb_inflight_end(struct nfs4_client *clp) { - if (atomic_dec_and_test(&clp->cl_cb_inflight)) - wake_up_var(&clp->cl_cb_inflight); + atomic_dec_and_wake_up(&clp->cl_cb_inflight); } static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp) @@ -1266,6 +1310,11 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb) trace_nfsd_cb_destroy(clp, cb); nfsd41_cb_release_slot(cb); + if (test_bit(NFSD4_CALLBACK_WAKE, &cb->cb_flags)) + clear_and_wake_up_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags); + else + clear_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags); + if (cb->cb_ops && cb->cb_ops->release) cb->cb_ops->release(cb); nfsd41_cb_inflight_end(clp); @@ -1293,30 +1342,14 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) rpc_call_start(task); } +/* Returns true if CB_COMPOUND processing should continue */ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb) { - struct nfs4_client *clp = cb->cb_clp; - struct nfsd4_session *session = clp->cl_cb_session; - bool ret = true; - - if (!clp->cl_minorversion) { - /* - * If the backchannel connection was shut down while this - * task was queued, we need to resubmit it after setting up - * a new backchannel connection. - * - * Note that if we lost our callback connection permanently - * the submission code will error out, so we don't need to - * handle that case here. - */ - if (RPC_SIGNALLED(task)) - goto need_restart; - - return true; - } + struct nfsd4_session *session = cb->cb_clp->cl_cb_session; + bool ret = false; if (cb->cb_held_slot < 0) - goto need_restart; + goto requeue; /* This is the operation status code for CB_SEQUENCE */ trace_nfsd_cb_seq_status(task, cb); @@ -1330,11 +1363,16 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback * (sequence ID, cached reply) MUST NOT change. 
*/ ++session->se_cb_seq_nr[cb->cb_held_slot]; + ret = true; break; case -ESERVERFAULT: - ++session->se_cb_seq_nr[cb->cb_held_slot]; + /* + * Call succeeded, but the session, slot index, or slot + * sequence number in the response do not match the same + * in the server's call. The sequence information is thus + * untrustworthy. + */ nfsd4_mark_cb_fault(cb->cb_clp); - ret = false; break; case 1: /* @@ -1346,43 +1384,42 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback fallthrough; case -NFS4ERR_BADSESSION: nfsd4_mark_cb_fault(cb->cb_clp); - ret = false; - goto need_restart; + goto requeue; case -NFS4ERR_DELAY: cb->cb_seq_status = 1; - if (!rpc_restart_call(task)) - goto out; - + if (RPC_SIGNALLED(task) || !rpc_restart_call(task)) + goto requeue; rpc_delay(task, 2 * HZ); return false; + case -NFS4ERR_SEQ_MISORDERED: case -NFS4ERR_BADSLOT: + /* + * A SEQ_MISORDERED or BADSLOT error means that the client and + * server are out of sync as to the backchannel parameters. Mark + * the backchannel faulty and restart the RPC, but leak the slot + * so that it's no longer used. + */ + nfsd4_mark_cb_fault(cb->cb_clp); + cb->cb_held_slot = -1; goto retry_nowait; - case -NFS4ERR_SEQ_MISORDERED: - if (session->se_cb_seq_nr[cb->cb_held_slot] != 1) { - session->se_cb_seq_nr[cb->cb_held_slot] = 1; - goto retry_nowait; - } - break; default: nfsd4_mark_cb_fault(cb->cb_clp); } trace_nfsd_cb_free_slot(task, cb); nfsd41_cb_release_slot(cb); - - if (RPC_SIGNALLED(task)) - goto need_restart; -out: return ret; retry_nowait: - if (rpc_restart_call_prepare(task)) - ret = false; - goto out; -need_restart: - if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { - trace_nfsd_cb_restart(clp, cb); - task->tk_status = 0; - cb->cb_need_restart = true; + /* + * RPC_SIGNALLED() means that the rpc_client is being torn down and + * (possibly) recreated. Requeue the call in that case. + */ + if (!RPC_SIGNALLED(task)) { + if (rpc_restart_call_prepare(task)) + return false; } +requeue: + nfsd41_cb_release_slot(cb); + nfsd4_requeue_cb(task, cb); return false; } @@ -1393,12 +1430,26 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) trace_nfsd_cb_rpc_done(clp); - if (!nfsd4_cb_sequence_done(task, cb)) + if (!clp->cl_minorversion) { + /* + * If the backchannel connection was shut down while this + * task was queued, we need to resubmit it after setting up + * a new backchannel connection. + * + * Note that if we lost our callback connection permanently + * the submission code will error out, so we don't need to + * handle that case here. 
+ */ + if (RPC_SIGNALLED(task)) + nfsd4_requeue_cb(task, cb); + } else if (!nfsd4_cb_sequence_done(task, cb)) { return; + } if (cb->cb_status) { - WARN_ONCE(task->tk_status, "cb_status=%d tk_status=%d", - cb->cb_status, task->tk_status); + WARN_ONCE(task->tk_status, + "cb_status=%d tk_status=%d cb_opcode=%d", + cb->cb_status, task->tk_status, cb->cb_ops->opcode); task->tk_status = cb->cb_status; } @@ -1426,7 +1477,7 @@ static void nfsd4_cb_release(void *calldata) trace_nfsd_cb_rpc_release(cb->cb_clp); - if (cb->cb_need_restart) + if (test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags)) nfsd4_queue_cb(cb); else nfsd41_destroy_cb(cb); @@ -1539,7 +1590,7 @@ nfsd4_run_cb_work(struct work_struct *work) container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; - int flags; + int flags, ret; trace_nfsd_cb_start(clp); @@ -1565,16 +1616,19 @@ nfsd4_run_cb_work(struct work_struct *work) return; } - if (cb->cb_need_restart) { - cb->cb_need_restart = false; - } else { + if (!test_and_clear_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags)) { if (cb->cb_ops && cb->cb_ops->prepare) cb->cb_ops->prepare(cb); } + cb->cb_msg.rpc_cred = clp->cl_cb_cred; flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN; - rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, - cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); + ret = rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, + cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); + if (ret != 0) { + set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags); + nfsd4_queue_cb(cb); + } } void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, @@ -1584,10 +1638,10 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op]; cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; + cb->cb_flags = 0; cb->cb_ops = ops; INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); cb->cb_status = 0; - cb->cb_need_restart = false; cb->cb_held_slot = -1; } diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index fbfddd3c4c94..290271ac4245 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -344,9 +344,10 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls); trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid); - refcount_inc(&ls->ls_stid.sc_count); - nfsd4_run_cb(&ls->ls_recall); - + if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ls->ls_recall.cb_flags)) { + refcount_inc(&ls->ls_stid.sc_count); + nfsd4_run_cb(&ls->ls_recall); + } out_unlock: spin_unlock(&ls->ls_lock); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ad44ad49274f..d0358c801c42 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1135,18 +1135,43 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, .na_iattr = &setattr->sa_iattr, .na_seclabel = &setattr->sa_label, }; + bool save_no_wcc, deleg_attrs; + struct nfs4_stid *st = NULL; struct inode *inode; __be32 status = nfs_ok; - bool save_no_wcc; int err; - if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { + deleg_attrs = setattr->sa_bmval[2] & (FATTR4_WORD2_TIME_DELEG_ACCESS | + FATTR4_WORD2_TIME_DELEG_MODIFY); + + if (deleg_attrs || (setattr->sa_iattr.ia_valid & ATTR_SIZE)) { + int flags = WR_STATE; + + if (setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) + flags |= RD_STATE; + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, - WR_STATE, NULL, NULL); + flags, NULL, &st); if 
(status) return status; } + + if (deleg_attrs) { + status = nfserr_bad_stateid; + if (st->sc_type & SC_TYPE_DELEG) { + struct nfs4_delegation *dp = delegstateid(st); + + /* Only for *_ATTRS_DELEG flavors */ + if (deleg_attrs_deleg(dp->dl_type)) + status = nfs_ok; + } + } + if (st) + nfs4_put_stid(st); + if (status) + return status; + err = fh_want_write(&cstate->current_fh); if (err) return nfserrno(err); @@ -1822,7 +1847,7 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) NFSPROC4_CLNT_CB_OFFLOAD); trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid, &cbo->co_fh, copy->cp_count, copy->nfserr); - nfsd4_run_cb(&cbo->co_cb); + nfsd4_try_run_cb(&cbo->co_cb); } /** @@ -3741,7 +3766,8 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow; u32 opiter; - if (!cstate->minorversion) + if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] || + cstate->minorversion == 0) return false; if (cstate->spo_must_allowed) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4a765555bf84..c1d9bd07285f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -82,14 +82,13 @@ nfs4_save_creds(const struct cred **original_creds) new->fsuid = GLOBAL_ROOT_UID; new->fsgid = GLOBAL_ROOT_GID; *original_creds = override_creds(new); - put_cred(new); return 0; } static void nfs4_reset_creds(const struct cred *original) { - revert_creds(original); + put_cred(revert_creds(original)); } static void @@ -234,9 +233,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + if (IS_ERR(dentry)) + status = PTR_ERR(dentry); out_put: - dput(dentry); + if (!status) + dput(dentry); out_unlock: inode_unlock(d_inode(dir)); if (status == 0) { @@ -2052,7 +2054,6 @@ static inline int check_for_legacy_methods(int status, struct net *net) path_put(&path); if (status) return -ENOTDIR; - status = nn->client_tracking_ops->init(net); } return status; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 741b9449f727..59a693f22452 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -946,15 +946,6 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla spin_lock_init(&stid->sc_lock); INIT_LIST_HEAD(&stid->sc_cp_list); - /* - * It shouldn't be a problem to reuse an opaque stateid value. - * I don't think it is for 4.1. But with 4.0 I worry that, for - * example, a stray write retransmission could be accepted by - * the server when it should have been rejected. Therefore, - * adopt a trick from the sctp code to attempt to maximize the - * amount of time until an id is reused, by ensuring they always - * "increase" (mod INT_MAX): - */ return stid; out_free: kmem_cache_free(slab, stid); @@ -1050,6 +1041,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) return openlockstateid(stid); } +/* + * As the sc_free callback of deleg, this may be called by nfs4_put_stid + * in nfsd_break_one_deleg. + * Considering nfsd_break_one_deleg is called with the flc->flc_lock held, + * this function mustn't ever sleep. 
+ */ static void nfs4_free_deleg(struct nfs4_stid *stid) { struct nfs4_delegation *dp = delegstateid(stid); @@ -1378,7 +1375,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) struct nfs4_client *clp = dp->dl_stid.sc_client; WARN_ON(!list_empty(&dp->dl_recall_lru)); - WARN_ON_ONCE(!(dp->dl_stid.sc_status & + WARN_ON_ONCE(dp->dl_stid.sc_client->cl_minorversion > 0 && + !(dp->dl_stid.sc_status & (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED))); trace_nfsd_stid_revoke(&dp->dl_stid); @@ -1909,15 +1907,84 @@ gen_sessionid(struct nfsd4_session *ses) */ #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) +static struct shrinker *nfsd_slot_shrinker; +static DEFINE_SPINLOCK(nfsd_session_list_lock); +static LIST_HEAD(nfsd_session_list); +/* The sum of "target_slots-1" on every session. The shrinker can push this + * down, though it can take a little while for the memory to actually + * be freed. The "-1" is because we can never free slot 0 while the + * session is active. + */ +static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0); + static void -free_session_slots(struct nfsd4_session *ses) +free_session_slots(struct nfsd4_session *ses, int from) { int i; - for (i = 0; i < ses->se_fchannel.maxreqs; i++) { - free_svc_cred(&ses->se_slots[i]->sl_cred); - kfree(ses->se_slots[i]); + if (from >= ses->se_fchannel.maxreqs) + return; + + for (i = from; i < ses->se_fchannel.maxreqs; i++) { + struct nfsd4_slot *slot = xa_load(&ses->se_slots, i); + + /* + * Save the seqid in case we reactivate this slot. + * This will never require a memory allocation so GFP + * flag is irrelevant + */ + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0); + free_svc_cred(&slot->sl_cred); + kfree(slot); + } + ses->se_fchannel.maxreqs = from; + if (ses->se_target_maxslots > from) { + int new_target = from ?: 1; + atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots); + ses->se_target_maxslots = new_target; + } +} + +/** + * reduce_session_slots - reduce the target max-slots of a session if possible + * @ses: The session to affect + * @dec: how much to decrease the target by + * + * This interface can be used by a shrinker to reduce the target max-slots + * for a session so that some slots can eventually be freed. + * It uses spin_trylock() as it may be called in a context where another + * spinlock is held that has a dependency on client_lock. As shrinkers are + * best-effort, skiping a session is client_lock is already held has no + * great coast + * + * Return value: + * The number of slots that the target was reduced by. + */ +static int +reduce_session_slots(struct nfsd4_session *ses, int dec) +{ + struct nfsd_net *nn = net_generic(ses->se_client->net, + nfsd_net_id); + int ret = 0; + + if (ses->se_target_maxslots <= 1) + return ret; + if (!spin_trylock(&nn->client_lock)) + return ret; + ret = min(dec, ses->se_target_maxslots-1); + ses->se_target_maxslots -= ret; + atomic_sub(ret, &nfsd_total_target_slots); + ses->se_slot_gen += 1; + if (ses->se_slot_gen == 0) { + int i; + ses->se_slot_gen = 1; + for (i = 0; i < ses->se_fchannel.maxreqs; i++) { + struct nfsd4_slot *slot = xa_load(&ses->se_slots, i); + slot->sl_generation = 0; + } } + spin_unlock(&nn->client_lock); + return ret; } /* @@ -1935,89 +2002,46 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) return size + sizeof(struct nfsd4_slot); } -/* - * XXX: If we run out of reserved DRC memory we could (up to a point) - * re-negotiate active sessions and reduce their slot usage to make - * room for new connections. 
For now we just fail the create session. - */ -static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) -{ - u32 slotsize = slot_bytes(ca); - u32 num = ca->maxreqs; - unsigned long avail, total_avail; - unsigned int scale_factor; - - spin_lock(&nfsd_drc_lock); - if (nfsd_drc_max_mem > nfsd_drc_mem_used) - total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used; - else - /* We have handed out more space than we chose in - * set_max_drc() to allow. That isn't really a - * problem as long as that doesn't make us think we - * have lots more due to integer overflow. - */ - total_avail = 0; - avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail); - /* - * Never use more than a fraction of the remaining memory, - * unless it's the only way to give this client a slot. - * The chosen fraction is either 1/8 or 1/number of threads, - * whichever is smaller. This ensures there are adequate - * slots to support multiple clients per thread. - * Give the client one slot even if that would require - * over-allocation--it is better than failure. - */ - scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads); - - avail = clamp_t(unsigned long, avail, slotsize, - total_avail/scale_factor); - num = min_t(int, num, avail / slotsize); - num = max_t(int, num, 1); - nfsd_drc_mem_used += num * slotsize; - spin_unlock(&nfsd_drc_lock); - - return num; -} - -static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) -{ - int slotsize = slot_bytes(ca); - - spin_lock(&nfsd_drc_lock); - nfsd_drc_mem_used -= slotsize * ca->maxreqs; - spin_unlock(&nfsd_drc_lock); -} - static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, struct nfsd4_channel_attrs *battrs) { int numslots = fattrs->maxreqs; int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; + struct nfsd4_slot *slot; int i; - BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) - > PAGE_SIZE); - - new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); + new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; + xa_init(&new->se_slots); /* allocate each struct nfsd4_slot and data cache in one piece */ - for (i = 0; i < numslots; i++) { - new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL); - if (!new->se_slots[i]) - goto out_free; - } + slot = kzalloc(slotsize, GFP_KERNEL); + if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL))) + goto out_free; + for (i = 1; i < numslots; i++) { + const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; + slot = kzalloc(slotsize, gfp); + if (!slot) + break; + if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) { + kfree(slot); + break; + } + } + fattrs->maxreqs = i; memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); + new->se_target_maxslots = i; + atomic_add(i - 1, &nfsd_total_target_slots); new->se_cb_slot_avail = ~0U; new->se_cb_highest_slot = min(battrs->maxreqs - 1, NFSD_BC_SLOT_TABLE_SIZE - 1); spin_lock_init(&new->se_lock); return new; out_free: - while (i--) - kfree(new->se_slots[i]); + kfree(slot); + xa_destroy(&new->se_slots); kfree(new); return NULL; } @@ -2123,17 +2147,47 @@ static void nfsd4_del_conns(struct nfsd4_session *s) static void __free_session(struct nfsd4_session *ses) { - free_session_slots(ses); + free_session_slots(ses, 0); + xa_destroy(&ses->se_slots); kfree(ses); } static void free_session(struct nfsd4_session *ses) { nfsd4_del_conns(ses); - nfsd4_put_drc_mem(&ses->se_fchannel); __free_session(ses); } +static unsigned long +nfsd_slot_count(struct shrinker *s, 
struct shrink_control *sc) +{ + unsigned long cnt = atomic_read(&nfsd_total_target_slots); + + return cnt ? cnt : SHRINK_EMPTY; +} + +static unsigned long +nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc) +{ + struct nfsd4_session *ses; + unsigned long scanned = 0; + unsigned long freed = 0; + + spin_lock(&nfsd_session_list_lock); + list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) { + freed += reduce_session_slots(ses, 1); + scanned += 1; + if (scanned >= sc->nr_to_scan) { + /* Move starting point for next scan */ + list_move(&nfsd_session_list, &ses->se_all_sessions); + break; + } + } + spin_unlock(&nfsd_session_list_lock); + sc->nr_scanned = scanned; + return freed; +} + static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) { int idx; @@ -2158,6 +2212,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); + spin_lock(&nfsd_session_list_lock); + list_add_tail(&new->se_all_sessions, &nfsd_session_list); + spin_unlock(&nfsd_session_list_lock); + { struct sockaddr *sa = svc_addr(rqstp); /* @@ -2227,6 +2285,9 @@ unhash_session(struct nfsd4_session *ses) spin_lock(&ses->se_client->cl_lock); list_del(&ses->se_perclnt); spin_unlock(&ses->se_client->cl_lock); + spin_lock(&nfsd_session_list_lock); + list_del(&ses->se_all_sessions); + spin_unlock(&nfsd_session_list_lock); } /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ @@ -2362,8 +2423,12 @@ unhash_client_locked(struct nfs4_client *clp) } list_del_init(&clp->cl_lru); spin_lock(&clp->cl_lock); - list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + spin_lock(&nfsd_session_list_lock); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) { list_del_init(&ses->se_hash); + list_del_init(&ses->se_all_sessions); + } + spin_unlock(&nfsd_session_list_lock); spin_unlock(&clp->cl_lock); } @@ -2685,6 +2750,7 @@ static const char *cb_state2str(int state) static int client_info_show(struct seq_file *m, void *v) { struct inode *inode = file_inode(m->file); + struct nfsd4_session *ses; struct nfs4_client *clp; u64 clid; @@ -2721,6 +2787,16 @@ static int client_info_show(struct seq_file *m, void *v) seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr); seq_printf(m, "admin-revoked states: %d\n", atomic_read(&clp->cl_admin_revoked)); + spin_lock(&clp->cl_lock); + seq_printf(m, "session slots:"); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + seq_printf(m, " %u", ses->se_fchannel.maxreqs); + seq_printf(m, "\nsession target slots:"); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + seq_printf(m, " %u", ses->se_target_maxslots); + spin_unlock(&clp->cl_lock); + seq_puts(m, "\n"); + drop_client(clp); return 0; @@ -2873,6 +2949,21 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) return 0; } +static char *nfs4_show_deleg_type(u32 dl_type) +{ + switch (dl_type) { + case OPEN_DELEGATE_READ: + return "r"; + case OPEN_DELEGATE_WRITE: + return "w"; + case OPEN_DELEGATE_READ_ATTRS_DELEG: + return "ra"; + case OPEN_DELEGATE_WRITE_ATTRS_DELEG: + return "wa"; + } + return "?"; +} + static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_delegation *ds; @@ -2886,8 +2977,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) nfs4_show_stateid(s, &st->sc_stateid); seq_puts(s, ": { type: deleg, "); - seq_printf(s, "access: %s", - ds->dl_type 
== NFS4_OPEN_DELEGATE_READ ? "r" : "w"); + seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type)); /* XXX: lease time, whether it's being recalled. */ @@ -3076,7 +3166,6 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); drop_client(clp); } @@ -3107,7 +3196,6 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb) struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - clear_and_wake_up_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags); nfs4_put_stid(&dp->dl_stid); } @@ -3128,11 +3216,15 @@ static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf) struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags)) + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags)) return; + /* set to proper status when nfsd4_cb_getattr_done runs */ ncf->ncf_cb_status = NFS4ERR_IO; + /* ensure that wake_bit is done when RUNNING is cleared */ + set_bit(NFSD4_CALLBACK_WAKE, &ncf->ncf_getattr.cb_flags); + refcount_inc(&dp->dl_stid.sc_count); nfsd4_run_cb(&ncf->ncf_getattr); } @@ -3708,10 +3800,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u) kfree(exid->server_impl_name); } -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse) +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags) { /* The slot is in use, and no response has been sent. */ - if (slot_inuse) { + if (flags & NFSD4_SLOT_INUSE) { if (seqid == slot_seqid) return nfserr_jukebox; else @@ -3720,6 +3812,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse) /* Note unsigned 32-bit arithmetic handles wraparound: */ if (likely(seqid == slot_seqid + 1)) return nfs_ok; + if ((flags & NFSD4_SLOT_REUSED) && seqid == 1) + return nfs_ok; if (seqid == slot_seqid) return nfserr_replay_cache; return nfserr_seq_misordered; @@ -3778,17 +3872,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs ca->maxresp_cached = min_t(u32, ca->maxresp_cached, NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); - /* - * Note decreasing slot size below client's request may make it - * difficult for client to function correctly, whereas - * decreasing the number of slots will (just?) affect - * performance. When short on memory we therefore prefer to - * decrease number of slots instead of their size. Clients that - * request larger slots than they need will get poor results: - * Note that we always allow at least one slot, because our - * accounting is soft and provides no guarantees either way. 
- */ - ca->maxreqs = nfsd4_get_drc_mem(ca, nn); return nfs_ok; } @@ -3866,11 +3949,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, return status; status = check_backchannel_attrs(&cr_ses->back_channel); if (status) - goto out_release_drc_mem; + goto out_err; status = nfserr_jukebox; new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel); if (!new) - goto out_release_drc_mem; + goto out_err; conn = alloc_conn_from_crses(rqstp, cr_ses); if (!conn) goto out_free_session; @@ -3979,8 +4062,7 @@ out_free_conn: free_conn(conn); out_free_session: __free_session(new); -out_release_drc_mem: - nfsd4_put_drc_mem(&cr_ses->fore_channel); +out_err: return status; } @@ -4278,17 +4360,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (seq->slotid >= session->se_fchannel.maxreqs) goto out_put_session; - slot = session->se_slots[seq->slotid]; + slot = xa_load(&session->se_slots, seq->slotid); dprintk("%s: slotid %d\n", __func__, seq->slotid); - /* We do not negotiate the number of slots yet, so set the - * maxslots to the session maxreqs which is used to encode - * sr_highest_slotid and the sr_target_slot id to maxslots */ - seq->maxslots = session->se_fchannel.maxreqs; - trace_nfsd_slot_seqid_sequence(clp, seq, slot); - status = check_slot_seqid(seq->seqid, slot->sl_seqid, - slot->sl_flags & NFSD4_SLOT_INUSE); + status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags); if (status == nfserr_replay_cache) { status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) @@ -4313,6 +4389,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out_put_session; + if (session->se_target_maxslots < session->se_fchannel.maxreqs && + slot->sl_generation == session->se_slot_gen && + seq->maxslots <= session->se_target_maxslots) + /* Client acknowledged our reduce maxreqs */ + free_session_slots(session, session->se_target_maxslots); + buflen = (seq->cachethis) ? session->se_fchannel.maxresp_cached : session->se_fchannel.maxresp_sz; @@ -4323,9 +4405,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, svc_reserve(rqstp, buflen); status = nfs_ok; - /* Success! bump slot seqid */ + /* Success! accept new slot seqid */ slot->sl_seqid = seq->seqid; + slot->sl_flags &= ~NFSD4_SLOT_REUSED; slot->sl_flags |= NFSD4_SLOT_INUSE; + slot->sl_generation = session->se_slot_gen; if (seq->cachethis) slot->sl_flags |= NFSD4_SLOT_CACHETHIS; else @@ -4335,7 +4419,51 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cstate->session = session; cstate->clp = clp; + /* + * If the client ever uses the highest available slot, + * gently try to allocate another 20%. This allows + * fairly quick growth without grossly over-shooting what + * the client might use. + */ + if (seq->slotid == session->se_fchannel.maxreqs - 1 && + session->se_target_maxslots >= session->se_fchannel.maxreqs && + session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) { + int s = session->se_fchannel.maxreqs; + int cnt = DIV_ROUND_UP(s, 5); + void *prev_slot; + + do { + /* + * GFP_NOWAIT both allows allocation under a + * spinlock, and only succeeds if there is + * plenty of memory. 
+ */ + slot = kzalloc(slot_bytes(&session->se_fchannel), + GFP_NOWAIT); + prev_slot = xa_load(&session->se_slots, s); + if (xa_is_value(prev_slot) && slot) { + slot->sl_seqid = xa_to_value(prev_slot); + slot->sl_flags |= NFSD4_SLOT_REUSED; + } + if (slot && + !xa_is_err(xa_store(&session->se_slots, s, slot, + GFP_NOWAIT))) { + s += 1; + session->se_fchannel.maxreqs = s; + atomic_add(s - session->se_target_maxslots, + &nfsd_total_target_slots); + session->se_target_maxslots = s; + } else { + kfree(slot); + slot = NULL; + } + } while (slot && --cnt > 0); + } + out: + seq->maxslots = max(session->se_target_maxslots, seq->maxslots); + seq->target_maxslots = session->se_target_maxslots; + switch (clp->cl_cb_state) { case NFSD4_CB_DOWN: seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; @@ -4687,8 +4815,8 @@ out: static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - int count; struct nfsd_net *nn = shrink->private_data; + long count; count = atomic_read(&nn->nfsd_courtesy_clients); if (!count) @@ -4739,7 +4867,7 @@ static void init_nfs4_replay(struct nfs4_replay *rp) rp->rp_status = nfserr_serverfault; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; - atomic_set(&rp->rp_locked, RP_UNLOCKED); + rp->rp_locked = RP_UNLOCKED; } static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, @@ -4747,9 +4875,9 @@ static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, { if (!nfsd4_has_session(cstate)) { wait_var_event(&so->so_replay.rp_locked, - atomic_cmpxchg(&so->so_replay.rp_locked, - RP_UNLOCKED, RP_LOCKED) != RP_LOCKED); - if (atomic_read(&so->so_replay.rp_locked) == RP_UNHASHED) + cmpxchg(&so->so_replay.rp_locked, + RP_UNLOCKED, RP_LOCKED) != RP_LOCKED); + if (so->so_replay.rp_locked == RP_UNHASHED) return -EAGAIN; cstate->replay_owner = nfs4_get_stateowner(so); } @@ -4762,9 +4890,7 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) if (so != NULL) { cstate->replay_owner = NULL; - atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED); - smp_mb__after_atomic(); - wake_up_var(&so->so_replay.rp_locked); + store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED); nfs4_put_stateowner(so); } } @@ -5069,9 +5195,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) * Some threads with a reference might be waiting for rp_locked, * so tell them to stop waiting. */ - atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED); - smp_mb__after_atomic(); - wake_up_var(&oo->oo_owner.so_replay.rp_locked); + store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED); wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2); release_all_access(s); @@ -5290,6 +5414,11 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = { static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { + bool queued; + + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags)) + return; + /* * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease @@ -5298,7 +5427,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall)); + queued = nfsd4_run_cb(&dp->dl_recall); + WARN_ON_ONCE(!queued); + if (!queued) + refcount_dec(&dp->dl_stid.sc_count); } /* Called from break_lease() with flc_lock held. 
*/ @@ -5472,7 +5604,7 @@ retry: static inline __be32 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) { - if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) + if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type)) return nfserr_openmode; else return nfs_ok; @@ -5704,8 +5836,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } -static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp, - int flag) +static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp) { struct file_lease *fl; @@ -5714,7 +5845,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp, return NULL; fl->fl_lmops = &nfsd_lease_mng_ops; fl->c.flc_flags = FL_DELEG; - fl->c.flc_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; + fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK; fl->c.flc_owner = (fl_owner_t)dp; fl->c.flc_pid = current->tgid; fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file; @@ -5825,17 +5956,30 @@ nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf) return 0; } +#ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS +static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) +{ + return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS; +} +#else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */ +static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) +{ + return false; +} +#endif /* CONFIG NFSD_V4_DELEG_TIMESTAMPS */ + static struct nfs4_delegation * nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *parent) { - int status = 0; + bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct nfs4_file *fp = stp->st_stid.sc_file; struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf = NULL; struct file_lease *fl; + int status = 0; u32 dl_type; /* @@ -5860,7 +6004,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, */ if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) { nf = find_rw_file(fp); - dl_type = NFS4_OPEN_DELEGATE_WRITE; + dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE; } /* @@ -5869,12 +6013,21 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, */ if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) { nf = find_readable_file(fp); - dl_type = NFS4_OPEN_DELEGATE_READ; + dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ; } if (!nf) return ERR_PTR(-EAGAIN); + /* + * File delegations and associated locks cannot be recovered if the + * export is from an NFS proxy server. 
+ */ + if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { + nfsd_file_put(nf); + return ERR_PTR(-EOPNOTSUPP); + } + spin_lock(&state_lock); spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp)) @@ -5901,7 +6054,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (!dp) goto out_delegees; - fl = nfs4_alloc_init_lease(dp, dl_type); + fl = nfs4_alloc_init_lease(dp); if (!fl) goto out_clnt_odstate; @@ -5958,20 +6111,20 @@ out_delegees: static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; if (status == -EAGAIN) open->op_why_no_deleg = WND4_CONTENTION; else { open->op_why_no_deleg = WND4_RESOURCE; switch (open->op_deleg_want) { - case NFS4_SHARE_WANT_READ_DELEG: - case NFS4_SHARE_WANT_WRITE_DELEG: - case NFS4_SHARE_WANT_ANY_DELEG: + case OPEN4_SHARE_ACCESS_WANT_READ_DELEG: + case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG: + case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG: break; - case NFS4_SHARE_WANT_CANCEL: + case OPEN4_SHARE_ACCESS_WANT_CANCEL: open->op_why_no_deleg = WND4_CANCELLED; break; - case NFS4_SHARE_WANT_NO_DELEG: + case OPEN4_SHARE_ACCESS_WANT_NO_DELEG: WARN_ON_ONCE(1); } } @@ -6027,13 +6180,14 @@ static void nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *currentfh) { - struct nfs4_delegation *dp; struct nfs4_openowner *oo = openowner(stp->st_stateowner); + bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct svc_fh *parent = NULL; - int cb_up; - int status = 0; + struct nfs4_delegation *dp; struct kstat stat; + int status = 0; + int cb_up; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); open->op_recall = false; @@ -6074,20 +6228,22 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, destroy_delegation(dp); goto out_no_deleg; } - open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE; + open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : + OPEN_DELEGATE_WRITE; dp->dl_cb_fattr.ncf_cur_fsize = stat.size; dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat); trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); } else { - open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; + open->op_delegate_type = deleg_ts ? 
OPEN_DELEGATE_READ_ATTRS_DELEG : + OPEN_DELEGATE_READ; trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); } nfs4_put_stid(&dp->dl_stid); return; out_no_deleg: - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + open->op_delegate_type = OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && - open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { + open->op_delegate_type != OPEN_DELEGATE_NONE) { dprintk("NFSD: WARNING: refusing delegation reclaim\n"); open->op_recall = true; } @@ -6101,21 +6257,32 @@ out_no_deleg: static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, struct nfs4_delegation *dp) { - if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG && - dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; - open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; - } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG && - dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; - open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; + if (deleg_is_write(dp->dl_type)) { + if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) { + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; + } else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) { + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; + } } /* Otherwise the client must be confused wanting a delegation * it already has, therefore we don't return - * NFS4_OPEN_DELEGATE_NONE_EXT and reason. + * OPEN_DELEGATE_NONE_EXT and reason. */ } +/* Are we returning only a delegation stateid? */ +static bool open_xor_delegation(struct nfsd4_open *open) +{ + if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION)) + return false; + /* Did we actually get a delegation? */ + if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type)) + return false; + return true; +} + /** * nfsd4_process_open2 - finish open processing * @rqstp: the RPC transaction being executed @@ -6201,8 +6368,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf mutex_unlock(&stp->st_mutex); if (nfsd4_has_session(&resp->cstate)) { - if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) { + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_WANTED; goto nodeleg; } @@ -6213,12 +6380,23 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * OPEN succeeds even if we fail. */ nfs4_open_delegation(open, stp, &resp->cstate.current_fh); + + /* + * If there is an existing open stateid, it must be updated and + * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new + * open stateid would have to be created. + */ + if (new_stp && open_xor_delegation(open)) { + memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid)); + open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID; + release_open_stateid(stp); + } nodeleg: status = nfs_ok; trace_nfsd_open(&stp->st_stid.sc_stateid); out: /* 4.1 client trying to upgrade/downgrade delegation? */ - if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp && + if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp && open->op_deleg_want) nfsd4_deleg_xgrade_none_ext(open, dp); @@ -6229,7 +6407,7 @@ out: /* * To finish the open response, we just need to set the rflags. 
*/ - open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; + open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX; if (nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK; else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED)) @@ -6706,38 +6884,34 @@ deleg_reaper(struct nfsd_net *nn) { struct list_head *pos, *next; struct nfs4_client *clp; - LIST_HEAD(cblist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_state != NFSD4_ACTIVE || - list_empty(&clp->cl_delegations) || - atomic_read(&clp->cl_delegs_in_recall) || - test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || - (ktime_get_boottime_seconds() - - clp->cl_ra_time < 5)) { + + if (clp->cl_state != NFSD4_ACTIVE) + continue; + if (list_empty(&clp->cl_delegations)) + continue; + if (atomic_read(&clp->cl_delegs_in_recall)) + continue; + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags)) + continue; + if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5) + continue; + if (clp->cl_cb_state != NFSD4_CB_UP) continue; - } - list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ kref_get(&clp->cl_nfsdfs.cl_ref); - set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); clp->cl_ra_time = ktime_get_boottime_seconds(); - } - spin_unlock(&nn->client_lock); - - while (!list_empty(&cblist)) { - clp = list_first_entry(&cblist, struct nfs4_client, - cl_ra_cblist); - list_del_init(&clp->cl_ra_cblist); clp->cl_ra->ra_keep = 0; clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) | BIT(RCA4_TYPE_MASK_WDATA_DLG); trace_nfsd_cb_recall_any(clp->cl_ra); nfsd4_run_cb(&clp->cl_ra->ra_cb); } + spin_unlock(&nn->client_lock); } static void @@ -6902,7 +7076,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, */ statusmask |= SC_STATUS_REVOKED; - statusmask |= SC_STATUS_ADMIN_REVOKED; + statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) @@ -7557,9 +7731,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, - SC_STATUS_REVOKED | SC_STATUS_FREEABLE, - &s, nn); + status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn); if (status) goto out; dp = delegstateid(s); @@ -7667,7 +7839,7 @@ nfsd4_lm_notify(struct file_lock *fl) if (queue) { trace_nfsd_cb_notify_lock(lo, nbl); - nfsd4_run_cb(&nbl->nbl_cb); + nfsd4_try_run_cb(&nbl->nbl_cb); } } @@ -7966,7 +8138,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_blocked_lock *nbl = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; - struct super_block *sb; __be32 status = 0; int lkflg; int err; @@ -7986,7 +8157,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status != nfs_ok) return status; - sb = cstate->current_fh.fh_dentry->d_sb; + if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) { + status = nfserr_notsupp; + goto out; + } if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) @@ -8326,6 +8500,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_lock_range; goto put_stateid; } + if 
(exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { + status = nfserr_notsupp; + goto put_file; + } + file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); @@ -8721,7 +8900,6 @@ skip_grace: } /* initialization to perform when the nfsd service is started: */ - int nfs4_state_start(void) { @@ -8731,6 +8909,15 @@ nfs4_state_start(void) if (ret) return ret; + nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot"); + if (!nfsd_slot_shrinker) { + rhltable_destroy(&nfs4_file_rhltable); + return -ENOMEM; + } + nfsd_slot_shrinker->count_objects = nfsd_slot_count; + nfsd_slot_shrinker->scan_objects = nfsd_slot_scan; + shrinker_register(nfsd_slot_shrinker); + set_max_delegations(); return 0; } @@ -8772,6 +8959,7 @@ void nfs4_state_shutdown(void) { rhltable_destroy(&nfs4_file_rhltable); + shrinker_free(nfsd_slot_shrinker); } static void @@ -8889,6 +9077,78 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, } /** + * set_cb_time - vet and set the timespec for a cb_getattr update + * @cb: timestamp from the CB_GETATTR response + * @orig: original timestamp in the inode + * @now: current time + * + * Given a timestamp in a CB_GETATTR response, check it against the + * current timestamp in the inode and the current time. Returns true + * if the inode's timestamp needs to be updated, and false otherwise. + * @cb may also be changed if the timestamp needs to be clamped. + */ +static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig, + const struct timespec64 *now) +{ + + /* + * "When the time presented is before the original time, then the + * update is ignored." Also no need to update if there is no change. + */ + if (timespec64_compare(cb, orig) <= 0) + return false; + + /* + * "When the time presented is in the future, the server can either + * clamp the new time to the current time, or it may + * return NFS4ERR_DELAY to the client, allowing it to retry." 
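A few hunks above, nfs4_state_start() registers the new "nfsd-DRC-slot" shrinker with the shrinker_alloc()/shrinker_register() API and tears it down with shrinker_free(). The following is a minimal, compile-oriented sketch of that lifecycle with hypothetical callbacks (the API itself is assumed from linux/shrinker.h), not the nfsd implementation.

#include <linux/shrinker.h>

static struct shrinker *my_shrinker;

static unsigned long my_count(struct shrinker *s, struct shrink_control *sc)
{
        return 0;                       /* report number of freeable objects */
}

static unsigned long my_scan(struct shrinker *s, struct shrink_control *sc)
{
        return SHRINK_STOP;             /* free up to sc->nr_to_scan objects */
}

static int my_cache_init(void)
{
        my_shrinker = shrinker_alloc(0, "my-cache");
        if (!my_shrinker)
                return -ENOMEM;
        my_shrinker->count_objects = my_count;
        my_shrinker->scan_objects = my_scan;
        shrinker_register(my_shrinker);
        return 0;
}

static void my_cache_exit(void)
{
        shrinker_free(my_shrinker);     /* unregisters and frees */
}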
+ */ + if (timespec64_compare(cb, now) > 0) { + /* clamp it */ + *cb = *now; + } + + return true; +} + +static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp) +{ + struct inode *inode = d_inode(dentry); + struct timespec64 now = current_time(inode); + struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; + struct iattr attrs = { }; + int ret; + + if (deleg_attrs_deleg(dp->dl_type)) { + struct timespec64 atime = inode_get_atime(inode); + struct timespec64 mtime = inode_get_mtime(inode); + + attrs.ia_atime = ncf->ncf_cb_atime; + attrs.ia_mtime = ncf->ncf_cb_mtime; + + if (set_cb_time(&attrs.ia_atime, &atime, &now)) + attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; + + if (set_cb_time(&attrs.ia_mtime, &mtime, &now)) { + attrs.ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET; + attrs.ia_ctime = attrs.ia_mtime; + } + } else { + attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME; + attrs.ia_mtime = attrs.ia_ctime = now; + } + + if (!attrs.ia_valid) + return 0; + + attrs.ia_valid |= ATTR_DELEG; + inode_lock(inode); + ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL); + inode_unlock(inode); + return ret; +} + +/** * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context * @dentry: dentry of inode to be checked for a conflict @@ -8914,7 +9174,6 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, struct file_lock_context *ctx; struct nfs4_delegation *dp = NULL; struct file_lease *fl; - struct iattr attrs; struct nfs4_cb_fattr *ncf; struct inode *inode = d_inode(dentry); @@ -8955,8 +9214,8 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, nfs4_cb_getattr(&dp->dl_cb_fattr); spin_unlock(&ctx->flc_lock); - wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, - TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); + wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING, + TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); if (ncf->ncf_cb_status) { /* Recall delegation only if client didn't respond */ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); @@ -8976,11 +9235,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, * not update the file's metadata with the client's * modified size */ - attrs.ia_mtime = attrs.ia_ctime = current_time(inode); - attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG; - inode_lock(inode); - err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL); - inode_unlock(inode); + err = cb_getattr_update_times(dentry, dp); if (err) { status = nfserrno(err); goto out_status; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e8adf62d3484..9eb8e5704622 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -55,6 +55,7 @@ #include "netns.h" #include "pnfs.h" #include "filecache.h" +#include "nfs4xdr_gen.h" #include "trace.h" @@ -520,6 +521,26 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, *umask = mask & S_IRWXUGO; iattr->ia_valid |= ATTR_MODE; } + if (bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) { + fattr4_time_deleg_access access; + + if (!xdrgen_decode_fattr4_time_deleg_access(argp->xdr, &access)) + return nfserr_bad_xdr; + iattr->ia_atime.tv_sec = access.seconds; + iattr->ia_atime.tv_nsec = access.nseconds; + iattr->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET | ATTR_DELEG; + } + if (bmval[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) { + fattr4_time_deleg_modify modify; + + if (!xdrgen_decode_fattr4_time_deleg_modify(argp->xdr, &modify)) + return nfserr_bad_xdr; + 
iattr->ia_mtime.tv_sec = modify.seconds; + iattr->ia_mtime.tv_nsec = modify.nseconds; + iattr->ia_ctime.tv_sec = modify.seconds; + iattr->ia_ctime.tv_nsec = modify.seconds; + iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET | ATTR_DELEG; + } /* request sanity: did attrlist4 contain the expected number of words? */ if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos) @@ -1066,13 +1087,13 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh return nfs_ok; if (!argp->minorversion) return nfserr_bad_xdr; - switch (w & NFS4_SHARE_WANT_MASK) { - case NFS4_SHARE_WANT_NO_PREFERENCE: - case NFS4_SHARE_WANT_READ_DELEG: - case NFS4_SHARE_WANT_WRITE_DELEG: - case NFS4_SHARE_WANT_ANY_DELEG: - case NFS4_SHARE_WANT_NO_DELEG: - case NFS4_SHARE_WANT_CANCEL: + switch (w & NFS4_SHARE_WANT_TYPE_MASK) { + case OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE: + case OPEN4_SHARE_ACCESS_WANT_READ_DELEG: + case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG: + case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG: + case OPEN4_SHARE_ACCESS_WANT_NO_DELEG: + case OPEN4_SHARE_ACCESS_WANT_CANCEL: break; default: return nfserr_bad_xdr; @@ -1884,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, return nfserr_bad_xdr; seq->seqid = be32_to_cpup(p++); seq->slotid = be32_to_cpup(p++); - seq->maxslots = be32_to_cpup(p++); + /* sa_highest_slotid counts from 0 but maxslots counts from 1 ... */ + seq->maxslots = be32_to_cpup(p++) + 1; seq->cachethis = be32_to_cpup(p); seq->status_flags = 0; @@ -2818,11 +2840,11 @@ static __be32 nfsd4_encode_nfsace4(struct xdr_stream *xdr, struct svc_rqst *rqst #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, - void *context, int len) + const struct lsm_context *context) { __be32 *p; - p = xdr_reserve_space(xdr, len + 4 + 4 + 4); + p = xdr_reserve_space(xdr, context->len + 4 + 4 + 4); if (!p) return nfserr_resource; @@ -2832,13 +2854,13 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, */ *p++ = cpu_to_be32(0); /* lfs */ *p++ = cpu_to_be32(0); /* pi */ - p = xdr_encode_opaque(p, context, len); + p = xdr_encode_opaque(p, context->context, context->len); return 0; } #else static inline __be32 nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, - void *context, int len) + struct lsm_context *context) { return 0; } #endif @@ -2919,9 +2941,9 @@ struct nfsd4_fattr_args { struct kstat stat; struct kstatfs statfs; struct nfs4_acl *acl; + u64 change_attr; #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - void *context; - int contextlen; + struct lsm_context context; #endif u32 rdattr_err; bool contextsupport; @@ -3018,7 +3040,6 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { const struct svc_export *exp = args->exp; - u64 c; if (unlikely(exp->ex_flags & NFSEXP_V4ROOT)) { u32 flush_time = convert_to_wallclock(exp->cd->flush_time); @@ -3029,9 +3050,7 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr, return nfserr_resource; return nfs_ok; } - - c = nfsd4_change_attribute(&args->stat); - return nfsd4_encode_changeid4(xdr, c); + return nfsd4_encode_changeid4(xdr, args->change_attr); } static __be32 nfsd4_encode_fattr4_size(struct xdr_stream *xdr, @@ -3372,12 +3391,28 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr, return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]); } +/* + * Copied from 
generic_remap_checks/generic_remap_file_range_prep. + * + * These generic functions use the file system's s_blocksize, but + * individual file systems aren't required to use + * generic_remap_file_range_prep. Until there is a mechanism for + * determining a particular file system's (or file's) clone block + * size, this is the best NFSD can do. + */ +static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct inode *inode = d_inode(args->dentry); + + return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize); +} + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { - return nfsd4_encode_security_label(xdr, args->rqstp, - args->context, args->contextlen); + return nfsd4_encode_security_label(xdr, args->rqstp, &args->context); } #endif @@ -3389,6 +3424,56 @@ static __be32 nfsd4_encode_fattr4_xattr_support(struct xdr_stream *xdr, return nfsd4_encode_bool(xdr, err == 0); } +#define NFSD_OA_SHARE_ACCESS (BIT(OPEN_ARGS_SHARE_ACCESS_READ) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WRITE) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_BOTH)) + +#define NFSD_OA_SHARE_DENY (BIT(OPEN_ARGS_SHARE_DENY_NONE) | \ + BIT(OPEN_ARGS_SHARE_DENY_READ) | \ + BIT(OPEN_ARGS_SHARE_DENY_WRITE) | \ + BIT(OPEN_ARGS_SHARE_DENY_BOTH)) + +#define NFSD_OA_SHARE_ACCESS_WANT (BIT(OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION)) + +#define NFSD_OA_OPEN_CLAIM (BIT(OPEN_ARGS_OPEN_CLAIM_NULL) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_PREVIOUS) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV)| \ + BIT(OPEN_ARGS_OPEN_CLAIM_FH) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH)) + +#define NFSD_OA_CREATE_MODE (BIT(OPEN_ARGS_CREATEMODE_UNCHECKED4) | \ + BIT(OPEN_ARGS_CREATE_MODE_GUARDED) | \ + BIT(OPEN_ARGS_CREATEMODE_EXCLUSIVE4) | \ + BIT(OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1)) + +static uint32_t oa_share_access = NFSD_OA_SHARE_ACCESS; +static uint32_t oa_share_deny = NFSD_OA_SHARE_DENY; +static uint32_t oa_share_access_want = NFSD_OA_SHARE_ACCESS_WANT; +static uint32_t oa_open_claim = NFSD_OA_OPEN_CLAIM; +static uint32_t oa_create_mode = NFSD_OA_CREATE_MODE; + +static const struct open_arguments4 nfsd_open_arguments = { + .oa_share_access = { .count = 1, .element = &oa_share_access }, + .oa_share_deny = { .count = 1, .element = &oa_share_deny }, + .oa_share_access_want = { .count = 1, .element = &oa_share_access_want }, + .oa_open_claim = { .count = 1, .element = &oa_open_claim }, + .oa_create_mode = { .count = 1, .element = &oa_create_mode }, +}; + +static __be32 nfsd4_encode_fattr4_open_arguments(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + if (!xdrgen_encode_fattr4_open_arguments(xdr, &nfsd_open_arguments)) + return nfserr_resource; + return nfs_ok; +} + static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_SUPPORTED_ATTRS] = nfsd4_encode_fattr4_supported_attrs, [FATTR4_TYPE] = nfsd4_encode_fattr4_type, @@ -3477,7 +3562,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop, [FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat, [FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop, - [FATTR4_CLONE_BLKSIZE] = 
nfsd4_encode_fattr4__noop, + [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize, [FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop, [FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop, @@ -3489,6 +3574,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop, [FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support, + [FATTR4_OPEN_ARGUMENTS] = nfsd4_encode_fattr4_open_arguments, }; /* @@ -3506,8 +3592,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct nfsd4_fattr_args args; struct svc_fh *tempfh = NULL; int starting_len = xdr->buf->len; - __be32 *attrlen_p, status; - int attrlen_offset; + unsigned int attrlen_offset; + __be32 attrlen, status; u32 attrmask[3]; int err; struct nfsd4_compoundres *resp = rqstp->rq_resp; @@ -3527,7 +3613,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.ignore_crossmnt = (ignore_crossmnt != 0); args.acl = NULL; #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - args.context = NULL; + args.context.context = NULL; #endif /* @@ -3544,7 +3630,11 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; } - if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { + if ((attrmask[0] & (FATTR4_WORD0_CHANGE | + FATTR4_WORD0_SIZE)) || + (attrmask[1] & (FATTR4_WORD1_TIME_ACCESS | + FATTR4_WORD1_TIME_MODIFY | + FATTR4_WORD1_TIME_METADATA))) { status = nfsd4_deleg_getattr_conflict(rqstp, dentry, &dp); if (status) goto out; @@ -3556,11 +3646,22 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (dp) { struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; - if (ncf->ncf_file_modified) + if (ncf->ncf_file_modified) { + ++ncf->ncf_initial_cinfo; args.stat.size = ncf->ncf_cur_fsize; + if (!timespec64_is_epoch(&ncf->ncf_cb_mtime)) + args.stat.mtime = ncf->ncf_cb_mtime; + } + args.change_attr = ncf->ncf_initial_cinfo; + + if (!timespec64_is_epoch(&ncf->ncf_cb_atime)) + args.stat.atime = ncf->ncf_cb_atime; nfs4_put_stid(&dp->dl_stid); + } else { + args.change_attr = nfsd4_change_attribute(&args.stat); } + if (err) goto out_nfserr; @@ -3607,7 +3708,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { if (exp->ex_flags & NFSEXP_SECURITY_LABEL) err = security_inode_getsecctx(d_inode(dentry), - &args.context, &args.contextlen); + &args.context); else err = -EOPNOTSUPP; args.contextsupport = (err == 0); @@ -3628,8 +3729,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, /* attr_vals */ attrlen_offset = xdr->buf->len; - attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); - if (!attrlen_p) + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT))) goto out_resource; bitmap_from_arr32(attr_bitmap, attrmask, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); @@ -3639,13 +3739,14 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (status != nfs_ok) goto out; } - *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); + attrlen = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); + write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, XDR_UNIT); status = nfs_ok; out: #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if (args.context) - security_release_secctx(args.context, args.contextlen); + if (args.context.context) + security_release_secctx(&args.context); #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(args.acl); if (tempfh) { @@ -4229,18 +4330,20 @@ nfsd4_encode_open_delegation4(struct xdr_stream *xdr, struct nfsd4_open 
*open) if (xdr_stream_encode_u32(xdr, open->op_delegate_type) != XDR_UNIT) return nfserr_resource; switch (open->op_delegate_type) { - case NFS4_OPEN_DELEGATE_NONE: + case OPEN_DELEGATE_NONE: status = nfs_ok; break; - case NFS4_OPEN_DELEGATE_READ: + case OPEN_DELEGATE_READ: + case OPEN_DELEGATE_READ_ATTRS_DELEG: /* read */ status = nfsd4_encode_open_read_delegation4(xdr, open); break; - case NFS4_OPEN_DELEGATE_WRITE: + case OPEN_DELEGATE_WRITE: + case OPEN_DELEGATE_WRITE_ATTRS_DELEG: /* write */ status = nfsd4_encode_open_write_delegation4(xdr, open); break; - case NFS4_OPEN_DELEGATE_NONE_EXT: + case OPEN_DELEGATE_NONE_EXT: /* od_whynone */ status = nfsd4_encode_open_none_delegation4(xdr, open); break; @@ -4317,6 +4420,15 @@ static __be32 nfsd4_encode_splice_read( __be32 nfserr; /* + * Splice read doesn't work if encoding has already wandered + * into the XDR buf's page array. + */ + if (unlikely(xdr->buf->page_len)) { + WARN_ON_ONCE(1); + return nfserr_serverfault; + } + + /* * Make sure there is room at the end of buf->head for * svcxdr_encode_opaque_pages() to create a tail buffer * to XDR-pad the payload. @@ -4398,25 +4510,23 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_compoundargs *argp = resp->rqstp->rq_argp; struct nfsd4_read *read = &u->read; struct xdr_stream *xdr = resp->xdr; - int starting_len = xdr->buf->len; bool splice_ok = argp->splice_ok; + unsigned int eof_offset; unsigned long maxcount; + __be32 wire_data[2]; struct file *file; - __be32 *p; if (nfserr) return nfserr; + + eof_offset = xdr->buf->len; file = read->rd_nf->nf_file; - p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ - if (!p) { + /* Reserve space for the eof flag and byte count */ + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2))) { WARN_ON_ONCE(splice_ok); return nfserr_resource; } - if (resp->xdr->buf->page_len && splice_ok) { - WARN_ON_ONCE(1); - return nfserr_serverfault; - } xdr_commit_encode(xdr); maxcount = min_t(unsigned long, read->rd_length, @@ -4427,12 +4537,13 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); if (nfserr) { - xdr_truncate_encode(xdr, starting_len); + xdr_truncate_encode(xdr, eof_offset); return nfserr; } - p = xdr_encode_bool(p, read->rd_eof); - *p = cpu_to_be32(read->rd_length); + wire_data[0] = read->rd_eof ? xdr_one : xdr_zero; + wire_data[1] = cpu_to_be32(read->rd_length); + write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2); return nfs_ok; } @@ -4441,25 +4552,21 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_readlink *readlink = &u->readlink; - __be32 *p, *maxcount_p, zero = xdr_zero; + __be32 *p, wire_count, zero = xdr_zero; struct xdr_stream *xdr = resp->xdr; - int length_offset = xdr->buf->len; + unsigned int length_offset; int maxcount, status; - maxcount_p = xdr_reserve_space(xdr, XDR_UNIT); - if (!maxcount_p) + /* linktext4.count */ + length_offset = xdr->buf->len; + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT))) return nfserr_resource; - maxcount = PAGE_SIZE; + /* linktext4.data */ + maxcount = PAGE_SIZE; p = xdr_reserve_space(xdr, maxcount); if (!p) return nfserr_resource; - /* - * XXX: By default, vfs_readlink() will truncate symlinks if they - * would overflow the buffer. Is this kosher in NFSv4? If not, one - * easy fix is: if vfs_readlink() precisely fills the buffer, assume - * that truncation occurred, and return NFS4ERR_RESOURCE. 
- */ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, (char *)p, &maxcount); if (nfserr == nfserr_isdir) @@ -4472,7 +4579,9 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = nfserrno(status); goto out_err; } - *maxcount_p = cpu_to_be32(maxcount); + + wire_count = cpu_to_be32(maxcount); + write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, XDR_UNIT); xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount)); write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero, xdr_pad_size(maxcount)); @@ -4607,14 +4716,42 @@ nfsd4_encode_rpcsec_gss_info(struct xdr_stream *xdr, } static __be32 -nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) +nfsd4_encode_secinfo4(struct xdr_stream *xdr, rpc_authflavor_t pf, + u32 *supported) +{ + struct rpcsec_gss_info info; + __be32 status; + + if (rpcauth_get_gssinfo(pf, &info) == 0) { + (*supported)++; + + /* flavor */ + status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS); + if (status != nfs_ok) + return status; + /* flavor_info */ + status = nfsd4_encode_rpcsec_gss_info(xdr, &info); + if (status != nfs_ok) + return status; + } else if (pf < RPC_AUTH_MAXFLAVOR) { + (*supported)++; + + /* flavor */ + status = nfsd4_encode_uint32_t(xdr, pf); + if (status != nfs_ok) + return status; + } + return nfs_ok; +} + +static __be32 +nfsd4_encode_SECINFO4resok(struct xdr_stream *xdr, struct svc_export *exp) { u32 i, nflavs, supported; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; - static bool report = true; - __be32 *flavorsp; - __be32 status; + unsigned int count_offset; + __be32 status, wire_count; if (exp->ex_nflavors) { flavs = exp->ex_flavors; @@ -4636,43 +4773,20 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) } } - supported = 0; - flavorsp = xdr_reserve_space(xdr, XDR_UNIT); - if (!flavorsp) + count_offset = xdr->buf->len; + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT))) return nfserr_resource; - for (i = 0; i < nflavs; i++) { - rpc_authflavor_t pf = flavs[i].pseudoflavor; - struct rpcsec_gss_info info; - - if (rpcauth_get_gssinfo(pf, &info) == 0) { - supported++; - - /* flavor */ - status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS); - if (status != nfs_ok) - return status; - /* flavor_info */ - status = nfsd4_encode_rpcsec_gss_info(xdr, &info); - if (status != nfs_ok) - return status; - } else if (pf < RPC_AUTH_MAXFLAVOR) { - supported++; - - /* flavor */ - status = nfsd4_encode_uint32_t(xdr, pf); - if (status != nfs_ok) - return status; - } else { - if (report) - pr_warn("NFS: SECINFO: security flavor %u " - "is not supported\n", pf); - } + for (i = 0, supported = 0; i < nflavs; i++) { + status = nfsd4_encode_secinfo4(xdr, flavs[i].pseudoflavor, + &supported); + if (status != nfs_ok) + return status; } - if (nflavs != supported) - report = false; - *flavorsp = cpu_to_be32(supported); + wire_count = cpu_to_be32(supported); + write_bytes_to_xdr_buf(xdr->buf, count_offset, &wire_count, + XDR_UNIT); return 0; } @@ -4683,7 +4797,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo *secinfo = &u->secinfo; struct xdr_stream *xdr = resp->xdr; - return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); + return nfsd4_encode_SECINFO4resok(xdr, secinfo->si_exp); } static __be32 @@ -4693,7 +4807,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name; struct xdr_stream *xdr = resp->xdr; - return 
nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); + return nfsd4_encode_SECINFO4resok(xdr, secinfo->sin_exp); } static __be32 @@ -4968,7 +5082,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr != nfs_ok) return nfserr; /* sr_target_highest_slotid */ - nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); + nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1); if (nfserr != nfs_ok) return nfserr; /* sr_status_flags */ @@ -5296,17 +5410,20 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; bool splice_ok = argp->splice_ok; + unsigned int offset_offset; + __be32 nfserr, wire_count; unsigned long maxcount; - __be32 nfserr, *p; + __be64 wire_offset; - /* Content type, offset, byte count */ - p = xdr_reserve_space(xdr, 4 + 8 + 4); - if (!p) + if (xdr_stream_encode_u32(xdr, NFS4_CONTENT_DATA) != XDR_UNIT) return nfserr_io; - if (resp->xdr->buf->page_len && splice_ok) { - WARN_ON_ONCE(splice_ok); - return nfserr_serverfault; - } + + offset_offset = xdr->buf->len; + + /* Reserve space for the byte offset and count */ + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 3))) + return nfserr_io; + xdr_commit_encode(xdr); maxcount = min_t(unsigned long, read->rd_length, (xdr->buf->buflen - xdr->buf->len)); @@ -5318,10 +5435,12 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, if (nfserr) return nfserr; - *p++ = cpu_to_be32(NFS4_CONTENT_DATA); - p = xdr_encode_hyper(p, read->rd_offset); - *p = cpu_to_be32(read->rd_length); - + wire_offset = cpu_to_be64(read->rd_offset); + write_bytes_to_xdr_buf(xdr->buf, offset_offset, &wire_offset, + XDR_UNIT * 2); + wire_count = cpu_to_be32(read->rd_length); + write_bytes_to_xdr_buf(xdr->buf, offset_offset + XDR_UNIT * 2, + &wire_count, XDR_UNIT); return nfs_ok; } @@ -5332,16 +5451,17 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read = &u->read; struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; - int starting_len = xdr->buf->len; + unsigned int eof_offset; + __be32 wire_data[2]; u32 segments = 0; - __be32 *p; if (nfserr) return nfserr; - /* eof flag, segment count */ - p = xdr_reserve_space(xdr, 4 + 4); - if (!p) + eof_offset = xdr->buf->len; + + /* Reserve space for the eof flag and segment count */ + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2))) return nfserr_io; xdr_commit_encode(xdr); @@ -5351,15 +5471,16 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = nfsd4_encode_read_plus_data(resp, read); if (nfserr) { - xdr_truncate_encode(xdr, starting_len); + xdr_truncate_encode(xdr, eof_offset); return nfserr; } segments++; out: - p = xdr_encode_bool(p, read->rd_eof); - *p = cpu_to_be32(segments); + wire_data[0] = read->rd_eof ? xdr_one : xdr_zero; + wire_data[1] = cpu_to_be32(segments); + write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2); return nfserr; } diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c new file mode 100644 index 000000000000..a17b5d8e60b3 --- /dev/null +++ b/fs/nfsd/nfs4xdr_gen.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: GPL-2.0 +// Generated by xdrgen. Manual edits will be lost. 
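The encoder rework above (READ, READLINK, SECINFO, READ_PLUS) consistently records a byte offset and backfills the length or count later with write_bytes_to_xdr_buf(), instead of holding a pointer from xdr_reserve_space() across further encoding. A condensed sketch of that reserve-then-backfill pattern is shown below; the encoder itself is hypothetical, while the xdr_stream helpers are assumed from include/linux/sunrpc/xdr.h.

static __be32 encode_counted_items(struct xdr_stream *xdr, int nitems)
{
        unsigned int count_offset = xdr->buf->len;
        __be32 wire_count;
        int i, encoded = 0;

        /* Reserve room for the count; the final value is not known yet. */
        if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
                return nfserr_resource;

        for (i = 0; i < nitems; i++) {
                if (xdr_stream_encode_u32(xdr, i) != XDR_UNIT)
                        return nfserr_resource;
                encoded++;
        }

        /* Backfill the count at the remembered offset. */
        wire_count = cpu_to_be32(encoded);
        write_bytes_to_xdr_buf(xdr->buf, count_offset, &wire_count, XDR_UNIT);
        return nfs_ok;
}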
+// XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x +// XDR specification modification time: Mon Oct 14 09:10:13 2024 + +#include <linux/sunrpc/svc.h> + +#include "nfs4xdr_gen.h" + +static bool __maybe_unused +xdrgen_decode_int64_t(struct xdr_stream *xdr, int64_t *ptr) +{ + return xdrgen_decode_hyper(xdr, ptr); +}; + +static bool __maybe_unused +xdrgen_decode_uint32_t(struct xdr_stream *xdr, uint32_t *ptr) +{ + return xdrgen_decode_unsigned_int(xdr, ptr); +}; + +static bool __maybe_unused +xdrgen_decode_bitmap4(struct xdr_stream *xdr, bitmap4 *ptr) +{ + if (xdr_stream_decode_u32(xdr, &ptr->count) < 0) + return false; + for (u32 i = 0; i < ptr->count; i++) + if (!xdrgen_decode_uint32_t(xdr, &ptr->element[i])) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_decode_nfstime4(struct xdr_stream *xdr, struct nfstime4 *ptr) +{ + if (!xdrgen_decode_int64_t(xdr, &ptr->seconds)) + return false; + if (!xdrgen_decode_uint32_t(xdr, &ptr->nseconds)) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_decode_fattr4_offline(struct xdr_stream *xdr, fattr4_offline *ptr) +{ + return xdrgen_decode_bool(xdr, ptr); +}; + +static bool __maybe_unused +xdrgen_decode_open_arguments4(struct xdr_stream *xdr, struct open_arguments4 *ptr) +{ + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access)) + return false; + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_deny)) + return false; + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access_want)) + return false; + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_open_claim)) + return false; + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_create_mode)) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_decode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +bool +xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr) +{ + return xdrgen_decode_open_arguments4(xdr, ptr); +}; + +bool +xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr) +{ + return xdrgen_decode_nfstime4(xdr, ptr); +}; + +bool +xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr) +{ + return xdrgen_decode_nfstime4(xdr, ptr); +}; + +static bool __maybe_unused +xdrgen_decode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused 
+xdrgen_encode_int64_t(struct xdr_stream *xdr, const int64_t value) +{ + return xdrgen_encode_hyper(xdr, value); +}; + +static bool __maybe_unused +xdrgen_encode_uint32_t(struct xdr_stream *xdr, const uint32_t value) +{ + return xdrgen_encode_unsigned_int(xdr, value); +}; + +static bool __maybe_unused +xdrgen_encode_bitmap4(struct xdr_stream *xdr, const bitmap4 value) +{ + if (xdr_stream_encode_u32(xdr, value.count) != XDR_UNIT) + return false; + for (u32 i = 0; i < value.count; i++) + if (!xdrgen_encode_uint32_t(xdr, value.element[i])) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_encode_nfstime4(struct xdr_stream *xdr, const struct nfstime4 *value) +{ + if (!xdrgen_encode_int64_t(xdr, value->seconds)) + return false; + if (!xdrgen_encode_uint32_t(xdr, value->nseconds)) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_encode_fattr4_offline(struct xdr_stream *xdr, const fattr4_offline value) +{ + return xdrgen_encode_bool(xdr, value); +}; + +static bool __maybe_unused +xdrgen_encode_open_arguments4(struct xdr_stream *xdr, const struct open_arguments4 *value) +{ + if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access)) + return false; + if (!xdrgen_encode_bitmap4(xdr, value->oa_share_deny)) + return false; + if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access_want)) + return false; + if (!xdrgen_encode_bitmap4(xdr, value->oa_open_claim)) + return false; + if (!xdrgen_encode_bitmap4(xdr, value->oa_create_mode)) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_encode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +bool +xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value) +{ + return xdrgen_encode_open_arguments4(xdr, value); +}; + +bool +xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value) +{ + return xdrgen_encode_nfstime4(xdr, value); +}; + +bool +xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value) +{ + return xdrgen_encode_nfstime4(xdr, value); +}; + +static bool __maybe_unused +xdrgen_encode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h new file mode 100644 index 000000000000..41a0033b7256 --- /dev/null +++ b/fs/nfsd/nfs4xdr_gen.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Generated by xdrgen. Manual edits will be lost. 
*/ +/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */ +/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */ + +#ifndef _LINUX_XDRGEN_NFS4_1_DECL_H +#define _LINUX_XDRGEN_NFS4_1_DECL_H + +#include <linux/types.h> + +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/xdrgen/_defs.h> +#include <linux/sunrpc/xdrgen/_builtins.h> +#include <linux/sunrpc/xdrgen/nfs4_1.h> + +bool xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr); +bool xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value); + +bool xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr); +bool xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value); + +bool xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr); +bool xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value); + +#endif /* _LINUX_XDRGEN_NFS4_1_DECL_H */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3adbc05ebaac..332bfb508c20 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -48,7 +48,6 @@ enum { NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, - NFSD_MaxConnections, NFSD_Filecache, NFSD_Leasetime, NFSD_Gracetime, @@ -68,7 +67,6 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); static ssize_t write_versions(struct file *file, char *buf, size_t size); static ssize_t write_ports(struct file *file, char *buf, size_t size); static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); -static ssize_t write_maxconn(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size); @@ -87,7 +85,6 @@ static ssize_t (*const write_op[])(struct file *, char *, size_t) = { [NFSD_Versions] = write_versions, [NFSD_Ports] = write_ports, [NFSD_MaxBlkSize] = write_maxblksize, - [NFSD_MaxConnections] = write_maxconn, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, [NFSD_Gracetime] = write_gracetime, @@ -902,44 +899,6 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize); } -/* - * write_maxconn - Set or report the current max number of connections - * - * Input: - * buf: ignored - * size: zero - * OR - * - * Input: - * buf: C string containing an unsigned - * integer value representing the new - * number of max connections - * size: non-zero length of C string in @buf - * Output: - * On success: passed-in buffer filled with '\n'-terminated C string - * containing numeric value of max_connections setting - * for this net namespace; - * return code is the size in bytes of the string - * On error: return code is zero or a negative errno value - */ -static ssize_t write_maxconn(struct file *file, char *buf, size_t size) -{ - char *mesg = buf; - struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); - unsigned int maxconn = nn->max_connections; - - if (size > 0) { - int rv = get_uint(&mesg, &maxconn); - - if (rv) - return rv; - trace_nfsd_ctl_maxconn(netns(file), maxconn); - nn->max_connections = maxconn; - } - - return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn); -} - #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time64_t *time, struct nfsd_net *nn) @@ -1372,7 +1331,6 @@ static int 
nfsd_fill_super(struct super_block *sb, struct fs_context *fc) [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, - [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, @@ -1653,7 +1611,7 @@ out_unlock: */ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) { - int *nthreads, count = 0, nrpools, i, ret = -EOPNOTSUPP, rem; + int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem; struct net *net = genl_info_net(info); struct nfsd_net *nn = net_generic(net, nfsd_net_id); const struct nlattr *attr; @@ -1665,12 +1623,11 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) /* count number of SERVER_THREADS values */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { if (nla_type(attr) == NFSD_A_SERVER_THREADS) - count++; + nrpools++; } mutex_lock(&nfsd_mutex); - nrpools = max(count, nfsd_nrpools(net)); nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL); if (!nthreads) { ret = -ENOMEM; @@ -1959,6 +1916,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) struct svc_serv *serv; LIST_HEAD(permsocks); struct nfsd_net *nn; + bool delete = false; int err, rem; mutex_lock(&nfsd_mutex); @@ -2019,34 +1977,28 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) } } - /* For now, no removing old sockets while server is running */ - if (serv->sv_nrthreads && !list_empty(&permsocks)) { + /* + * If there are listener transports remaining on the permsocks list, + * it means we were asked to remove a listener. + */ + if (!list_empty(&permsocks)) { list_splice_init(&permsocks, &serv->sv_permsocks); - spin_unlock_bh(&serv->sv_lock); - err = -EBUSY; - goto out_unlock_mtx; + delete = true; } + spin_unlock_bh(&serv->sv_lock); - /* Close the remaining sockets on the permsocks list */ - while (!list_empty(&permsocks)) { - xprt = list_first_entry(&permsocks, struct svc_xprt, xpt_list); - list_move(&xprt->xpt_list, &serv->sv_permsocks); - - /* - * Newly-created sockets are born with the BUSY bit set. Clear - * it if there are no threads, since nothing can pick it up - * in that case. - */ - if (!serv->sv_nrthreads) - clear_bit(XPT_BUSY, &xprt->xpt_flags); - - set_bit(XPT_CLOSE, &xprt->xpt_flags); - spin_unlock_bh(&serv->sv_lock); - svc_xprt_close(xprt); - spin_lock_bh(&serv->sv_lock); + /* Do not remove listeners while there are active threads. */ + if (serv->sv_nrthreads) { + err = -EBUSY; + goto out_unlock_mtx; } - spin_unlock_bh(&serv->sv_lock); + /* + * Since we can't delete an arbitrary llist entry, destroy the + * remaining listeners and recreate the list. 
+ */ + if (delete) + svc_xprt_destroy_all(serv, net); /* walk list of addrs again, open any that still don't exist */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { @@ -2073,6 +2025,9 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) xprt = svc_find_listener(serv, xcl_name, net, sa); if (xprt) { + if (delete) + WARN_ONCE(1, "Transport type=%s already exists\n", + xcl_name); svc_xprt_put(xprt); continue; } @@ -2246,8 +2201,14 @@ static __net_init int nfsd_net_init(struct net *net) NFSD_STATS_COUNTERS_NUM); if (retval) goto out_repcache_error; + memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats)); nn->nfsd_svcstats.program = &nfsd_programs[0]; + if (!nfsd_proc_stat_init(net)) { + retval = -ENOMEM; + goto out_proc_error; + } + for (i = 0; i < sizeof(nn->nfsd_versions); i++) nn->nfsd_versions[i] = nfsd_support_version(i); for (i = 0; i < sizeof(nn->nfsd4_minorversions); i++) @@ -2257,12 +2218,14 @@ static __net_init int nfsd_net_init(struct net *net) nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); - nfsd_proc_stat_init(net); #if IS_ENABLED(CONFIG_NFS_LOCALIO) + spin_lock_init(&nn->local_clients_lock); INIT_LIST_HEAD(&nn->local_clients); #endif return 0; +out_proc_error: + percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); out_repcache_error: nfsd_idmap_shutdown(net); out_idmap_error: @@ -2276,14 +2239,15 @@ out_export_error: * nfsd_net_pre_exit - Disconnect localio clients from net namespace * @net: a network namespace that is about to be destroyed * - * This invalidated ->net pointers held by localio clients + * This invalidates ->net pointers held by localio clients * while they can still safely access nn->counter. */ static __net_exit void nfsd_net_pre_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfs_uuid_invalidate_clients(&nn->local_clients); + nfs_localio_invalidate_clients(&nn->local_clients, + &nn->local_clients_lock); } #endif @@ -2326,12 +2290,9 @@ static int __init init_nfsd(void) if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ - retval = create_proc_exports_entry(); - if (retval) - goto out_free_lockd; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_exports; + goto out_free_lockd; retval = register_cld_notifier(); if (retval) goto out_free_subsys; @@ -2340,22 +2301,26 @@ static int __init init_nfsd(void) goto out_free_cld; retval = register_filesystem(&nfsd_fs_type); if (retval) - goto out_free_all; + goto out_free_nfsd4; retval = genl_register_family(&nfsd_nl_family); if (retval) + goto out_free_filesystem; + retval = create_proc_exports_entry(); + if (retval) goto out_free_all; nfsd_localio_ops_init(); return 0; out_free_all: + genl_unregister_family(&nfsd_nl_family); +out_free_filesystem: + unregister_filesystem(&nfsd_fs_type); +out_free_nfsd4: nfsd4_destroy_laundry_wq(); out_free_cld: unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); -out_free_exports: - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); @@ -2368,14 +2333,14 @@ out_free_slabs: static void __exit exit_nfsd(void) { + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); genl_unregister_family(&nfsd_nl_family); unregister_filesystem(&nfsd_fs_type); nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); 
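The init_nfsd() rework above re-orders registration so that every step is unwound in exactly the reverse order on failure, and exit_nfsd() mirrors that order. A compact sketch of the goto-unwind idiom with hypothetical subsystems (not the nfsd code itself):

static int __init my_module_init(void)
{
        int ret;

        ret = register_step_a();
        if (ret)
                return ret;
        ret = register_step_b();
        if (ret)
                goto out_undo_a;
        ret = register_step_c();
        if (ret)
                goto out_undo_b;
        return 0;

out_undo_b:
        unregister_step_b();
out_undo_a:
        unregister_step_a();
        return ret;
}

static void __exit my_module_exit(void)
{
        /* mirror image of my_module_init() */
        unregister_step_c();
        unregister_step_b();
        unregister_step_a();
}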
unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 4b56ba1e8e48..e2997f0ffbc5 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -88,9 +88,6 @@ struct nfsd_genl_rqstp { extern struct svc_program nfsd_programs[]; extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; -extern spinlock_t nfsd_drc_lock; -extern unsigned long nfsd_drc_max_mem; -extern unsigned long nfsd_drc_mem_used; extern atomic_t nfsd_th_cnt; /* number of available threads */ extern const struct seq_operations nfs_exports_op; @@ -458,7 +455,10 @@ enum { (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ FATTR4_WORD2_MODE_UMASK | \ NFSD4_2_SECURITY_ATTRS | \ - FATTR4_WORD2_XATTR_SUPPORT) + FATTR4_WORD2_XATTR_SUPPORT | \ + FATTR4_WORD2_TIME_DELEG_ACCESS | \ + FATTR4_WORD2_TIME_DELEG_MODIFY | \ + FATTR4_WORD2_OPEN_ARGUMENTS) extern const u32 nfsd_suppattrs[3][3]; @@ -528,7 +528,10 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) #endif #define NFSD_WRITEABLE_ATTRS_WORD2 \ (FATTR4_WORD2_MODE_UMASK \ - | MAYBE_FATTR4_WORD2_SECURITY_LABEL) + | MAYBE_FATTR4_WORD2_SECURITY_LABEL \ + | FATTR4_WORD2_TIME_DELEG_ACCESS \ + | FATTR4_WORD2_TIME_DELEG_MODIFY \ + ) #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ NFSD_WRITEABLE_ATTRS_WORD0 diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 6a831cb242df..aef474f1b84b 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -222,7 +222,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); put_cred(override_creds(new)); - put_cred(new); } else { error = nfsd_setuser_and_check_port(rqstp, cred, exp); if (error) @@ -381,6 +380,9 @@ __fh_verify(struct svc_rqst *rqstp, error = check_nfsd_access(exp, rqstp, may_bypass_gss); if (error) goto out; + /* During LOCALIO call to fh_verify will be called with a NULL rqstp */ + if (rqstp) + svc_xprt_set_valid(rqstp->rq_xprt); /* Finally, check access permissions. */ error = nfsd_permission(cred, exp, dentry, access); diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 876152a91f12..5103c2f4d225 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -267,7 +267,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1, return true; } -#ifdef CONFIG_CRC32 /** * knfsd_fh_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -279,12 +278,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } -#else -static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) -{ - return 0; -} -#endif /** * fh_clear_pre_post_attrs - Reset pre/post attributes diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 49e2f32102ab..8ed143ef8b41 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -70,16 +70,6 @@ static __be32 nfsd_init_request(struct svc_rqst *, */ DEFINE_MUTEX(nfsd_mutex); -/* - * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. - * nfsd_drc_max_pages limits the total amount of memory available for - * version 4.1 DRC caches. - * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 
- */ -DEFINE_SPINLOCK(nfsd_drc_lock); -unsigned long nfsd_drc_max_mem; -unsigned long nfsd_drc_mem_used; - #if IS_ENABLED(CONFIG_NFS_LOCALIO) static const struct svc_version *localio_versions[] = { [1] = &localio_version1, @@ -214,32 +204,32 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change return 0; } -bool nfsd_serv_try_get(struct net *net) __must_hold(rcu) +bool nfsd_net_try_get(struct net *net) __must_hold(rcu) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - return (nn && percpu_ref_tryget_live(&nn->nfsd_serv_ref)); + return (nn && percpu_ref_tryget_live(&nn->nfsd_net_ref)); } -void nfsd_serv_put(struct net *net) __must_hold(rcu) +void nfsd_net_put(struct net *net) __must_hold(rcu) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - percpu_ref_put(&nn->nfsd_serv_ref); + percpu_ref_put(&nn->nfsd_net_ref); } -static void nfsd_serv_done(struct percpu_ref *ref) +static void nfsd_net_done(struct percpu_ref *ref) { - struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref); + struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref); - complete(&nn->nfsd_serv_confirm_done); + complete(&nn->nfsd_net_confirm_done); } -static void nfsd_serv_free(struct percpu_ref *ref) +static void nfsd_net_free(struct percpu_ref *ref) { - struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref); + struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref); - complete(&nn->nfsd_serv_free_done); + complete(&nn->nfsd_net_free_done); } /* @@ -406,13 +396,13 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) if (ret) goto out_filecache; +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + nfsd4_ssc_init_umount_work(nn); +#endif ret = nfs4_state_start_net(net); if (ret) goto out_reply_cache; -#ifdef CONFIG_NFSD_V4_2_INTER_SSC - nfsd4_ssc_init_umount_work(nn); -#endif nn->nfsd_net_up = true; return 0; @@ -436,6 +426,10 @@ static void nfsd_shutdown_net(struct net *net) if (!nn->nfsd_net_up) return; + + percpu_ref_kill_and_confirm(&nn->nfsd_net_ref, nfsd_net_done); + wait_for_completion(&nn->nfsd_net_confirm_done); + nfsd_export_flush(net); nfs4_state_shutdown_net(net); nfsd_reply_cache_shutdown(nn); @@ -444,7 +438,10 @@ static void nfsd_shutdown_net(struct net *net) lockd_down(net); nn->lockd_up = false; } - percpu_ref_exit(&nn->nfsd_serv_ref); + + wait_for_completion(&nn->nfsd_net_free_done); + percpu_ref_exit(&nn->nfsd_net_ref); + nn->nfsd_net_up = false; nfsd_shutdown_generic(); } @@ -526,11 +523,6 @@ void nfsd_destroy_serv(struct net *net) lockdep_assert_held(&nfsd_mutex); - percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref, nfsd_serv_done); - wait_for_completion(&nn->nfsd_serv_confirm_done); - wait_for_completion(&nn->nfsd_serv_free_done); - /* percpu_ref_exit is called in nfsd_shutdown_net */ - spin_lock(&nfsd_notifier_lock); nn->nfsd_serv = NULL; spin_unlock(&nfsd_notifier_lock); @@ -575,27 +567,6 @@ void nfsd_reset_versions(struct nfsd_net *nn) } } -/* - * Each session guarantees a negotiated per slot memory cache for replies - * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated - * NFSv4.1 server might want to use more memory for a DRC than a machine - * with mutiple services. - * - * Impose a hard limit on the number of pages for the DRC which varies - * according to the machines free pages. This is of course only a default. - * - * For now this is a #defined shift which could be under admin control - * in the future. 
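The renaming of nfsd_serv_ref to nfsd_net_ref above follows the standard percpu_ref lifecycle: percpu_ref_tryget_live() on the fast path, then percpu_ref_kill_and_confirm() plus two completions at shutdown before percpu_ref_exit(). A minimal sketch of that lifecycle with a hypothetical object (the percpu_ref API is assumed from linux/percpu-refcount.h):

#include <linux/percpu-refcount.h>
#include <linux/completion.h>

struct my_obj {
        struct percpu_ref ref;
        struct completion confirm_done;
        struct completion free_done;
};

static void my_obj_confirm(struct percpu_ref *ref)
{
        complete(&container_of(ref, struct my_obj, ref)->confirm_done);
}

static void my_obj_release(struct percpu_ref *ref)
{
        complete(&container_of(ref, struct my_obj, ref)->free_done);
}

static int my_obj_init(struct my_obj *o)
{
        init_completion(&o->confirm_done);
        init_completion(&o->free_done);
        return percpu_ref_init(&o->ref, my_obj_release, 0, GFP_KERNEL);
}

static bool my_obj_tryget(struct my_obj *o)
{
        return percpu_ref_tryget_live(&o->ref); /* fails once killed */
}

static void my_obj_shutdown(struct my_obj *o)
{
        percpu_ref_kill_and_confirm(&o->ref, my_obj_confirm);
        wait_for_completion(&o->confirm_done);  /* no new tryget_live() succeeds */
        wait_for_completion(&o->free_done);     /* last reference dropped */
        percpu_ref_exit(&o->ref);
}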
@@ -575,27 +567,6 @@ void nfsd_reset_versions(struct nfsd_net *nn)
 	}
 }
 
-/*
- * Each session guarantees a negotiated per slot memory cache for replies
- * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
- * NFSv4.1 server might want to use more memory for a DRC than a machine
- * with mutiple services.
- *
- * Impose a hard limit on the number of pages for the DRC which varies
- * according to the machines free pages. This is of course only a default.
- *
- * For now this is a #defined shift which could be under admin control
- * in the future.
- */
-static void set_max_drc(void)
-{
-	#define NFSD_DRC_SIZE_SHIFT	7
-	nfsd_drc_max_mem = (nr_free_buffer_pages()
-					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
-	nfsd_drc_mem_used = 0;
-	dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
-}
-
 static int nfsd_get_default_max_blksize(void)
 {
 	struct sysinfo i;
@@ -652,12 +623,12 @@ int nfsd_create_serv(struct net *net)
 	if (nn->nfsd_serv)
 		return 0;
 
-	error = percpu_ref_init(&nn->nfsd_serv_ref, nfsd_serv_free,
+	error = percpu_ref_init(&nn->nfsd_net_ref, nfsd_net_free,
 				0, GFP_KERNEL);
 	if (error)
 		return error;
-	init_completion(&nn->nfsd_serv_free_done);
-	init_completion(&nn->nfsd_serv_confirm_done);
+	init_completion(&nn->nfsd_net_free_done);
+	init_completion(&nn->nfsd_net_confirm_done);
 
 	if (nfsd_max_blksize == 0)
 		nfsd_max_blksize = nfsd_get_default_max_blksize();
@@ -668,7 +639,6 @@ int nfsd_create_serv(struct net *net)
 	if (serv == NULL)
 		return -ENOMEM;
 
-	serv->sv_maxconn = nn->max_connections;
 	error = svc_bind(serv, net);
 	if (error < 0) {
 		svc_destroy(&serv);
@@ -678,7 +648,6 @@ int nfsd_create_serv(struct net *net)
 	nn->nfsd_serv = serv;
 	spin_unlock(&nfsd_notifier_lock);
 
-	set_max_drc();
 	/* check if the notifier is already set */
 	if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
 		register_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -954,11 +923,7 @@ nfsd(void *vrqstp)
 	 * The main request loop
 	 */
 	while (!svc_thread_should_stop(rqstp)) {
-		/* Update sv_maxconn if it has changed */
-		rqstp->rq_server->sv_maxconn = nn->max_connections;
-
 		svc_recv(rqstp);
-
 		nfsd_file_net_dispose(nn);
 	}
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e16bb3717fb9..290e29dd43eb 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -67,12 +67,15 @@ typedef struct {
 struct nfsd4_callback {
 	struct nfs4_client *cb_clp;
 	struct rpc_message cb_msg;
+#define NFSD4_CALLBACK_RUNNING		(0)
+#define NFSD4_CALLBACK_WAKE		(1)
+#define NFSD4_CALLBACK_REQUEUE		(2)
+	unsigned long cb_flags;
 	const struct nfsd4_callback_ops *cb_ops;
 	struct work_struct cb_work;
 	int cb_seq_status;
 	int cb_status;
 	int cb_held_slot;
-	bool cb_need_restart;
 };
 
 struct nfsd4_callback_ops {
@@ -159,16 +162,14 @@ struct nfs4_cb_fattr {
 	/* from CB_GETATTR reply */
 	u64 ncf_cb_change;
 	u64 ncf_cb_fsize;
+	struct timespec64 ncf_cb_mtime;
+	struct timespec64 ncf_cb_atime;
 
-	unsigned long ncf_cb_flags;
 	bool ncf_file_modified;
 	u64 ncf_initial_cinfo;
 	u64 ncf_cur_fsize;
 };
 
-/* bits for ncf_cb_flags */
-#define	CB_GETATTR_BUSY		0
-
 /*
  * Represents a delegation stateid. The nfs4_client holds references to these
  * and they are put when it is being destroyed or when the delegation is
@@ -196,8 +197,8 @@ struct nfs4_delegation {
 	struct list_head	dl_perclnt;
 	struct list_head	dl_recall_lru;  /* delegation recalled */
 	struct nfs4_clnt_odstate *dl_clnt_odstate;
-	u32			dl_type;
 	time64_t		dl_time;
+	u32			dl_type;
 /* For recall: */
 	int			dl_retries;
 	struct nfsd4_callback	dl_recall;
@@ -207,6 +208,22 @@ struct nfs4_delegation {
 	struct nfs4_cb_fattr    dl_cb_fattr;
 };
 
+static inline bool deleg_is_read(u32 dl_type)
+{
+	return (dl_type == OPEN_DELEGATE_READ || dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG);
+}
+
+static inline bool deleg_is_write(u32 dl_type)
+{
+	return (dl_type == OPEN_DELEGATE_WRITE || dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG);
+}
+
+static inline bool deleg_attrs_deleg(u32 dl_type)
+{
+	return dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG ||
+	       dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG;
+}
+
 #define cb_to_delegation(cb) \
 	container_of(cb, struct nfs4_delegation, dl_recall)
 
@@ -227,8 +244,11 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
 	return container_of(s, struct nfs4_delegation, dl_stid);
 }
 
-/* Maximum number of slots per session. 160 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION	160
+/* Maximum number of slots per session. This is for sanity-check only.
+ * It could be increased if we had a mechanism to shutdown misbehaving clients.
+ * A large number can be needed to get good throughput on high-latency servers.
+ */
+#define NFSD_MAX_SLOTS_PER_SESSION	2048
 /* Maximum session per slot cache size */
 #define NFSD_SLOT_CACHE_SIZE		2048
 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
@@ -242,10 +262,12 @@ struct nfsd4_slot {
 	struct svc_cred	sl_cred;
 	u32	sl_datalen;
 	u16	sl_opcnt;
+	u16	sl_generation;
 #define NFSD4_SLOT_INUSE	(1 << 0)
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
 	char	sl_data[];
 };
@@ -318,16 +340,19 @@ struct nfsd4_session {
 	u32			se_cb_slot_avail; /* bitmap of available slots */
 	u32			se_cb_highest_slot;	/* highest slot client wants */
 	u32			se_cb_prog;
-	bool			se_dead;
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
+	struct list_head	se_all_sessions;/* global list of sessions */
 	struct nfs4_client	*se_client;
 	struct nfs4_sessionid	se_sessionid;
 	struct nfsd4_channel_attrs se_fchannel;
 	struct nfsd4_cb_sec	se_cb_sec;
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
-	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
+	struct xarray		se_slots;	/* forward channel slots */
+	u16			se_slot_gen;
+	bool			se_dead;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
@@ -426,7 +451,6 @@ struct nfs4_client {
 #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
 #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
 					 1 << NFSD4_CLIENT_CB_KILL)
-#define NFSD4_CLIENT_CB_RECALL_ANY	(6)
 	unsigned long		cl_flags;
 
 	struct workqueue_struct *cl_callback_wq;
@@ -472,7 +496,6 @@ struct nfs4_client {
 
 	struct nfsd4_cb_recall_any	*cl_ra;
 	time64_t		cl_ra_time;
-	struct list_head	cl_ra_cblist;
 };
 
 /* struct nfs4_client_reset
@@ -505,7 +528,7 @@ struct nfs4_replay {
 	unsigned int		rp_buflen;
 	char			*rp_buf;
 	struct knfsd_fh		rp_openfh;
-	atomic_t		rp_locked;
+	int			rp_locked;
 	char			rp_ibuf[NFSD4_REPLAY_ISIZE];
 };
 
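The deleg_is_read(), deleg_is_write() and deleg_attrs_deleg() helpers added above let callers treat the new *_ATTRS_DELEG delegation types uniformly with the classic read/write types. A brief sketch of the kind of call site they are meant for; the function names here are made up for illustration and are not code from the patch:

/* Does this delegation carry delegated atime/mtime (draft-ietf-nfsv4-delstid)? */
static bool demo_deleg_has_timestamps(const struct nfs4_delegation *dp)
{
	return deleg_attrs_deleg(dp->dl_type);
}

/* A conflicting READ open only collides with write-class delegations. */
static bool demo_conflicts_with_read_open(const struct nfs4_delegation *dp)
{
	return deleg_is_write(dp->dl_type);
}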
@@ -754,6 +777,13 @@ extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *
 extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 		const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
 extern bool nfsd4_run_cb(struct nfsd4_callback *cb);
+
+static inline void nfsd4_try_run_cb(struct nfsd4_callback *cb)
+{
+	if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags))
+		WARN_ON_ONCE(!nfsd4_run_cb(cb));
+}
+
 extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
 void nfsd4_async_copy_reaper(struct nfsd_net *nn);
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index bb22893f1157..f7eaf95e20fc 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -73,11 +73,11 @@ static int nfsd_show(struct seq_file *seq, void *v)
 
 DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
 
-void nfsd_proc_stat_init(struct net *net)
+struct proc_dir_entry *nfsd_proc_stat_init(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
+	return svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
 }
 
 void nfsd_proc_stat_shutdown(struct net *net)
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 04aacb6c36e2..e4efb0e4e56d 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -10,7 +10,7 @@
 #include <uapi/linux/nfsd/stats.h>
 #include <linux/percpu_counter.h>
 
-void nfsd_proc_stat_init(struct net *net);
+struct proc_dir_entry *nfsd_proc_stat_init(struct net *net);
 void nfsd_proc_stat_shutdown(struct net *net);
 
 static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 696c89f68a9e..a7630e9f6577 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -626,7 +626,6 @@ DEFINE_STATEID_EVENT(open);
 DEFINE_STATEID_EVENT(deleg_read);
 DEFINE_STATEID_EVENT(deleg_write);
 DEFINE_STATEID_EVENT(deleg_return);
-DEFINE_STATEID_EVENT(deleg_recall);
 
 DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
 	TP_PROTO(u32 seqid, const stateid_t *stp),
@@ -804,6 +803,14 @@ DEFINE_EVENT(nfsd_cs_slot_class, nfsd_##name, \
 DEFINE_CS_SLOT_EVENT(slot_seqid_conf);
 DEFINE_CS_SLOT_EVENT(slot_seqid_unconf);
 
+#define show_nfs_slot_flags(val) \
+	__print_flags(val, "|", \
+		{ NFSD4_SLOT_INUSE,		"INUSE" }, \
+		{ NFSD4_SLOT_CACHETHIS,		"CACHETHIS" }, \
+		{ NFSD4_SLOT_INITIALIZED,	"INITIALIZED" }, \
+		{ NFSD4_SLOT_CACHED,		"CACHED" }, \
+		{ NFSD4_SLOT_REUSED,		"REUSED" })
+
 TRACE_EVENT(nfsd_slot_seqid_sequence,
 	TP_PROTO(
 		const struct nfs4_client *clp,
@@ -814,10 +821,11 @@ TRACE_EVENT(nfsd_slot_seqid_sequence,
 	TP_STRUCT__entry(
 		__field(u32, seqid)
 		__field(u32, slot_seqid)
+		__field(u32, slot_index)
+		__field(unsigned long, slot_flags)
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
 		__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
-		__field(bool, in_use)
 	),
 	TP_fast_assign(
 		__entry->cl_boot = clp->cl_clientid.cl_boot;
@@ -826,11 +834,13 @@ TRACE_EVENT(nfsd_slot_seqid_sequence,
 			clp->cl_cb_conn.cb_addrlen);
 		__entry->seqid = seq->seqid;
 		__entry->slot_seqid = slot->sl_seqid;
+		__entry->slot_index = seq->slotid;
+		__entry->slot_flags = slot->sl_flags;
 	),
-	TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u (%sin use)",
+	TP_printk("addr=%pISpc client %08x:%08x idx=%u seqid=%u slot_seqid=%u flags=%s",
 		__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
-		__entry->seqid, __entry->slot_seqid,
-		__entry->in_use ? "" : "not "
+		__entry->slot_index, __entry->seqid, __entry->slot_seqid,
+		show_nfs_slot_flags(__entry->slot_flags)
 	)
 );
 
@@ -1040,6 +1050,7 @@ DEFINE_CLID_EVENT(confirmed_r);
 	{ 1 << NFSD_FILE_HASHED,	"HASHED" }, \
 	{ 1 << NFSD_FILE_PENDING,	"PENDING" }, \
 	{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED" }, \
+	{ 1 << NFSD_FILE_RECENT,	"RECENT" }, \
 	{ 1 << NFSD_FILE_GC,		"GC" })
 
 DECLARE_EVENT_CLASS(nfsd_file_class,
@@ -1318,6 +1329,7 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed);
 DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
 DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
 DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_aged);
 DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed);
 
 DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class,
@@ -1347,6 +1359,7 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \
 	TP_ARGS(removed, remaining))
 
 DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
+DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_recent);
 DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
 
 TRACE_EVENT(nfsd_file_close,
@@ -1603,7 +1616,7 @@ DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
 		__entry->cl_id = clp->cl_clientid.cl_id;
 		__entry->cb = cb;
 		__entry->opcode = cb->cb_ops ? cb->cb_ops->opcode : _CB_NULL;
-		__entry->need_restart = cb->cb_need_restart;
+		__entry->need_restart = test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
 		__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
 				  clp->cl_cb_conn.cb_addrlen)
 	),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 29cb7b812d71..a7c8eac5761a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -71,7 +71,6 @@ nfserrno (int errno)
 		{ nfserr_acces, -EACCES },
 		{ nfserr_exist, -EEXIST },
 		{ nfserr_xdev, -EXDEV },
-		{ nfserr_mlink, -EMLINK },
 		{ nfserr_nodev, -ENODEV },
 		{ nfserr_notdir, -ENOTDIR },
 		{ nfserr_isdir, -EISDIR },
@@ -467,7 +466,15 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
 	if (!iap->ia_valid)
 		return 0;
 
-	iap->ia_valid |= ATTR_CTIME;
+	/*
+	 * If ATTR_DELEG is set, then this is an update from a client that
+	 * holds a delegation. If this is an update for only the atime, the
+	 * ctime should not be changed. If the update contains the mtime
+	 * too, then ATTR_CTIME should already be set.
+	 */
+	if (!(iap->ia_valid & ATTR_DELEG))
+		iap->ia_valid |= ATTR_CTIME;
+
 	return notify_change(&nop_mnt_idmap, dentry, iap, NULL);
 }
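With the hunk above, a timestamp update sent by a delegation holder (ATTR_DELEG set) no longer has ATTR_CTIME forced onto it, so a delegated atime-only update leaves ctime untouched while ordinary setattrs keep the old behaviour. A small illustrative sketch of the two kinds of ia_valid masks a caller might build; this is invented example code, not part of the patch:

#include <linux/fs.h>
#include <linux/string.h>

static void demo_fill_iattr(struct iattr *iap, bool delegated_atime_only)
{
	memset(iap, 0, sizeof(*iap));

	if (delegated_atime_only) {
		/* __nfsd_setattr() will NOT add ATTR_CTIME: ATTR_DELEG is set. */
		iap->ia_valid = ATTR_DELEG | ATTR_ATIME | ATTR_ATIME_SET;
	} else {
		/* Ordinary setattr: __nfsd_setattr() adds ATTR_CTIME itself. */
		iap->ia_valid = ATTR_MODE;
		iap->ia_mode = 0644;
	}
}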
@@ -1461,7 +1468,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	struct inode	*dirp;
 	struct iattr	*iap = attrs->na_iattr;
 	__be32		err;
-	int		host_err;
+	int		host_err = 0;
 
 	dentry = fhp->fh_dentry;
 	dirp = d_inode(dentry);
@@ -1488,28 +1495,15 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		nfsd_check_ignore_resizing(iap);
 		break;
 	case S_IFDIR:
-		host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
-		if (!host_err && unlikely(d_unhashed(dchild))) {
-			struct dentry *d;
-			d = lookup_one_len(dchild->d_name.name,
-					   dchild->d_parent,
-					   dchild->d_name.len);
-			if (IS_ERR(d)) {
-				host_err = PTR_ERR(d);
-				break;
-			}
-			if (unlikely(d_is_negative(d))) {
-				dput(d);
-				err = nfserr_serverfault;
-				goto out;
-			}
+		dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
+		if (IS_ERR(dchild)) {
+			host_err = PTR_ERR(dchild);
+		} else if (d_is_negative(dchild)) {
+			err = nfserr_serverfault;
+			goto out;
+		} else if (unlikely(dchild != resfhp->fh_dentry)) {
 			dput(resfhp->fh_dentry);
-			resfhp->fh_dentry = dget(d);
-			err = fh_update(resfhp);
-			dput(dchild);
-			dchild = d;
-			if (err)
-				goto out;
+			resfhp->fh_dentry = dget(dchild);
 		}
 		break;
 	case S_IFCHR:
@@ -1530,7 +1524,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
 
 out:
-	dput(dchild);
+	if (!IS_ERR(dchild))
+		dput(dchild);
 	return err;
 
 out_nfserr:
@@ -1699,9 +1694,17 @@ out:
 	return err;
 }
 
-/*
- * Create a hardlink
- * N.B. After this call _both_ ffhp and tfhp need an fh_put
+/**
+ * nfsd_link - create a link
+ * @rqstp: RPC transaction context
+ * @ffhp: the file handle of the directory where the new link is to be created
+ * @name: the filename of the new link
+ * @len: the length of @name in octets
+ * @tfhp: the file handle of an existing file object
+ *
+ * After this call _both_ ffhp and tfhp need an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
  */
 __be32
 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
@@ -1709,6 +1712,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 {
 	struct dentry	*ddir, *dnew, *dold;
 	struct inode	*dirp;
+	int		type;
 	__be32		err;
 	int		host_err;
 
@@ -1728,11 +1732,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 	if (isdotent(name, len))
 		goto out;
 
+	err = nfs_ok;
+	type = d_inode(tfhp->fh_dentry)->i_mode & S_IFMT;
 	host_err = fh_want_write(tfhp);
-	if (host_err) {
-		err = nfserrno(host_err);
+	if (host_err)
 		goto out;
-	}
 
 	ddir = ffhp->fh_dentry;
 	dirp = d_inode(ddir);
@@ -1740,7 +1744,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 
 	dnew = lookup_one_len(name, ddir, len);
 	if (IS_ERR(dnew)) {
-		err = nfserrno(PTR_ERR(dnew));
+		host_err = PTR_ERR(dnew);
 		goto out_unlock;
 	}
 
@@ -1756,17 +1760,26 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 	fh_fill_post_attrs(ffhp);
 	inode_unlock(dirp);
 	if (!host_err) {
-		err = nfserrno(commit_metadata(ffhp));
-		if (!err)
-			err = nfserrno(commit_metadata(tfhp));
-	} else {
-		err = nfserrno(host_err);
+		host_err = commit_metadata(ffhp);
+		if (!host_err)
+			host_err = commit_metadata(tfhp);
 	}
+
 	dput(dnew);
 out_drop_write:
 	fh_drop_write(tfhp);
+	if (host_err == -EBUSY) {
+		/*
+		 * See RFC 8881 Section 18.9.4 para 1-2: NFSv4 LINK
+		 * wants a status unique to the object type.
+		 */
+		if (type != S_IFDIR)
+			err = nfserr_file_open;
+		else
+			err = nfserr_acces;
+	}
 out:
-	return err;
+	return err != nfs_ok ? err : nfserrno(host_err);
 
 out_dput:
 	dput(dnew);
@@ -1795,9 +1808,19 @@ nfsd_has_cached_files(struct dentry *dentry)
 	return ret;
 }
 
-/*
- * Rename a file
- * N.B. After this call _both_ ffhp and tfhp need an fh_put
+/**
+ * nfsd_rename - rename a directory entry
+ * @rqstp: RPC transaction context
+ * @ffhp: the file handle of parent directory containing the entry to be renamed
+ * @fname: the filename of directory entry to be renamed
+ * @flen: the length of @fname in octets
+ * @tfhp: the file handle of parent directory to contain the renamed entry
+ * @tname: the filename of the new entry
+ * @tlen: the length of @tname in octets
+ *
+ * After this call _both_ ffhp and tfhp need an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
  */
 __be32
 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
@@ -1805,6 +1828,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 {
 	struct dentry	*fdentry, *tdentry, *odentry, *ndentry, *trap;
 	struct inode	*fdir, *tdir;
+	int		type = S_IFDIR;
 	__be32		err;
 	int		host_err;
 	bool		close_cached = false;
@@ -1862,11 +1886,14 @@ retry:
 	host_err = -EINVAL;
 	if (odentry == trap)
 		goto out_dput_old;
+	type = d_inode(odentry)->i_mode & S_IFMT;
 
 	ndentry = lookup_one_len(tname, tdentry, tlen);
 	host_err = PTR_ERR(ndentry);
 	if (IS_ERR(ndentry))
 		goto out_dput_old;
+	if (d_inode(ndentry))
+		type = d_inode(ndentry)->i_mode & S_IFMT;
 	host_err = -ENOTEMPTY;
 	if (ndentry == trap)
 		goto out_dput_new;
@@ -1904,7 +1931,18 @@ retry:
  out_dput_old:
 	dput(odentry);
  out_nfserr:
-	err = nfserrno(host_err);
+	if (host_err == -EBUSY) {
+		/*
+		 * See RFC 8881 Section 18.26.4 para 1-3: NFSv4 RENAME
+		 * wants a status unique to the object type.
+		 */
+		if (type != S_IFDIR)
+			err = nfserr_file_open;
+		else
+			err = nfserr_acces;
+	} else {
+		err = nfserrno(host_err);
+	}
 
 	if (!close_cached) {
 		fh_fill_post_attrs(ffhp);
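nfsd_link(), nfsd_rename() and nfsd_unlink() now share one convention for -EBUSY (the object is mounted on): nfserr_file_open for anything that is not a directory, nfserr_acces for a directory, following the RFC 8881 sections cited in the comments. The patch open-codes the test at each site; as a sketch, the shared mapping amounts to:

/* Illustrative helper only; "type" is the S_IFMT portion of the object's mode,
 * and nfserr_file_open/nfserr_acces are nfsd's __be32 status constants. */
static __be32 demo_nfserr_for_ebusy(int type)
{
	if (type != S_IFDIR)
		return nfserr_file_open;
	return nfserr_acces;
}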
@@ -1931,9 +1969,17 @@ out:
 	return err;
 }
 
-/*
- * Unlink a file or directory
- * N.B. After this call fhp needs an fh_put
+/**
+ * nfsd_unlink - remove a directory entry
+ * @rqstp: RPC transaction context
+ * @fhp: the file handle of the parent directory to be modified
+ * @type: enforced file type of the object to be removed
+ * @fname: the name of directory entry to be removed
+ * @flen: length of @fname in octets
+ *
+ * After this call fhp needs an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
  */
 __be32
 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
@@ -2007,15 +2053,17 @@ out_drop_write:
 	fh_drop_write(fhp);
 out_nfserr:
 	if (host_err == -EBUSY) {
-		/* name is mounted-on. There is no perfect
-		 * error status.
+		/*
+		 * See RFC 8881 Section 18.25.4 para 4: NFSv4 REMOVE
+		 * wants a status unique to the object type.
 		 */
-		err = nfserr_file_open;
-	} else {
-		err = nfserrno(host_err);
+		if (type != S_IFDIR)
+			err = nfserr_file_open;
+		else
+			err = nfserr_acces;
 	}
 out:
-	return err;
+	return err != nfs_ok ? err : nfserrno(host_err);
 out_unlock:
 	inode_unlock(dirp);
 	goto out_drop_write;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index e8b00309c449..f1a315cd31b7 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -59,16 +59,20 @@
  * 1: CB_GETATTR opcode (32-bit)
  * N: file_handle
  * 1: number of entry in attribute array (32-bit)
- * 1: entry 0 in attribute array (32-bit)
+ * 3: entry 0-2 in attribute array (32-bit * 3)
  */
 #define NFS4_enc_cb_getattr_sz		(cb_compound_enc_hdr_sz + \
 					cb_sequence_enc_sz + \
-					1 + enc_nfs4_fh_sz + 1 + 1)
+					1 + enc_nfs4_fh_sz + 1 + 3)
 
 /*
  * 4: fattr_bitmap_maxsz
  * 1: attribute array len
  * 2: change attr (64-bit)
  * 2: size (64-bit)
+ * 2: atime.seconds (64-bit)
+ * 1: atime.nanoseconds (32-bit)
+ * 2: mtime.seconds (64-bit)
+ * 1: mtime.nanoseconds (32-bit)
  */
 #define NFS4_dec_cb_getattr_sz		(cb_compound_dec_hdr_sz  + \
-			cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)
+			cb_sequence_dec_sz + 4 + 1 + 2 + 2 + 2 + 1 + 2 + 1 + op_dec_sz)
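The enlarged decode size simply reserves room for the two delegated timestamps in the CB_GETATTR reply: each nfstime4 is a 64-bit seconds field plus a 32-bit nseconds field, i.e. 2 + 1 XDR words. Spelled out as a worked restatement of the macro above (the DEMO_* name is invented; the header sizes stay in the existing cb_* constants):

/*
 * Attribute payload of the CB_GETATTR reply, in 32-bit XDR words:
 *   bitmap     4    (fattr_bitmap_maxsz)
 *   attr len   1
 *   change     2    (64-bit)
 *   size       2    (64-bit)
 *   atime      2+1  (nfstime4: seconds64 + nseconds32)
 *   mtime      2+1
 * which is the 4 + 1 + 2 + 2 + 2 + 1 + 2 + 1 = 15 words in NFS4_dec_cb_getattr_sz,
 * on top of cb_compound_dec_hdr_sz, cb_sequence_dec_sz and op_dec_sz.
 */
#define DEMO_CB_GETATTR_ATTRS_SZ	(4 + 1 + 2 + 2 + 2 + 1 + 2 + 1)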