Diffstat (limited to 'fs/nfsd')
-rw-r--r--  fs/nfsd/Kconfig           15
-rw-r--r--  fs/nfsd/Makefile          17
-rw-r--r--  fs/nfsd/auth.c             3
-rw-r--r--  fs/nfsd/debugfs.c         47
-rw-r--r--  fs/nfsd/export.c          48
-rw-r--r--  fs/nfsd/export.h           3
-rw-r--r--  fs/nfsd/filecache.c      203
-rw-r--r--  fs/nfsd/filecache.h       10
-rw-r--r--  fs/nfsd/localio.c         71
-rw-r--r--  fs/nfsd/lockd.c           13
-rw-r--r--  fs/nfsd/netns.h           18
-rw-r--r--  fs/nfsd/nfs2acl.c          2
-rw-r--r--  fs/nfsd/nfs3acl.c          2
-rw-r--r--  fs/nfsd/nfs3proc.c        72
-rw-r--r--  fs/nfsd/nfs3xdr.c          4
-rw-r--r--  fs/nfsd/nfs4acl.c          2
-rw-r--r--  fs/nfsd/nfs4callback.c   465
-rw-r--r--  fs/nfsd/nfs4layouts.c      7
-rw-r--r--  fs/nfsd/nfs4proc.c       194
-rw-r--r--  fs/nfsd/nfs4recover.c     88
-rw-r--r--  fs/nfsd/nfs4state.c      797
-rw-r--r--  fs/nfsd/nfs4xdr.c        452
-rw-r--r--  fs/nfsd/nfs4xdr_gen.c    256
-rw-r--r--  fs/nfsd/nfs4xdr_gen.h     25
-rw-r--r--  fs/nfsd/nfsctl.c         131
-rw-r--r--  fs/nfsd/nfsd.h            47
-rw-r--r--  fs/nfsd/nfsfh.c           45
-rw-r--r--  fs/nfsd/nfsfh.h           10
-rw-r--r--  fs/nfsd/nfsproc.c         53
-rw-r--r--  fs/nfsd/nfssvc.c          85
-rw-r--r--  fs/nfsd/nfsxdr.c           4
-rw-r--r--  fs/nfsd/state.h          117
-rw-r--r--  fs/nfsd/stats.c            4
-rw-r--r--  fs/nfsd/stats.h            2
-rw-r--r--  fs/nfsd/trace.h          356
-rw-r--r--  fs/nfsd/vfs.c            286
-rw-r--r--  fs/nfsd/vfs.h             16
-rw-r--r--  fs/nfsd/xdr4.h            14
-rw-r--r--  fs/nfsd/xdr4cb.h          15
39 files changed, 2780 insertions, 1219 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index c0bd1509ccd4..879e0b104d1c 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -4,6 +4,7 @@ config NFSD
depends on INET
depends on FILE_LOCKING
depends on FSNOTIFY
+ select CRC32
select LOCKD
select SUNRPC
select EXPORTFS
@@ -76,8 +77,8 @@ config NFSD_V4
select FS_POSIX_ACL
select RPCSEC_GSS_KRB5
select CRYPTO
+ select CRYPTO_LIB_SHA256
select CRYPTO_MD5
- select CRYPTO_SHA256
select GRACE_PERIOD
select NFS_V4_2_SSC_HELPER if NFS_V4_2
help
@@ -172,6 +173,16 @@ config NFSD_LEGACY_CLIENT_TRACKING
recoverydir, or spawn a process directly using a usermodehelper
upcall.
- These legacy client tracking methods have proven to be probelmatic
+ These legacy client tracking methods have proven to be problematic
and will be removed in the future. Say Y here if you need support
for them in the interim.
+
+config NFSD_V4_DELEG_TIMESTAMPS
+ bool "Support delegated timestamps"
+ depends on NFSD_V4
+ default n
+ help
+ NFSD implements delegated timestamps according to
+ draft-ietf-nfsv4-delstid-08 "Extending the Opening of Files". This
+ is currently an experimental feature and is therefore left disabled
+ by default.
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 18cbd3fa7691..55744bb786c9 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -18,9 +18,24 @@ nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
- nfs4acl.o nfs4callback.o nfs4recover.o
+ nfs4acl.o nfs4callback.o nfs4recover.o nfs4xdr_gen.o
nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o
nfsd-$(CONFIG_NFS_LOCALIO) += localio.o
+nfsd-$(CONFIG_DEBUG_FS) += debugfs.o
+
+
+.PHONY: xdrgen
+
+xdrgen: ../../include/linux/sunrpc/xdrgen/nfs4_1.h nfs4xdr_gen.h nfs4xdr_gen.c
+
+../../include/linux/sunrpc/xdrgen/nfs4_1.h: ../../Documentation/sunrpc/xdr/nfs4_1.x
+ ../../tools/net/sunrpc/xdrgen/xdrgen definitions $< > $@
+
+nfs4xdr_gen.h: ../../Documentation/sunrpc/xdr/nfs4_1.x
+ ../../tools/net/sunrpc/xdrgen/xdrgen declarations $< > $@
+
+nfs4xdr_gen.c: ../../Documentation/sunrpc/xdr/nfs4_1.x
+ ../../tools/net/sunrpc/xdrgen/xdrgen source $< > $@
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 93e33d1ee891..4dc327e02456 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -27,7 +27,7 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp)
int flags = nfsexp_flags(cred, exp);
/* discard any old override before preparing the new set */
- revert_creds(get_cred(current_real_cred()));
+ put_cred(revert_creds(get_cred(current_real_cred())));
new = prepare_creds();
if (!new)
return -ENOMEM;
@@ -80,7 +80,6 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp)
new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
put_cred(override_creds(new));
- put_cred(new);
return 0;
oom:
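
The two auth.c hunks follow the newer credential API in which override_creds() and revert_creds() merely swap current's cred pointer and hand back the one that was previously active, leaving reference counting entirely to the caller. A minimal sketch of that pairing, under that assumption (not part of this patch):

#include <linux/cred.h>

/*
 * Minimal sketch, assuming override_creds()/revert_creds() only swap
 * current->cred and return the previously active cred: the caller owns
 * the reference it passes in and must put the one it gets back.
 */
static int run_with_new_creds(void)
{
        const struct cred *new, *old;

        new = prepare_creds();          /* one reference on @new */
        if (!new)
                return -ENOMEM;

        old = override_creds(new);      /* @new's reference now belongs to the task */

        /* ... do the privileged work ... */

        put_cred(revert_creds(old));    /* restore @old, drop the reference on @new */
        return 0;
}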
diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c
new file mode 100644
index 000000000000..84b0c8b559dc
--- /dev/null
+++ b/fs/nfsd/debugfs.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/debugfs.h>
+
+#include "nfsd.h"
+
+static struct dentry *nfsd_top_dir __read_mostly;
+
+/*
+ * /sys/kernel/debug/nfsd/disable-splice-read
+ *
+ * Contents:
+ * %0: NFS READ is allowed to use page splicing
+ * %1: NFS READ uses only iov iter read
+ *
+ * The default value of this setting is zero (page splicing is
+ * allowed). This setting takes immediate effect for all NFS
+ * versions, all exports, and in all NFSD net namespaces.
+ */
+
+static int nfsd_dsr_get(void *data, u64 *val)
+{
+ *val = nfsd_disable_splice_read ? 1 : 0;
+ return 0;
+}
+
+static int nfsd_dsr_set(void *data, u64 val)
+{
+ nfsd_disable_splice_read = (val > 0) ? true : false;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n");
+
+void nfsd_debugfs_exit(void)
+{
+ debugfs_remove_recursive(nfsd_top_dir);
+ nfsd_top_dir = NULL;
+}
+
+void nfsd_debugfs_init(void)
+{
+ nfsd_top_dir = debugfs_create_dir("nfsd", NULL);
+
+ debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO,
+ nfsd_top_dir, NULL, &nfsd_dsr_fops);
+}
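
The new control surfaces as a single boolean file under the nfsd debugfs directory. As a quick illustration only (assuming debugfs is mounted at the conventional /sys/kernel/debug location), a privileged program can flip it like this:

#include <stdio.h>

/* Write "1" to force iov_iter reads; "0" re-enables splice reads. */
int main(void)
{
        FILE *f = fopen("/sys/kernel/debug/nfsd/disable-splice-read", "w");

        if (!f) {
                perror("disable-splice-read");
                return 1;
        }
        fputs("1\n", f);
        return fclose(f) ? 1 : 0;
}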
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c82d8e3e0d4f..88ae410b4113 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -355,16 +355,25 @@ static void export_stats_destroy(struct export_stats *stats)
EXP_STATS_COUNTERS_NUM);
}
-static void svc_export_put(struct kref *ref)
+static void svc_export_release(struct rcu_head *rcu_head)
{
- struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
- path_put(&exp->ex_path);
- auth_domain_put(exp->ex_client);
+ struct svc_export *exp = container_of(rcu_head, struct svc_export,
+ ex_rcu);
+
nfsd4_fslocs_free(&exp->ex_fslocs);
export_stats_destroy(exp->ex_stats);
kfree(exp->ex_stats);
kfree(exp->ex_uuid);
- kfree_rcu(exp, ex_rcu);
+ kfree(exp);
+}
+
+static void svc_export_put(struct kref *ref)
+{
+ struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+
+ path_put(&exp->ex_path);
+ auth_domain_put(exp->ex_client);
+ call_rcu(&exp->ex_rcu, svc_export_release);
}
static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
@@ -1078,12 +1087,14 @@ static struct svc_export *exp_find(struct cache_detail *cd,
* check_nfsd_access - check if access to export is allowed.
* @exp: svc_export that is being accessed.
* @rqstp: svc_rqst attempting to access @exp (will be NULL for LOCALIO).
+ * @may_bypass_gss: reduce strictness of authorization check
*
* Return values:
* %nfs_ok if access is granted, or
* %nfserr_wrongsec if access is denied
*/
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
{
struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
struct svc_xprt *xprt;
@@ -1113,7 +1124,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
goto ok;
}
- goto denied;
+ if (!may_bypass_gss)
+ goto denied;
ok:
/* legacy gss-only clients are always OK: */
@@ -1140,6 +1152,23 @@ ok:
if (nfsd4_spo_must_allow(rqstp))
return nfs_ok;
+ /* Some calls may be processed without authentication
+ * on GSS exports. For example NFS2/3 calls on root
+ * directory, see section 2.3.2 of rfc 2623.
+ * For "may_bypass_gss" check that export has really
+ * enabled some flavor with authentication (GSS or any
+ * other) and also check that the used auth flavor is
+ * without authentication (none or sys).
+ */
+ if (may_bypass_gss && (
+ rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL ||
+ rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)) {
+ for (f = exp->ex_flavors; f < end; f++) {
+ if (f->pseudoflavor >= RPC_AUTH_DES)
+ return 0;
+ }
+ }
+
denied:
return nfserr_wrongsec;
}
@@ -1406,10 +1435,9 @@ static int e_show(struct seq_file *m, void *p)
return 0;
}
- exp_get(exp);
- if (cache_check(cd, &exp->h, NULL))
+ if (cache_check_rcu(cd, &exp->h, NULL))
return 0;
- exp_put(exp);
+
return svc_export_show(m, cd, cp);
}
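
svc_export_put() now defers the final kfree() to an RCU callback so lockless cache readers can never observe a freed export. The same kref-plus-call_rcu teardown pattern in generic form, with illustrative names:

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        struct kref ref;
        struct rcu_head rcu;
        /* ... payload looked up under rcu_read_lock() ... */
};

static void foo_release(struct rcu_head *head)
{
        struct foo *f = container_of(head, struct foo, rcu);

        kfree(f);                       /* runs after a grace period */
}

static void foo_put(struct kref *ref)
{
        struct foo *f = container_of(ref, struct foo, ref);

        /* drop non-RCU resources here, then defer the free */
        call_rcu(&f->rcu, foo_release);
}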
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 3794ae253a70..4d92b99c1ffd 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -101,7 +101,8 @@ struct svc_expkey {
struct svc_cred;
int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp);
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss);
/*
* Function declarations
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 2e6783f63712..e108b6c705b4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -39,6 +39,7 @@
#include <linux/fsnotify.h>
#include <linux/seq_file.h>
#include <linux/rhashtable.h>
+#include <linux/nfslocalio.h>
#include "vfs.h"
#include "nfsd.h"
@@ -318,15 +319,14 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}
-
-static bool nfsd_file_lru_add(struct nfsd_file *nf)
+static void nfsd_file_lru_add(struct nfsd_file *nf)
{
- set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
- if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) {
+ refcount_inc(&nf->nf_ref);
+ if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru))
trace_nfsd_file_lru_add(nf);
- return true;
- }
- return false;
+ else
+ WARN_ON(1);
+ nfsd_file_schedule_laundrette();
}
static bool nfsd_file_lru_remove(struct nfsd_file *nf)
@@ -362,48 +362,54 @@ nfsd_file_put(struct nfsd_file *nf)
if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
- /*
- * If this is the last reference (nf_ref == 1), then try to
- * transfer it to the LRU.
- */
- if (refcount_dec_not_one(&nf->nf_ref))
- return;
-
- /* Try to add it to the LRU. If that fails, decrement. */
- if (nfsd_file_lru_add(nf)) {
- /* If it's still hashed, we're done */
- if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
- nfsd_file_schedule_laundrette();
- return;
- }
-
- /*
- * We're racing with unhashing, so try to remove it from
- * the LRU. If removal fails, then someone else already
- * has our reference.
- */
- if (!nfsd_file_lru_remove(nf))
- return;
- }
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ set_bit(NFSD_FILE_RECENT, &nf->nf_flags);
}
+
if (refcount_dec_and_test(&nf->nf_ref))
nfsd_file_free(nf);
}
/**
- * nfsd_file_put_local - put the reference to nfsd_file and local nfsd_serv
- * @nf: nfsd_file of which to put the references
+ * nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller
+ * @nf: nfsd_file of which to put the reference
*
- * First put the reference of the nfsd_file and then put the
- * reference to the associated nn->nfsd_serv.
+ * First save the associated net to return to caller, then put
+ * the reference of the nfsd_file.
*/
-void
-nfsd_file_put_local(struct nfsd_file *nf) __must_hold(rcu)
+struct net *
+nfsd_file_put_local(struct nfsd_file __rcu **pnf)
+{
+ struct nfsd_file *nf;
+ struct net *net = NULL;
+
+ nf = unrcu_pointer(xchg(pnf, NULL));
+ if (nf) {
+ net = nf->nf_net;
+ nfsd_file_put(nf);
+ }
+ return net;
+}
+
+/**
+ * nfsd_file_get_local - get nfsd_file reference and reference to net
+ * @nf: nfsd_file of which to put the reference
+ *
+ * Get reference to both the nfsd_file and nf->nf_net.
+ */
+struct nfsd_file *
+nfsd_file_get_local(struct nfsd_file *nf)
{
struct net *net = nf->nf_net;
- nfsd_file_put(nf);
- nfsd_serv_put(net);
+ if (nfsd_net_try_get(net)) {
+ nf = nfsd_file_get(nf);
+ if (!nf)
+ nfsd_net_put(net);
+ } else {
+ nf = NULL;
+ }
+ return nf;
}
/**
@@ -445,11 +451,20 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
struct nfsd_file, nf_gc);
struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+ struct svc_serv *serv;
spin_lock(&l->lock);
list_move_tail(&nf->nf_gc, &l->freeme);
spin_unlock(&l->lock);
- svc_wake_up(nn->nfsd_serv);
+
+ /*
+ * The filecache laundrette is shut down after the
+ * nn->nfsd_serv pointer is cleared, but before the
+ * svc_serv is freed.
+ */
+ serv = nn->nfsd_serv;
+ if (serv)
+ svc_wake_up(serv);
}
}
@@ -487,7 +502,6 @@ void nfsd_file_net_dispose(struct nfsd_net *nn)
* nfsd_file_lru_cb - Examine an entry on the LRU list
* @item: LRU entry to examine
* @lru: controlling LRU
- * @lock: LRU list lock (unused)
* @arg: dispose list
*
* Return values:
@@ -497,9 +511,7 @@ void nfsd_file_net_dispose(struct nfsd_net *nn)
*/
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
- spinlock_t *lock, void *arg)
- __releases(lock)
- __acquires(lock)
+ void *arg)
{
struct list_head *head = arg;
struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
@@ -523,13 +535,12 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
}
/*
- * Put the reference held on behalf of the LRU. If it wasn't the last
- * one, then just remove it from the LRU and ignore it.
+ * Put the reference held on behalf of the LRU if it is the last
+ * reference, else rotate.
*/
- if (!refcount_dec_and_test(&nf->nf_ref)) {
+ if (!refcount_dec_if_one(&nf->nf_ref)) {
trace_nfsd_file_gc_in_use(nf);
- list_lru_isolate(lru, &nf->nf_lru);
- return LRU_REMOVED;
+ return LRU_ROTATE;
}
/* Refcount went to zero. Unhash it and queue it to the dispose list */
@@ -541,14 +552,54 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
return LRU_REMOVED;
}
+static enum lru_status
+nfsd_file_gc_cb(struct list_head *item, struct list_lru_one *lru,
+ void *arg)
+{
+ struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+ if (test_and_clear_bit(NFSD_FILE_RECENT, &nf->nf_flags)) {
+ /*
+ * "REFERENCED" really means "should be at the end of the
+ * LRU. As we are putting it there we can clear the flag.
+ */
+ clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ trace_nfsd_file_gc_aged(nf);
+ return LRU_ROTATE;
+ }
+ return nfsd_file_lru_cb(item, lru, arg);
+}
+
+/* If the shrinker runs between calls to list_lru_walk_node() in
+ * nfsd_file_gc(), the "remaining" count will be wrong. This could
+ * result in premature freeing of some files. This may not matter much
+ * but is easy to fix with this spinlock which temporarily disables
+ * the shrinker.
+ */
+static DEFINE_SPINLOCK(nfsd_gc_lock);
static void
nfsd_file_gc(void)
{
+ unsigned long ret = 0;
LIST_HEAD(dispose);
- unsigned long ret;
+ int nid;
+
+ spin_lock(&nfsd_gc_lock);
+ for_each_node_state(nid, N_NORMAL_MEMORY) {
+ unsigned long remaining = list_lru_count_node(&nfsd_file_lru, nid);
- ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
- &dispose, list_lru_count(&nfsd_file_lru));
+ while (remaining > 0) {
+ unsigned long nr = min(remaining, NFSD_FILE_GC_BATCH);
+
+ remaining -= nr;
+ ret += list_lru_walk_node(&nfsd_file_lru, nid, nfsd_file_gc_cb,
+ &dispose, &nr);
+ if (nr)
+ /* walk aborted early */
+ remaining = 0;
+ }
+ }
+ spin_unlock(&nfsd_gc_lock);
trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
nfsd_file_dispose_list_delayed(&dispose);
}
@@ -556,9 +607,9 @@ nfsd_file_gc(void)
static void
nfsd_file_gc_worker(struct work_struct *work)
{
- nfsd_file_gc();
if (list_lru_count(&nfsd_file_lru))
- nfsd_file_schedule_laundrette();
+ nfsd_file_gc();
+ nfsd_file_schedule_laundrette();
}
static unsigned long
@@ -573,8 +624,12 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
LIST_HEAD(dispose);
unsigned long ret;
+ if (!spin_trylock(&nfsd_gc_lock))
+ return SHRINK_STOP;
+
ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
nfsd_file_lru_cb, &dispose);
+ spin_unlock(&nfsd_gc_lock);
trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
nfsd_file_dispose_list_delayed(&dispose);
return ret;
@@ -679,17 +734,12 @@ nfsd_file_close_inode(struct inode *inode)
void
nfsd_file_close_inode_sync(struct inode *inode)
{
- struct nfsd_file *nf;
LIST_HEAD(dispose);
trace_nfsd_file_close(inode);
nfsd_file_queue_for_close(inode, &dispose);
- while (!list_empty(&dispose)) {
- nf = list_first_entry(&dispose, struct nfsd_file, nf_gc);
- list_del_init(&nf->nf_gc);
- nfsd_file_free(nf);
- }
+ nfsd_file_dispose_list(&dispose);
}
static int
@@ -836,6 +886,14 @@ __nfsd_file_cache_purge(struct net *net)
struct nfsd_file *nf;
LIST_HEAD(dispose);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ if (net) {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ nfs_localio_invalidate_clients(&nn->local_clients,
+ &nn->local_clients_lock);
+ }
+#endif
+
rhltable_walk_enter(&nfsd_file_rhltable, &iter);
do {
rhashtable_walk_start(&iter);
@@ -1043,16 +1101,8 @@ retry:
nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
rcu_read_unlock();
- if (nf) {
- /*
- * If the nf is on the LRU then it holds an extra reference
- * that must be put if it's removed. It had better not be
- * the last one however, since we should hold another.
- */
- if (nfsd_file_lru_remove(nf))
- WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
+ if (nf)
goto wait_for_construction;
- }
new = nfsd_file_alloc(net, inode, need, want_gc);
if (!new) {
@@ -1123,8 +1173,7 @@ open_file:
status = nfs_ok;
trace_nfsd_file_opened(nf, status);
} else {
- ret = nfsd_open_verified(rqstp, fhp, may_flags,
- &nf->nf_file);
+ ret = nfsd_open_verified(fhp, may_flags, &nf->nf_file);
if (ret == -EOPENSTALE && stale_retry) {
stale_retry = false;
nfsd_file_unhash(nf);
@@ -1147,6 +1196,9 @@ open_file:
*/
if (status != nfs_ok || inode->i_nlink == 0)
nfsd_file_unhash(nf);
+ else if (want_gc)
+ nfsd_file_lru_add(nf);
+
clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
if (status == nfs_ok)
goto out;
@@ -1226,10 +1278,9 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
* a file. The security implications of this should be carefully
* considered before use.
*
- * The nfsd_file object returned by this API is reference-counted
- * and garbage-collected. The object is retained for a few
- * seconds after the final nfsd_file_put() in case the caller
- * wants to re-use it.
+ * The nfsd_file_object returned by this API is reference-counted
+ * but not garbage-collected. The object is unhashed after the
+ * final nfsd_file_put().
*
* Return values:
* %nfs_ok - @pnf points to an nfsd_file with its reference
@@ -1251,8 +1302,8 @@ nfsd_file_acquire_local(struct net *net, struct svc_cred *cred,
__be32 beres;
beres = nfsd_file_do_acquire(NULL, net, cred, client,
- fhp, may_flags, NULL, pnf, true);
- revert_creds(save_cred);
+ fhp, may_flags, NULL, pnf, false);
+ put_cred(revert_creds(save_cred));
return beres;
}
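
These filecache hunks change the garbage-collection contract: adding a file to the LRU takes a dedicated reference, and the scanner disposes of an entry only when that LRU reference is the last one (refcount_dec_if_one()), otherwise rotating it for a later pass. A toy restatement of that rule, names illustrative:

#include <linux/refcount.h>

/* The LRU owns one reference, taken at insertion time. */
static void lru_insert(refcount_t *ref)
{
        refcount_inc(ref);
}

/*
 * Returns true only when the LRU's reference is the final one, in which
 * case the caller may unhash and free the entry; otherwise the entry
 * stays on the list and is revisited on a later scan.
 */
static bool lru_try_dispose(refcount_t *ref)
{
        return refcount_dec_if_one(ref);
}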
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index cadf3c2689c4..722b26c71e45 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -4,6 +4,12 @@
#include <linux/fsnotify_backend.h>
/*
+ * Limit the time that the list_lru_one lock is held during
+ * an LRU scan.
+ */
+#define NFSD_FILE_GC_BATCH (16UL)
+
+/*
* This is the fsnotify_mark container that nfsd attaches to the files that it
* is holding open. Note that we have a separate refcount here aside from the
* one in the fsnotify_mark. We only want a single fsnotify_mark attached to
@@ -38,6 +44,7 @@ struct nfsd_file {
#define NFSD_FILE_PENDING (1)
#define NFSD_FILE_REFERENCED (2)
#define NFSD_FILE_GC (3)
+#define NFSD_FILE_RECENT (4)
unsigned long nf_flags;
refcount_t nf_ref;
unsigned char nf_may;
@@ -55,7 +62,8 @@ void nfsd_file_cache_shutdown(void);
int nfsd_file_cache_start_net(struct net *net);
void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
-void nfsd_file_put_local(struct nfsd_file *nf);
+struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf);
+struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
struct file *nfsd_file_file(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
index f441cb9f74d5..80d9ff6608a7 100644
--- a/fs/nfsd/localio.c
+++ b/fs/nfsd/localio.c
@@ -24,19 +24,6 @@
#include "filecache.h"
#include "cache.h"
-static const struct nfsd_localio_operations nfsd_localio_ops = {
- .nfsd_serv_try_get = nfsd_serv_try_get,
- .nfsd_serv_put = nfsd_serv_put,
- .nfsd_open_local_fh = nfsd_open_local_fh,
- .nfsd_file_put_local = nfsd_file_put_local,
- .nfsd_file_file = nfsd_file_file,
-};
-
-void nfsd_localio_ops_init(void)
-{
- nfs_to = &nfsd_localio_ops;
-}
-
/**
* nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file
*
@@ -45,6 +32,7 @@ void nfsd_localio_ops_init(void)
* @rpc_clnt: rpc_clnt that the client established
* @cred: cred that the client established
* @nfs_fh: filehandle to lookup
+ * @nfp: place to find the nfsd_file, or store it if it was non-NULL
* @fmode: fmode_t to use for open
*
* This function maps a local fh to a path on a local filesystem.
@@ -52,13 +40,14 @@ void nfsd_localio_ops_init(void)
* avoid all the NFS overhead with reads, writes and commits.
*
* On successful return, returned nfsd_file will have its nf_net member
- * set. Caller (NFS client) is responsible for calling nfsd_serv_put and
+ * set. Caller (NFS client) is responsible for calling nfsd_net_put and
* nfsd_file_put (via nfs_to_nfsd_file_put_local).
*/
-struct nfsd_file *
+static struct nfsd_file *
nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
struct rpc_clnt *rpc_clnt, const struct cred *cred,
- const struct nfs_fh *nfs_fh, const fmode_t fmode)
+ const struct nfs_fh *nfs_fh, struct nfsd_file __rcu **pnf,
+ const fmode_t fmode)
{
int mayflags = NFSD_MAY_LOCALIO;
struct svc_cred rq_cred;
@@ -69,6 +58,15 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
if (nfs_fh->size > NFS4_FHSIZE)
return ERR_PTR(-EINVAL);
+ if (!nfsd_net_try_get(net))
+ return ERR_PTR(-ENXIO);
+
+ rcu_read_lock();
+ localio = nfsd_file_get(rcu_dereference(*pnf));
+ rcu_read_unlock();
+ if (localio)
+ return localio;
+
/* nfs_fh -> svc_fh */
fh_init(&fh, NFS4_FHSIZE);
fh.fh_handle.fh_size = nfs_fh->size;
@@ -90,9 +88,47 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
if (rq_cred.cr_group_info)
put_group_info(rq_cred.cr_group_info);
+ if (!IS_ERR(localio)) {
+ struct nfsd_file *new;
+ if (!nfsd_net_try_get(net)) {
+ nfsd_file_put(localio);
+ nfsd_net_put(net);
+ return ERR_PTR(-ENXIO);
+ }
+ nfsd_file_get(localio);
+ again:
+ new = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(localio)));
+ if (new) {
+ /* Some other thread installed an nfsd_file */
+ if (nfsd_file_get(new) == NULL)
+ goto again;
+ /*
+ * Drop the ref we were going to install and the
+ * one we were going to return.
+ */
+ nfsd_file_put(localio);
+ nfsd_file_put(localio);
+ localio = new;
+ }
+ } else
+ nfsd_net_put(net);
+
return localio;
}
-EXPORT_SYMBOL_GPL(nfsd_open_local_fh);
+
+static const struct nfsd_localio_operations nfsd_localio_ops = {
+ .nfsd_net_try_get = nfsd_net_try_get,
+ .nfsd_net_put = nfsd_net_put,
+ .nfsd_open_local_fh = nfsd_open_local_fh,
+ .nfsd_file_put_local = nfsd_file_put_local,
+ .nfsd_file_get_local = nfsd_file_get_local,
+ .nfsd_file_file = nfsd_file_file,
+};
+
+void nfsd_localio_ops_init(void)
+{
+ nfs_to = &nfsd_localio_ops;
+}
/*
* UUID_IS_LOCAL XDR functions
@@ -114,6 +150,7 @@ static __be32 localio_proc_uuid_is_local(struct svc_rqst *rqstp)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfs_uuid_is_local(&argp->uuid, &nn->local_clients,
+ &nn->local_clients_lock,
net, rqstp->rq_client, THIS_MODULE);
return rpc_success;
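
nfsd_open_local_fh() now caches the opened file in an RCU-protected slot supplied by the client: cmpxchg() ensures only the first opener publishes it, and nfsd_file_put_local() detaches it with xchg() so exactly one caller ends up owning the reference to drop. A generic sketch of that install/clear discipline (type and helper names are illustrative):

#include <linux/atomic.h>
#include <linux/rcupdate.h>

struct obj;     /* stand-in for struct nfsd_file */

/*
 * Publish @new into @slot unless someone else already did. Returns the
 * previously installed object on a lost race (caller keeps using it and
 * drops its own @new), or NULL when @new was successfully published.
 */
static struct obj *slot_install(struct obj __rcu **slot, struct obj *new)
{
        return unrcu_pointer(cmpxchg(slot, NULL, RCU_INITIALIZER(new)));
}

/*
 * Detach whatever is in @slot; at most one caller gets a non-NULL
 * result and becomes responsible for putting it.
 */
static struct obj *slot_clear(struct obj __rcu **slot)
{
        return unrcu_pointer(xchg(slot, NULL));
}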
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 46a7f9b813e5..edc9f75dc75c 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -38,11 +38,20 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
memcpy(&fh.fh_handle.fh_raw, f->data, f->size);
fh.fh_export = NULL;
+ /*
+ * Allow BYPASS_GSS as some client implementations use AUTH_SYS
+ * for NLM even when GSS is used for NFS.
+ * Allow OWNER_OVERRIDE as permission might have been changed
+ * after the file was opened.
+ * Pass MAY_NLM so that authentication can be completely bypassed
+ * if NFSEXP_NOAUTHNLM is set. Some older clients use AUTH_NULL
+ * for NLM requests.
+ */
access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
- access |= NFSD_MAY_LOCK;
+ access |= NFSD_MAY_NLM | NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_BYPASS_GSS;
nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
fh_put(&fh);
- /* We return nlm error codes as nlm doesn't know
+ /* We return nlm error codes as nlm doesn't know
* about nfsd, but nfsd does know about nlm..
*/
switch (nfserr) {
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 26f7b34d1a03..3e2d0fde80a7 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -128,21 +128,16 @@ struct nfsd_net {
seqlock_t writeverf_lock;
unsigned char writeverf[8];
- /*
- * Max number of connections this nfsd container will allow. Defaults
- * to '0' which is means that it bases this on the number of threads.
- */
- unsigned int max_connections;
-
u32 clientid_base;
u32 clientid_counter;
u32 clverifier_counter;
struct svc_info nfsd_info;
#define nfsd_serv nfsd_info.serv
- struct percpu_ref nfsd_serv_ref;
- struct completion nfsd_serv_confirm_done;
- struct completion nfsd_serv_free_done;
+
+ struct percpu_ref nfsd_net_ref;
+ struct completion nfsd_net_confirm_done;
+ struct completion nfsd_net_free_done;
/*
* clientid and stateid data for construction of net unique COPY
@@ -219,6 +214,7 @@ struct nfsd_net {
#if IS_ENABLED(CONFIG_NFS_LOCALIO)
/* Local clients to be invalidated when net is shut down */
+ spinlock_t local_clients_lock;
struct list_head local_clients;
#endif
};
@@ -229,8 +225,8 @@ struct nfsd_net {
extern bool nfsd_support_version(int vers);
extern unsigned int nfsd_net_id;
-bool nfsd_serv_try_get(struct net *net);
-void nfsd_serv_put(struct net *net);
+bool nfsd_net_try_get(struct net *net);
+void nfsd_net_put(struct net *net);
void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
void nfsd_reset_write_verifier(struct nfsd_net *nn);
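
The nfsd_serv_* to nfsd_net_* rename goes together with the percpu_ref declared above. One plausible shape for the two declared helpers, hedged because their actual bodies live elsewhere in the series and may differ:

#include <net/netns/generic.h>
#include <linux/percpu-refcount.h>
#include "netns.h"

bool nfsd_net_try_get(struct net *net)
{
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);

        /* Fails once the ref has been killed during shutdown. */
        return percpu_ref_tryget_live(&nn->nfsd_net_ref);
}

void nfsd_net_put(struct net *net)
{
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);

        percpu_ref_put(&nn->nfsd_net_ref);
}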
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4e3be7201b1c..5fb202acb0fd 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -84,6 +84,8 @@ out:
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
goto out;
}
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 5e34e98db969..7b5433bd3019 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -76,6 +76,8 @@ out:
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
goto out;
}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 372bdcf5e07a..a817d8485d21 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -14,6 +14,7 @@
#include "xdr3.h"
#include "vfs.h"
#include "filecache.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -69,8 +70,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp)
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- dprintk("nfsd: GETATTR(3) %s\n",
- SVCFH_fmt(&argp->fh));
+ trace_nfsd_vfs_getattr(rqstp, &argp->fh);
fh_copy(&resp->fh, &argp->fh);
resp->status = fh_verify(rqstp, &resp->fh, 0,
@@ -220,7 +220,6 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
struct nfsd3_writeargs *argp = rqstp->rq_argp;
struct nfsd3_writeres *resp = rqstp->rq_resp;
unsigned long cnt = argp->len;
- unsigned int nvecs;
dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n",
SVCFH_fmt(&argp->fh),
@@ -235,10 +234,8 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
- nvecs = svc_fill_write_vector(rqstp, &argp->payload);
-
resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
- rqstp->rq_vec, nvecs, &cnt,
+ &argp->payload, &cnt,
resp->committed, resp->verf);
resp->count = cnt;
resp->status = nfsd3_map_status(resp->status);
@@ -266,6 +263,8 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 status;
int host_err;
+ trace_nfsd_vfs_create(rqstp, fhp, S_IFREG, argp->name, argp->len);
+
if (isdotent(argp->name, argp->len))
return nfserr_exist;
if (!(iap->ia_valid & ATTR_MODE))
@@ -284,7 +283,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_lock_nested(inode, I_MUTEX_PARENT);
- child = lookup_one_len(argp->name, parent, argp->len);
+ child = lookup_one(&nop_mnt_idmap,
+ &QSTR_LEN(argp->name, argp->len),
+ parent);
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
goto out;
@@ -380,11 +381,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp)
struct nfsd3_diropres *resp = rqstp->rq_resp;
svc_fh *dirfhp, *newfhp;
- dprintk("nfsd: CREATE(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
dirfhp = fh_copy(&resp->dirfh, &argp->fh);
newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
@@ -405,11 +401,6 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp)
.na_iattr = &argp->attrs,
};
- dprintk("nfsd: MKDIR(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
argp->attrs.ia_valid &= ~ATTR_SIZE;
fh_copy(&resp->dirfh, &argp->fh);
fh_init(&resp->fh, NFS3_FHSIZE);
@@ -445,11 +436,6 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
goto out;
}
- dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n",
- SVCFH_fmt(&argp->ffh),
- argp->flen, argp->fname,
- argp->tlen, argp->tname);
-
fh_copy(&resp->dirfh, &argp->ffh);
fh_init(&resp->fh, NFS3_FHSIZE);
resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname,
@@ -474,11 +460,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
int type;
dev_t rdev = 0;
- dprintk("nfsd: MKNOD(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
fh_copy(&resp->dirfh, &argp->fh);
fh_init(&resp->fh, NFS3_FHSIZE);
@@ -511,11 +492,6 @@ nfsd3_proc_remove(struct svc_rqst *rqstp)
struct nfsd3_diropargs *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- dprintk("nfsd: REMOVE(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
/* Unlink. -S_IFDIR means file must not be a directory */
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR,
@@ -533,11 +509,6 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp)
struct nfsd3_diropargs *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- dprintk("nfsd: RMDIR(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR,
argp->name, argp->len);
@@ -551,15 +522,6 @@ nfsd3_proc_rename(struct svc_rqst *rqstp)
struct nfsd3_renameargs *argp = rqstp->rq_argp;
struct nfsd3_renameres *resp = rqstp->rq_resp;
- dprintk("nfsd: RENAME(3) %s %.*s ->\n",
- SVCFH_fmt(&argp->ffh),
- argp->flen,
- argp->fname);
- dprintk("nfsd: -> %s %.*s\n",
- SVCFH_fmt(&argp->tfh),
- argp->tlen,
- argp->tname);
-
fh_copy(&resp->ffh, &argp->ffh);
fh_copy(&resp->tfh, &argp->tfh);
resp->status = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen,
@@ -574,13 +536,6 @@ nfsd3_proc_link(struct svc_rqst *rqstp)
struct nfsd3_linkargs *argp = rqstp->rq_argp;
struct nfsd3_linkres *resp = rqstp->rq_resp;
- dprintk("nfsd: LINK(3) %s ->\n",
- SVCFH_fmt(&argp->ffh));
- dprintk("nfsd: -> %s %.*s\n",
- SVCFH_fmt(&argp->tfh),
- argp->tlen,
- argp->tname);
-
fh_copy(&resp->fh, &argp->ffh);
fh_copy(&resp->tfh, &argp->tfh);
resp->status = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen,
@@ -619,9 +574,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
struct nfsd3_readdirres *resp = rqstp->rq_resp;
loff_t offset;
- dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
- SVCFH_fmt(&argp->fh),
- argp->count, (u32) argp->cookie);
+ trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie);
nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
@@ -653,9 +606,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
struct nfsd3_readdirres *resp = rqstp->rq_resp;
loff_t offset;
- dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
- SVCFH_fmt(&argp->fh),
- argp->count, (u32) argp->cookie);
+ trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie);
nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
@@ -696,9 +647,6 @@ nfsd3_proc_fsstat(struct svc_rqst *rqstp)
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_fsstatres *resp = rqstp->rq_resp;
- dprintk("nfsd: FSSTAT(3) %s\n",
- SVCFH_fmt(&argp->fh));
-
resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0);
fh_put(&argp->fh);
resp->status = nfsd3_map_status(resp->status);
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index a7a07470c1f8..ef4971d71ac4 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1001,7 +1001,9 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
} else
dchild = dget(dparent);
} else
- dchild = lookup_positive_unlocked(name, dparent, namlen);
+ dchild = lookup_one_positive_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, namlen),
+ dparent);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 96e786b5e544..936ea1ad9586 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -198,8 +198,6 @@ summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas)
memset(pas, 0, sizeof(*pas));
pas->mask = 07;
- pe = acl->a_entries + acl->a_count;
-
FOREACH_ACL_ENTRY(pa, acl, pe) {
switch (pa->e_tag) {
case ACL_USER_OBJ:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index b5b3ab9d719a..e00b2aea8da2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -42,11 +42,10 @@
#include "trace.h"
#include "xdr4cb.h"
#include "xdr4.h"
+#include "nfs4xdr_gen.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
-static void nfsd4_mark_cb_fault(struct nfs4_client *clp);
-
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
@@ -93,12 +92,35 @@ static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
{
fattr->ncf_cb_change = 0;
fattr->ncf_cb_fsize = 0;
+ fattr->ncf_cb_atime.tv_sec = 0;
+ fattr->ncf_cb_atime.tv_nsec = 0;
+ fattr->ncf_cb_mtime.tv_sec = 0;
+ fattr->ncf_cb_mtime.tv_nsec = 0;
+
if (bitmap[0] & FATTR4_WORD0_CHANGE)
if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
- return -NFSERR_BAD_XDR;
+ return -EIO;
if (bitmap[0] & FATTR4_WORD0_SIZE)
if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
- return -NFSERR_BAD_XDR;
+ return -EIO;
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) {
+ fattr4_time_deleg_access access;
+
+ if (!xdrgen_decode_fattr4_time_deleg_access(xdr, &access))
+ return -EIO;
+ fattr->ncf_cb_atime.tv_sec = access.seconds;
+ fattr->ncf_cb_atime.tv_nsec = access.nseconds;
+
+ }
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+ fattr4_time_deleg_modify modify;
+
+ if (!xdrgen_decode_fattr4_time_deleg_modify(xdr, &modify))
+ return -EIO;
+ fattr->ncf_cb_mtime.tv_sec = modify.seconds;
+ fattr->ncf_cb_mtime.tv_nsec = modify.nseconds;
+
+ }
return 0;
}
@@ -287,17 +309,17 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
u32 length;
__be32 *p;
- p = xdr_inline_decode(xdr, 4 + 4);
+ p = xdr_inline_decode(xdr, XDR_UNIT);
if (unlikely(p == NULL))
goto out_overflow;
- hdr->status = be32_to_cpup(p++);
+ hdr->status = be32_to_cpup(p);
/* Ignore the tag */
- length = be32_to_cpup(p++);
- p = xdr_inline_decode(xdr, length + 4);
- if (unlikely(p == NULL))
+ if (xdr_stream_decode_u32(xdr, &length) < 0)
+ goto out_overflow;
+ if (xdr_inline_decode(xdr, length) == NULL)
+ goto out_overflow;
+ if (xdr_stream_decode_u32(xdr, &hdr->nops) < 0)
goto out_overflow;
- p += XDR_QUADLEN(length);
- hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
return -EIO;
@@ -361,16 +383,63 @@ static void
encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
struct nfs4_cb_fattr *fattr)
{
- struct nfs4_delegation *dp =
- container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
+ struct nfs4_delegation *dp = container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
-
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+ u32 bmap_size = 1;
+ u32 bmap[3];
+
+ bmap[0] = FATTR4_WORD0_SIZE;
+ if (!ncf->ncf_file_modified)
+ bmap[0] |= FATTR4_WORD0_CHANGE;
+
+ if (deleg_attrs_deleg(dp->dl_type)) {
+ bmap[1] = 0;
+ bmap[2] = FATTR4_WORD2_TIME_DELEG_ACCESS | FATTR4_WORD2_TIME_DELEG_MODIFY;
+ bmap_size = 3;
+ }
encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
encode_nfs_fh4(xdr, fh);
- encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap));
+ encode_bitmap4(xdr, bmap, bmap_size);
hdr->nops++;
}
+static u32 highest_slotid(struct nfsd4_session *ses)
+{
+ u32 idx;
+
+ spin_lock(&ses->se_lock);
+ idx = fls(~ses->se_cb_slot_avail);
+ if (idx > 0)
+ --idx;
+ idx = max(idx, ses->se_cb_highest_slot);
+ spin_unlock(&ses->se_lock);
+ return idx;
+}
+
+static void
+encode_referring_call4(struct xdr_stream *xdr,
+ const struct nfsd4_referring_call *rc)
+{
+ encode_uint32(xdr, rc->rc_sequenceid);
+ encode_uint32(xdr, rc->rc_slotid);
+}
+
+static void
+encode_referring_call_list4(struct xdr_stream *xdr,
+ const struct nfsd4_referring_call_list *rcl)
+{
+ struct nfsd4_referring_call *rc;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
+ xdr_encode_opaque_fixed(p, rcl->rcl_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ encode_uint32(xdr, rcl->__nr_referring_calls);
+ list_for_each_entry(rc, &rcl->rcl_referring_calls, __list)
+ encode_referring_call4(xdr, rc);
+}
+
/*
* CB_SEQUENCE4args
*
@@ -388,6 +457,7 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr,
struct nfs4_cb_compound_hdr *hdr)
{
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+ struct nfsd4_referring_call_list *rcl;
__be32 *p;
if (hdr->minorversion == 0)
@@ -396,16 +466,45 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr,
encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
encode_sessionid4(xdr, session);
- p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
- *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
- *p++ = xdr_zero; /* csa_slotid */
- *p++ = xdr_zero; /* csa_highest_slotid */
+ p = xdr_reserve_space(xdr, XDR_UNIT * 4);
+ *p++ = cpu_to_be32(session->se_cb_seq_nr[cb->cb_held_slot]); /* csa_sequenceid */
+ *p++ = cpu_to_be32(cb->cb_held_slot); /* csa_slotid */
+ *p++ = cpu_to_be32(highest_slotid(session)); /* csa_highest_slotid */
*p++ = xdr_zero; /* csa_cachethis */
- xdr_encode_empty_array(p); /* csa_referring_call_lists */
+
+ /* csa_referring_call_lists */
+ encode_uint32(xdr, cb->cb_nr_referring_call_list);
+ list_for_each_entry(rcl, &cb->cb_referring_call_list, __list)
+ encode_referring_call_list4(xdr, rcl);
hdr->nops++;
}
+static void update_cb_slot_table(struct nfsd4_session *ses, u32 target)
+{
+ /* No need to do anything if nothing changed */
+ if (likely(target == READ_ONCE(ses->se_cb_highest_slot)))
+ return;
+
+ spin_lock(&ses->se_lock);
+ if (target > ses->se_cb_highest_slot) {
+ int i;
+
+ target = min(target, NFSD_BC_SLOT_TABLE_SIZE - 1);
+
+ /*
+ * Growing the slot table. Reset any new sequences to 1.
+ *
+ * NB: There is some debate about whether the RFC requires this,
+ * but the Linux client expects it.
+ */
+ for (i = ses->se_cb_highest_slot + 1; i <= target; ++i)
+ ses->se_cb_seq_nr[i] = 1;
+ }
+ ses->se_cb_highest_slot = target;
+ spin_unlock(&ses->se_lock);
+}
+
/*
* CB_SEQUENCE4resok
*
@@ -433,7 +532,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
int status = -ESERVERFAULT;
__be32 *p;
- u32 dummy;
+ u32 seqid, slotid, target;
/*
* If the server returns different values for sessionID, slotID or
@@ -449,21 +548,22 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
}
p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
- dummy = be32_to_cpup(p++);
- if (dummy != session->se_cb_seq_nr) {
+ seqid = be32_to_cpup(p++);
+ if (seqid != session->se_cb_seq_nr[cb->cb_held_slot]) {
dprintk("NFS: %s Invalid sequence number\n", __func__);
goto out;
}
- dummy = be32_to_cpup(p++);
- if (dummy != 0) {
+ slotid = be32_to_cpup(p++);
+ if (slotid != cb->cb_held_slot) {
dprintk("NFS: %s Invalid slotid\n", __func__);
goto out;
}
- /*
- * FIXME: process highest slotid and target highest slotid
- */
+ p++; // ignore current highest slot value
+
+ target = be32_to_cpup(p++);
+ update_cb_slot_table(session, target);
status = 0;
out:
cb->cb_seq_status = status;
@@ -592,7 +692,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
struct nfs4_cb_compound_hdr hdr;
int status;
u32 bitmap[3] = {0};
- u32 attrlen;
+ u32 attrlen, maxlen;
struct nfs4_cb_fattr *ncf =
container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
@@ -605,14 +705,18 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
return status;
status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
- if (status)
+ if (unlikely(status || cb->cb_status))
return status;
if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
- return -NFSERR_BAD_XDR;
+ return -EIO;
if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
- return -NFSERR_BAD_XDR;
- if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
- return -NFSERR_BAD_XDR;
+ return -EIO;
+ maxlen = sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize);
+ if (bitmap[2] != 0)
+ maxlen += (sizeof(ncf->ncf_cb_mtime.tv_sec) +
+ sizeof(ncf->ncf_cb_mtime.tv_nsec)) * 2;
+ if (attrlen > maxlen)
+ return -EIO;
status = decode_cb_fattr4(xdr, bitmap, ncf);
return status;
}
@@ -986,6 +1090,17 @@ static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
return queue_work(clp->cl_callback_wq, &cb->cb_work);
}
+static void nfsd4_requeue_cb(struct rpc_task *task, struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+ trace_nfsd_cb_restart(clp, cb);
+ task->tk_status = 0;
+ set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
+ }
+}
+
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
{
atomic_inc(&clp->cl_cb_inflight);
@@ -994,8 +1109,7 @@ static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
{
- if (atomic_dec_and_test(&clp->cl_cb_inflight))
- wake_up_var(&clp->cl_cb_inflight);
+ atomic_dec_and_wake_up(&clp->cl_cb_inflight);
}
static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
@@ -1058,7 +1172,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
- if (!conn->cb_xprt)
+ if (!conn->cb_xprt || !ses)
return -EINVAL;
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
@@ -1164,6 +1278,22 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
spin_unlock(&clp->cl_lock);
}
+static int grab_slot(struct nfsd4_session *ses)
+{
+ int idx;
+
+ spin_lock(&ses->se_lock);
+ idx = ffs(ses->se_cb_slot_avail) - 1;
+ if (idx < 0 || idx > ses->se_cb_highest_slot) {
+ spin_unlock(&ses->se_lock);
+ return -1;
+ }
+ /* clear the bit for the slot */
+ ses->se_cb_slot_avail &= ~BIT(idx);
+ spin_unlock(&ses->se_lock);
+ return idx;
+}
+
/*
* There's currently a single callback channel slot.
* If the slot is available, then mark it busy. Otherwise, set the
@@ -1172,28 +1302,32 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd4_session *ses = clp->cl_cb_session;
- if (!cb->cb_holds_slot &&
- test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ if (cb->cb_held_slot >= 0)
+ return true;
+ cb->cb_held_slot = grab_slot(ses);
+ if (cb->cb_held_slot < 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
/* Race breaker */
- if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
- dprintk("%s slot is busy\n", __func__);
+ cb->cb_held_slot = grab_slot(ses);
+ if (cb->cb_held_slot < 0)
return false;
- }
rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
- cb->cb_holds_slot = true;
return true;
}
static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd4_session *ses = clp->cl_cb_session;
- if (cb->cb_holds_slot) {
- cb->cb_holds_slot = false;
- clear_bit(0, &clp->cl_cb_slot_busy);
+ if (cb->cb_held_slot >= 0) {
+ spin_lock(&ses->se_lock);
+ ses->se_cb_slot_avail |= BIT(cb->cb_held_slot);
+ spin_unlock(&ses->se_lock);
+ cb->cb_held_slot = -1;
rpc_wake_up_next(&clp->cl_cb_waitq);
}
}
@@ -1204,15 +1338,113 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
trace_nfsd_cb_destroy(clp, cb);
nfsd41_cb_release_slot(cb);
+ if (test_bit(NFSD4_CALLBACK_WAKE, &cb->cb_flags))
+ clear_and_wake_up_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags);
+ else
+ clear_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags);
+
if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb);
nfsd41_cb_inflight_end(clp);
}
-/*
- * TODO: cb_sequence should support referring call lists, cachethis, multiple
- * slots, and mark callback channel down on communication errors.
+/**
+ * nfsd41_cb_referring_call - add a referring call to a callback operation
+ * @cb: context of callback to add the rc to
+ * @sessionid: referring call's session ID
+ * @slotid: referring call's session slot index
+ * @seqno: referring call's slot sequence number
+ *
+ * Caller serializes access to @cb.
+ *
+ * NB: If memory allocation fails, the referring call is not added.
*/
+void nfsd41_cb_referring_call(struct nfsd4_callback *cb,
+ struct nfs4_sessionid *sessionid,
+ u32 slotid, u32 seqno)
+{
+ struct nfsd4_referring_call_list *rcl;
+ struct nfsd4_referring_call *rc;
+ bool found;
+
+ might_sleep();
+
+ found = false;
+ list_for_each_entry(rcl, &cb->cb_referring_call_list, __list) {
+ if (!memcmp(rcl->rcl_sessionid.data, sessionid->data,
+ NFS4_MAX_SESSIONID_LEN)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ rcl = kmalloc(sizeof(*rcl), GFP_KERNEL);
+ if (!rcl)
+ return;
+ memcpy(rcl->rcl_sessionid.data, sessionid->data,
+ NFS4_MAX_SESSIONID_LEN);
+ rcl->__nr_referring_calls = 0;
+ INIT_LIST_HEAD(&rcl->rcl_referring_calls);
+ list_add(&rcl->__list, &cb->cb_referring_call_list);
+ cb->cb_nr_referring_call_list++;
+ }
+
+ found = false;
+ list_for_each_entry(rc, &rcl->rcl_referring_calls, __list) {
+ if (rc->rc_sequenceid == seqno && rc->rc_slotid == slotid) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ rc = kmalloc(sizeof(*rc), GFP_KERNEL);
+ if (!rc)
+ goto out;
+ rc->rc_sequenceid = seqno;
+ rc->rc_slotid = slotid;
+ rcl->__nr_referring_calls++;
+ list_add(&rc->__list, &rcl->rcl_referring_calls);
+ }
+
+out:
+ if (!rcl->__nr_referring_calls) {
+ cb->cb_nr_referring_call_list--;
+ list_del(&rcl->__list);
+ kfree(rcl);
+ }
+}
+
+/**
+ * nfsd41_cb_destroy_referring_call_list - release referring call info
+ * @cb: context of a callback that has completed
+ *
+ * Callers who allocate referring calls using nfsd41_cb_referring_call() must
+ * release those resources by calling nfsd41_cb_destroy_referring_call_list.
+ *
+ * Caller serializes access to @cb.
+ */
+void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb)
+{
+ struct nfsd4_referring_call_list *rcl;
+ struct nfsd4_referring_call *rc;
+
+ while (!list_empty(&cb->cb_referring_call_list)) {
+ rcl = list_first_entry(&cb->cb_referring_call_list,
+ struct nfsd4_referring_call_list,
+ __list);
+
+ while (!list_empty(&rcl->rcl_referring_calls)) {
+ rc = list_first_entry(&rcl->rcl_referring_calls,
+ struct nfsd4_referring_call,
+ __list);
+ list_del(&rc->__list);
+ kfree(rc);
+ }
+ list_del(&rcl->__list);
+ kfree(rcl);
+ }
+}
+
static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
{
struct nfsd4_callback *cb = calldata;
@@ -1231,30 +1463,14 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
rpc_call_start(task);
}
+/* Returns true if CB_COMPOUND processing should continue */
static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
{
- struct nfs4_client *clp = cb->cb_clp;
- struct nfsd4_session *session = clp->cl_cb_session;
- bool ret = true;
-
- if (!clp->cl_minorversion) {
- /*
- * If the backchannel connection was shut down while this
- * task was queued, we need to resubmit it after setting up
- * a new backchannel connection.
- *
- * Note that if we lost our callback connection permanently
- * the submission code will error out, so we don't need to
- * handle that case here.
- */
- if (RPC_SIGNALLED(task))
- goto need_restart;
-
- return true;
- }
+ struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+ bool ret = false;
- if (!cb->cb_holds_slot)
- goto need_restart;
+ if (cb->cb_held_slot < 0)
+ goto requeue;
/* This is the operation status code for CB_SEQUENCE */
trace_nfsd_cb_seq_status(task, cb);
@@ -1267,12 +1483,17 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
* If CB_SEQUENCE returns an error, then the state of the slot
* (sequence ID, cached reply) MUST NOT change.
*/
- ++session->se_cb_seq_nr;
+ ++session->se_cb_seq_nr[cb->cb_held_slot];
+ ret = true;
break;
case -ESERVERFAULT:
- ++session->se_cb_seq_nr;
+ /*
+ * Call succeeded, but the session, slot index, or slot
+ * sequence number in the response do not match the same
+ * in the server's call. The sequence information is thus
+ * untrustworthy.
+ */
nfsd4_mark_cb_fault(cb->cb_clp);
- ret = false;
break;
case 1:
/*
@@ -1284,44 +1505,42 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
fallthrough;
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp);
- ret = false;
- goto need_restart;
+ goto requeue;
case -NFS4ERR_DELAY:
cb->cb_seq_status = 1;
- if (!rpc_restart_call(task))
- goto out;
-
+ if (RPC_SIGNALLED(task) || !rpc_restart_call(task))
+ goto requeue;
rpc_delay(task, 2 * HZ);
return false;
+ case -NFS4ERR_SEQ_MISORDERED:
case -NFS4ERR_BADSLOT:
+ /*
+ * A SEQ_MISORDERED or BADSLOT error means that the client and
+ * server are out of sync as to the backchannel parameters. Mark
+ * the backchannel faulty and restart the RPC, but leak the slot
+ * so that it's no longer used.
+ */
+ nfsd4_mark_cb_fault(cb->cb_clp);
+ cb->cb_held_slot = -1;
goto retry_nowait;
- case -NFS4ERR_SEQ_MISORDERED:
- if (session->se_cb_seq_nr != 1) {
- session->se_cb_seq_nr = 1;
- goto retry_nowait;
- }
- break;
default:
nfsd4_mark_cb_fault(cb->cb_clp);
}
- nfsd41_cb_release_slot(cb);
-
trace_nfsd_cb_free_slot(task, cb);
-
- if (RPC_SIGNALLED(task))
- goto need_restart;
-out:
+ nfsd41_cb_release_slot(cb);
return ret;
retry_nowait:
- if (rpc_restart_call_prepare(task))
- ret = false;
- goto out;
-need_restart:
- if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
- trace_nfsd_cb_restart(clp, cb);
- task->tk_status = 0;
- cb->cb_need_restart = true;
+ /*
+ * RPC_SIGNALLED() means that the rpc_client is being torn down and
+ * (possibly) recreated. Requeue the call in that case.
+ */
+ if (!RPC_SIGNALLED(task)) {
+ if (rpc_restart_call_prepare(task))
+ return false;
}
+requeue:
+ nfsd41_cb_release_slot(cb);
+ nfsd4_requeue_cb(task, cb);
return false;
}
@@ -1332,12 +1551,26 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
trace_nfsd_cb_rpc_done(clp);
- if (!nfsd4_cb_sequence_done(task, cb))
+ if (!clp->cl_minorversion) {
+ /*
+ * If the backchannel connection was shut down while this
+ * task was queued, we need to resubmit it after setting up
+ * a new backchannel connection.
+ *
+ * Note that if we lost our callback connection permanently
+ * the submission code will error out, so we don't need to
+ * handle that case here.
+ */
+ if (RPC_SIGNALLED(task))
+ nfsd4_requeue_cb(task, cb);
+ } else if (!nfsd4_cb_sequence_done(task, cb)) {
return;
+ }
if (cb->cb_status) {
- WARN_ONCE(task->tk_status, "cb_status=%d tk_status=%d",
- cb->cb_status, task->tk_status);
+ WARN_ONCE(task->tk_status,
+ "cb_status=%d tk_status=%d cb_opcode=%d",
+ cb->cb_status, task->tk_status, cb->cb_ops->opcode);
task->tk_status = cb->cb_status;
}
@@ -1365,7 +1598,7 @@ static void nfsd4_cb_release(void *calldata)
trace_nfsd_cb_rpc_release(cb->cb_clp);
- if (cb->cb_need_restart)
+ if (test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags))
nfsd4_queue_cb(cb);
else
nfsd41_destroy_cb(cb);
@@ -1478,7 +1711,7 @@ nfsd4_run_cb_work(struct work_struct *work)
container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
- int flags;
+ int flags, ret;
trace_nfsd_cb_start(clp);
@@ -1486,8 +1719,11 @@ nfsd4_run_cb_work(struct work_struct *work)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
- if (!clnt) {
- /* Callback channel broken, or client killed; give up: */
+ if (!clnt || clp->cl_state == NFSD4_COURTESY) {
+ /*
+ * Callback channel broken, client killed or
+ * nfs4_client in courtesy state; give up.
+ */
nfsd41_destroy_cb(cb);
return;
}
@@ -1501,16 +1737,19 @@ nfsd4_run_cb_work(struct work_struct *work)
return;
}
- if (cb->cb_need_restart) {
- cb->cb_need_restart = false;
- } else {
+ if (!test_and_clear_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags)) {
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
}
+
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
- rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
- cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
+ ret = rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
+ cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
+ if (ret != 0) {
+ set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
+ nfsd4_queue_cb(cb);
+ }
}
void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
@@ -1520,11 +1759,13 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
+ cb->cb_flags = 0;
cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
- cb->cb_need_restart = false;
- cb->cb_holds_slot = false;
+ cb->cb_held_slot = -1;
+ cb->cb_nr_referring_call_list = 0;
+ INIT_LIST_HEAD(&cb->cb_referring_call_list);
}
/**
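
The callback changes replace the single backchannel slot with a small table: se_cb_slot_avail is a bitmap of free slots, grab_slot() claims the lowest set bit, and releasing a slot sets its bit again. A self-contained toy model of that allocator (locking omitted; the real code holds ses->se_lock around both operations):

#include <stdint.h>
#include <strings.h>

static uint32_t slot_avail = ~0u;       /* one bit per free slot */
static int highest_slot = 3;            /* table size - 1, as negotiated */

static int slot_grab(void)
{
        int idx = ffs((int)slot_avail) - 1;

        if (idx < 0 || idx > highest_slot)
                return -1;              /* nothing free: caller waits */
        slot_avail &= ~(1u << idx);
        return idx;
}

static void slot_release(int idx)
{
        slot_avail |= 1u << idx;
}

int main(void)
{
        int a = slot_grab();            /* 0 */
        int b = slot_grab();            /* 1 */

        slot_release(a);
        return (slot_grab() == a && b == 1) ? 0 : 1;
}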
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index fbfddd3c4c94..290271ac4245 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -344,9 +344,10 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);
- refcount_inc(&ls->ls_stid.sc_count);
- nfsd4_run_cb(&ls->ls_recall);
-
+ if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ls->ls_recall.cb_flags)) {
+ refcount_inc(&ls->ls_stid.sc_count);
+ nfsd4_run_cb(&ls->ls_recall);
+ }
out_unlock:
spin_unlock(&ls->ls_lock);
}
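
The recall path now uses NFSD4_CALLBACK_RUNNING as a queue-once guard: the stateid reference is taken and the callback queued only when the bit was previously clear, and nfsd41_destroy_cb() clears it when the callback finishes. A small sketch of the same guard, names illustrative:

#include <linux/bitops.h>
#include <linux/workqueue.h>

#define MY_CB_RUNNING 0

struct my_cb {
        unsigned long flags;
        struct work_struct work;
};

/*
 * Queue at most one instance of the callback; the completion path is
 * expected to clear MY_CB_RUNNING before the next queue attempt.
 */
static bool my_cb_queue_once(struct my_cb *cb, struct workqueue_struct *wq)
{
        if (test_and_set_bit(MY_CB_RUNNING, &cb->flags))
                return false;           /* already queued or running */
        return queue_work(wq, &cb->work);
}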
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b5a6bf4f459f..f13abbb13b38 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -57,6 +57,8 @@ module_param(inter_copy_offload_enable, bool, 0644);
MODULE_PARM_DESC(inter_copy_offload_enable,
"Enable inter server to server copy offload. Default: false");
+static void cleanup_async_copy(struct nfsd4_copy *copy);
+
#ifdef CONFIG_NFSD_V4_2_INTER_SSC
static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */
module_param(nfsd4_ssc_umount_timeout, int, 0644);
@@ -264,7 +266,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_lock_nested(inode, I_MUTEX_PARENT);
- child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
+ child = lookup_one(&nop_mnt_idmap,
+ &QSTR_LEN(open->op_fname, open->op_fnamelen),
+ parent);
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
goto out;
@@ -874,6 +878,8 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_getattr *getattr = &u->getattr;
__be32 status;
+ trace_nfsd_vfs_getattr(rqstp, &cstate->current_fh);
+
status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
return status;
@@ -996,6 +1002,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
u64 cookie = readdir->rd_cookie;
static const nfs4_verifier zeroverf;
+ trace_nfsd_vfs_readdir(rqstp, &cstate->current_fh,
+ readdir->rd_maxcount, readdir->rd_cookie);
+
/* no need to check permission - this will be done in nfsd_readdir() */
if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
@@ -1133,18 +1142,43 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
.na_iattr = &setattr->sa_iattr,
.na_seclabel = &setattr->sa_label,
};
+ bool save_no_wcc, deleg_attrs;
+ struct nfs4_stid *st = NULL;
struct inode *inode;
__be32 status = nfs_ok;
- bool save_no_wcc;
int err;
- if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
+ deleg_attrs = setattr->sa_bmval[2] & (FATTR4_WORD2_TIME_DELEG_ACCESS |
+ FATTR4_WORD2_TIME_DELEG_MODIFY);
+
+ if (deleg_attrs || (setattr->sa_iattr.ia_valid & ATTR_SIZE)) {
+ int flags = WR_STATE;
+
+ if (setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)
+ flags |= RD_STATE;
+
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&cstate->current_fh, &setattr->sa_stateid,
- WR_STATE, NULL, NULL);
+ flags, NULL, &st);
if (status)
return status;
}
+
+ if (deleg_attrs) {
+ status = nfserr_bad_stateid;
+ if (st->sc_type & SC_TYPE_DELEG) {
+ struct nfs4_delegation *dp = delegstateid(st);
+
+ /* Only for *_ATTRS_DELEG flavors */
+ if (deleg_attrs_deleg(dp->dl_type))
+ status = nfs_ok;
+ }
+ }
+ if (st)
+ nfs4_put_stid(st);
+ if (status)
+ return status;
+
err = fh_want_write(&cstate->current_fh);
if (err)
return nfserrno(err);
@@ -1184,7 +1218,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd_file *nf = NULL;
__be32 status = nfs_ok;
unsigned long cnt;
- int nvecs;
if (write->wr_offset > (u64)OFFSET_MAX ||
write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX)
@@ -1199,13 +1232,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
write->wr_how_written = write->wr_stable_how;
-
- nvecs = svc_fill_write_vector(rqstp, &write->wr_payload);
- WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
-
status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
- write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- write->wr_how_written,
+ write->wr_offset, &write->wr_payload,
+ &cnt, write->wr_how_written,
(__be32 *)write->wr_verifier.data);
nfsd_file_put(nf);
@@ -1276,23 +1305,91 @@ out:
return status;
}
+/**
+ * nfsd4_has_active_async_copies - Check for ongoing copy operations
+ * @clp: Client to be checked
+ *
+ * NFSD maintains state for async COPY operations after they complete,
+ * and this state remains in the nfs4_client's async_copies list.
+ * Ongoing copies should block the destruction of the nfs4_client, but
+ * completed copies should not.
+ *
+ * Return values:
+ * %true: At least one active async COPY is ongoing
+ * %false: No active async COPY operations were found
+ */
+bool nfsd4_has_active_async_copies(struct nfs4_client *clp)
+{
+ struct nfsd4_copy *copy;
+ bool result = false;
+
+ spin_lock(&clp->async_lock);
+ list_for_each_entry(copy, &clp->async_copies, copies) {
+ if (!test_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags) &&
+ !test_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags)) {
+ result = true;
+ break;
+ }
+ }
+ spin_unlock(&clp->async_lock);
+ return result;
+}
+
+/**
+ * nfsd4_async_copy_reaper - Purge completed copies
+ * @nn: Network namespace with possible active copy information
+ */
+void nfsd4_async_copy_reaper(struct nfsd_net *nn)
+{
+ struct nfs4_client *clp;
+ struct nfsd4_copy *copy;
+ LIST_HEAD(reaplist);
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ struct list_head *pos, *next;
+
+ spin_lock(&clp->async_lock);
+ list_for_each_safe(pos, next, &clp->async_copies) {
+ copy = list_entry(pos, struct nfsd4_copy, copies);
+ if (test_bit(NFSD4_COPY_F_OFFLOAD_DONE, &copy->cp_flags)) {
+ if (--copy->cp_ttl) {
+ list_del_init(&copy->copies);
+ list_add(&copy->copies, &reaplist);
+ }
+ }
+ }
+ spin_unlock(&clp->async_lock);
+ }
+ spin_unlock(&nn->client_lock);
+
+ while (!list_empty(&reaplist)) {
+ copy = list_first_entry(&reaplist, struct nfsd4_copy, copies);
+ list_del_init(&copy->copies);
+ cleanup_async_copy(copy);
+ }
+}
+
static void nfs4_put_copy(struct nfsd4_copy *copy)
{
if (!refcount_dec_and_test(&copy->refcount))
return;
- atomic_dec(&copy->cp_nn->pending_async_copies);
kfree(copy->cp_src);
kfree(copy);
}
static void nfsd4_stop_copy(struct nfsd4_copy *copy)
{
- if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags))
+ trace_nfsd_copy_async_cancel(copy);
+ if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags)) {
kthread_stop(copy->copy_task);
+ copy->nfserr = nfs_ok;
+ set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
+ }
nfs4_put_copy(copy);
}
-static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp)
+static struct nfsd4_copy *nfsd4_unhash_copy(struct nfs4_client *clp)
{
struct nfsd4_copy *copy = NULL;
@@ -1301,6 +1398,9 @@ static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp)
copy = list_first_entry(&clp->async_copies, struct nfsd4_copy,
copies);
refcount_inc(&copy->refcount);
+ copy->cp_clp = NULL;
+ if (!list_empty(&copy->copies))
+ list_del_init(&copy->copies);
}
spin_unlock(&clp->async_lock);
return copy;
@@ -1310,7 +1410,7 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp)
{
struct nfsd4_copy *copy;
- while ((copy = nfsd4_get_copy(clp)) != NULL)
+ while ((copy = nfsd4_unhash_copy(clp)) != NULL)
nfsd4_stop_copy(copy);
}
#ifdef CONFIG_NFSD_V4_2_INTER_SSC
@@ -1598,8 +1698,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
{
struct nfsd4_cb_offload *cbo =
container_of(cb, struct nfsd4_cb_offload, co_cb);
+ struct nfsd4_copy *copy =
+ container_of(cbo, struct nfsd4_copy, cp_cb_offload);
- kfree(cbo);
+ set_bit(NFSD4_COPY_F_OFFLOAD_DONE, &copy->cp_flags);
}
static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
@@ -1609,6 +1711,14 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
container_of(cb, struct nfsd4_cb_offload, co_cb);
trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task);
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ if (cbo->co_retries--) {
+ rpc_delay(task, HZ / 5);
+ return 0;
+ }
+ }
+ nfsd41_cb_destroy_referring_call_list(cb);
return 1;
}
@@ -1732,21 +1842,21 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
{
- struct nfsd4_cb_offload *cbo;
-
- cbo = kzalloc(sizeof(*cbo), GFP_KERNEL);
- if (!cbo)
- return;
+ struct nfsd4_cb_offload *cbo = &copy->cp_cb_offload;
memcpy(&cbo->co_res, &copy->cp_res, sizeof(copy->cp_res));
memcpy(&cbo->co_fh, &copy->fh, sizeof(copy->fh));
cbo->co_nfserr = copy->nfserr;
+ cbo->co_retries = 5;
nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops,
NFSPROC4_CLNT_CB_OFFLOAD);
+ nfsd41_cb_referring_call(&cbo->co_cb, &cbo->co_referring_sessionid,
+ cbo->co_referring_slotid,
+ cbo->co_referring_seqno);
trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid,
&cbo->co_fh, copy->cp_count, copy->nfserr);
- nfsd4_run_cb(&cbo->co_cb);
+ nfsd4_try_run_cb(&cbo->co_cb);
}
/**
@@ -1786,10 +1896,14 @@ static int nfsd4_do_async_copy(void *data)
}
do_callback:
+ /* The kthread exits forthwith. Ensure that a subsequent
+ * OFFLOAD_CANCEL won't try to kill it again. */
+ set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags);
+
set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
trace_nfsd_copy_async_done(copy);
nfsd4_send_cb_offload(copy);
- cleanup_async_copy(copy);
+ atomic_dec(&copy->cp_nn->pending_async_copies);
return 0;
}
@@ -1841,26 +1955,30 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!async_copy)
goto out_err;
async_copy->cp_nn = nn;
- /* Arbitrary cap on number of pending async copy operations */
- if (atomic_inc_return(&nn->pending_async_copies) >
- (int)rqstp->rq_pool->sp_nrthreads) {
- atomic_dec(&nn->pending_async_copies);
- goto out_err;
- }
INIT_LIST_HEAD(&async_copy->copies);
refcount_set(&async_copy->refcount, 1);
+ async_copy->cp_ttl = NFSD_COPY_INITIAL_TTL;
+ /* Arbitrary cap on number of pending async copy operations */
+ if (atomic_inc_return(&nn->pending_async_copies) >
+ (int)rqstp->rq_pool->sp_nrthreads)
+ goto out_dec_async_copy_err;
async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
if (!async_copy->cp_src)
- goto out_err;
+ goto out_dec_async_copy_err;
if (!nfs4_init_copy_state(nn, copy))
- goto out_err;
+ goto out_dec_async_copy_err;
memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
sizeof(result->cb_stateid));
dup_copy_fields(copy, async_copy);
+ memcpy(async_copy->cp_cb_offload.co_referring_sessionid.data,
+ cstate->session->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ async_copy->cp_cb_offload.co_referring_slotid = cstate->slot->sl_index;
+ async_copy->cp_cb_offload.co_referring_seqno = cstate->slot->sl_seqid;
async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
async_copy, "%s", "copy thread");
if (IS_ERR(async_copy->copy_task))
- goto out_err;
+ goto out_dec_async_copy_err;
spin_lock(&async_copy->cp_clp->async_lock);
list_add(&async_copy->copies,
&async_copy->cp_clp->async_copies);
@@ -1875,6 +1993,9 @@ out:
trace_nfsd_copy_done(copy, status);
release_copy_files(copy);
return status;
+out_dec_async_copy_err:
+ if (async_copy)
+ atomic_dec(&nn->pending_async_copies);
out_err:
if (nfsd4_ssc_is_inter(copy)) {
/*
@@ -2782,6 +2903,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (op->opdesc->op_get_currentstateid)
op->opdesc->op_get_currentstateid(cstate, &op->u);
op->status = op->opdesc->op_func(rqstp, cstate, &op->u);
+ trace_nfsd_compound_op_err(rqstp, op->opnum, op->status);
/* Only from SEQUENCE */
if (cstate->status == nfserr_replay_cache) {
@@ -2798,7 +2920,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (current_fh->fh_export &&
need_wrongsec_check(rqstp))
- op->status = check_nfsd_access(current_fh->fh_export, rqstp);
+ op->status = check_nfsd_access(current_fh->fh_export, rqstp, false);
}
encode_op:
if (op->status == nfserr_replay_me) {
@@ -3454,6 +3576,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
/* NFSv4.1 operations */
[OP_EXCHANGE_ID] = {
.op_func = nfsd4_exchange_id,
+ .op_release = nfsd4_exchange_id_release,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
| OP_MODIFIES_SOMETHING,
.op_name = "OP_EXCHANGE_ID",
@@ -3657,7 +3780,8 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow;
u32 opiter;
- if (!cstate->minorversion)
+ if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] ||
+ cstate->minorversion == 0)
return false;
if (cstate->spo_must_allowed)
@@ -3723,7 +3847,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
.pc_ressize = sizeof(struct nfsd4_compoundres),
.pc_release = nfsd4_release_compoundargs,
.pc_cachetype = RC_NOCACHE,
- .pc_xdrressize = NFSD_BUFSIZE/4,
+ .pc_xdrressize = 3+NFSSVC_MAXBLKSIZE/4,
.pc_name = "COMPOUND",
},
};
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b7d61eb8afe9..82785db730d9 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,6 +33,7 @@
*/
#include <crypto/hash.h>
+#include <crypto/sha2.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
@@ -82,14 +83,13 @@ nfs4_save_creds(const struct cred **original_creds)
new->fsuid = GLOBAL_ROOT_UID;
new->fsgid = GLOBAL_ROOT_GID;
*original_creds = override_creds(new);
- put_cred(new);
return 0;
}
static void
nfs4_reset_creds(const struct cred *original)
{
- revert_creds(original);
+ put_cred(revert_creds(original));
}
static void
@@ -219,7 +219,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
/* lock the parent */
inode_lock(d_inode(dir));
- dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
+ dentry = lookup_one(&nop_mnt_idmap, &QSTR(dname), dir);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
goto out_unlock;
@@ -234,9 +234,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* as well be forgiving and just succeed silently.
*/
goto out_put;
- status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
+ dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
+ if (IS_ERR(dentry))
+ status = PTR_ERR(dentry);
out_put:
- dput(dentry);
+ if (!status)
+ dput(dentry);
out_unlock:
inode_unlock(d_inode(dir));
if (status == 0) {
@@ -314,7 +317,8 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
if (!status) {
struct dentry *dentry;
- dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
+ dentry = lookup_one(&nop_mnt_idmap,
+ &QSTR(entry->name), dir);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
break;
@@ -337,16 +341,16 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
}
static int
-nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
+nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
{
struct dentry *dir, *dentry;
int status;
- dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+ dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
dir = nn->rec_file->f_path.dentry;
inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
- dentry = lookup_one_len(name, dir, namlen);
+ dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
goto out_unlock;
@@ -406,7 +410,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
if (status < 0)
goto out_drop_write;
- status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
+ status = nfsd4_unlink_clid_dir(dname, nn);
nfs4_reset_creds(original_cred);
if (status == 0) {
vfs_fsync(nn->rec_file, 0);
@@ -659,7 +663,8 @@ nfs4_reset_recoverydir(char *recdir)
return status;
status = -ENOTDIR;
if (d_is_dir(path.dentry)) {
- strcpy(user_recovery_dirname, recdir);
+ strscpy(user_recovery_dirname, recdir,
+ sizeof(user_recovery_dirname));
status = 0;
}
path_put(&path);
@@ -733,7 +738,6 @@ struct cld_net {
spinlock_t cn_lock;
struct list_head cn_list;
unsigned int cn_xid;
- struct crypto_shash *cn_tfm;
#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
bool cn_has_legacy;
#endif
@@ -1059,8 +1063,6 @@ nfsd4_remove_cld_pipe(struct net *net)
nfsd4_cld_unregister_net(net, cn->cn_pipe);
rpc_destroy_pipe_data(cn->cn_pipe);
- if (cn->cn_tfm)
- crypto_free_shash(cn->cn_tfm);
kfree(nn->cld_net);
nn->cld_net = NULL;
}
@@ -1154,8 +1156,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp)
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
struct cld_net *cn = nn->cld_net;
struct cld_msg_v2 *cmsg;
- struct crypto_shash *tfm = cn->cn_tfm;
- struct xdr_netobj cksum;
char *principal = NULL;
/* Don't upcall if it's already stored */
@@ -1178,22 +1178,9 @@ nfsd4_cld_create_v2(struct nfs4_client *clp)
else if (clp->cl_cred.cr_principal)
principal = clp->cl_cred.cr_principal;
if (principal) {
- cksum.len = crypto_shash_digestsize(tfm);
- cksum.data = kmalloc(cksum.len, GFP_KERNEL);
- if (cksum.data == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- ret = crypto_shash_tfm_digest(tfm, principal, strlen(principal),
- cksum.data);
- if (ret) {
- kfree(cksum.data);
- goto out;
- }
- cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len;
- memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
- cksum.data, cksum.len);
- kfree(cksum.data);
+ sha256(principal, strlen(principal),
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data);
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = SHA256_DIGEST_SIZE;
} else
cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
@@ -1203,7 +1190,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp)
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
-out:
free_cld_upcall(cup);
out_err:
if (ret)
@@ -1342,12 +1328,11 @@ found:
static int
nfsd4_cld_check_v2(struct nfs4_client *clp)
{
- struct nfs4_client_reclaim *crp;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
struct cld_net *cn = nn->cld_net;
- int status;
- struct crypto_shash *tfm = cn->cn_tfm;
- struct xdr_netobj cksum;
+#endif
+ struct nfs4_client_reclaim *crp;
char *principal = NULL;
/* did we already find that this client is stable? */
@@ -1363,6 +1348,7 @@ nfsd4_cld_check_v2(struct nfs4_client *clp)
if (cn->cn_has_legacy) {
struct xdr_netobj name;
char dname[HEXDIR_LEN];
+ int status;
status = nfs4_make_rec_clidname(dname, &clp->cl_name);
if (status)
@@ -1385,28 +1371,18 @@ nfsd4_cld_check_v2(struct nfs4_client *clp)
return -ENOENT;
found:
if (crp->cr_princhash.len) {
+ u8 digest[SHA256_DIGEST_SIZE];
+
if (clp->cl_cred.cr_raw_principal)
principal = clp->cl_cred.cr_raw_principal;
else if (clp->cl_cred.cr_principal)
principal = clp->cl_cred.cr_principal;
if (principal == NULL)
return -ENOENT;
- cksum.len = crypto_shash_digestsize(tfm);
- cksum.data = kmalloc(cksum.len, GFP_KERNEL);
- if (cksum.data == NULL)
- return -ENOENT;
- status = crypto_shash_tfm_digest(tfm, principal,
- strlen(principal), cksum.data);
- if (status) {
- kfree(cksum.data);
+ sha256(principal, strlen(principal), digest);
+ if (memcmp(crp->cr_princhash.data, digest,
+ crp->cr_princhash.len))
return -ENOENT;
- }
- if (memcmp(crp->cr_princhash.data, cksum.data,
- crp->cr_princhash.len)) {
- kfree(cksum.data);
- return -ENOENT;
- }
- kfree(cksum.data);
}
crp->cr_clp = clp;
return 0;
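
These hunks, together with the cn_tfm removals above and below, replace the allocated crypto_shash transform with the one-shot sha256() helper from <crypto/sha2.h>, which needs no allocation and has no failure path. A hedged sketch of the comparison step; principal_matches() is a hypothetical helper written only to illustrate the call, not part of the patch.

	#include <crypto/sha2.h>
	#include <linux/string.h>

	static bool principal_matches(const char *principal,
				      const u8 *stored, size_t stored_len)
	{
		u8 digest[SHA256_DIGEST_SIZE];

		/* One-shot library hash: no tfm to allocate or free. */
		sha256(principal, strlen(principal), digest);
		return stored_len <= sizeof(digest) &&
		       memcmp(stored, digest, stored_len) == 0;
	}
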
@@ -1586,7 +1562,6 @@ nfsd4_cld_tracking_init(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
bool running;
int retries = 10;
- struct crypto_shash *tfm;
status = nfs4_cld_state_init(net);
if (status)
@@ -1611,12 +1586,6 @@ nfsd4_cld_tracking_init(struct net *net)
status = -ETIMEDOUT;
goto err_remove;
}
- tfm = crypto_alloc_shash("sha256", 0, 0);
- if (IS_ERR(tfm)) {
- status = PTR_ERR(tfm);
- goto err_remove;
- }
- nn->cld_net->cn_tfm = tfm;
status = nfsd4_cld_get_version(nn);
if (status == -EOPNOTSUPP)
@@ -2051,7 +2020,6 @@ static inline int check_for_legacy_methods(int status, struct net *net)
path_put(&path);
if (status)
return -ENOTDIR;
- status = nn->client_tracking_ops->init(net);
}
return status;
}
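
Throughout this file (and in nfs4proc.c above), lookup_one_len() is converted to lookup_one(), which takes an idmap and a struct qstr instead of a separate name and length. A minimal sketch of the two qstr helpers the conversion uses; dir, name, buf and len are placeholder variables, and the parent inode is assumed to be locked around the lookup as before.

	/* NUL-terminated name: QSTR() derives the length itself. */
	dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);

	/* Counted buffer: QSTR_LEN() takes an explicit length. */
	child = lookup_one(&nop_mnt_idmap, &QSTR_LEN(buf, len), dir);
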
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 551d2958ec29..d5694987f86f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -149,14 +149,14 @@ void nfsd4_destroy_laundry_wq(void)
static bool is_session_dead(struct nfsd4_session *ses)
{
- return ses->se_flags & NFS4_SESSION_DEAD;
+ return ses->se_dead;
}
static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
{
if (atomic_read(&ses->se_ref) > ref_held_by_me)
return nfserr_jukebox;
- ses->se_flags |= NFS4_SESSION_DEAD;
+ ses->se_dead = true;
return nfs_ok;
}
@@ -572,13 +572,6 @@ opaque_hashval(const void *ptr, int nbytes)
return x;
}
-static void nfsd4_free_file_rcu(struct rcu_head *rcu)
-{
- struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);
-
- kmem_cache_free(file_slab, fp);
-}
-
void
put_nfs4_file(struct nfs4_file *fi)
{
@@ -586,7 +579,7 @@ put_nfs4_file(struct nfs4_file *fi)
nfsd4_file_hash_remove(fi);
WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
- call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
+ kfree_rcu(fi, fi_rcu);
}
}
@@ -953,15 +946,6 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
spin_lock_init(&stid->sc_lock);
INIT_LIST_HEAD(&stid->sc_cp_list);
- /*
- * It shouldn't be a problem to reuse an opaque stateid value.
- * I don't think it is for 4.1. But with 4.0 I worry that, for
- * example, a stray write retransmission could be accepted by
- * the server when it should have been rejected. Therefore,
- * adopt a trick from the sctp code to attempt to maximize the
- * amount of time until an id is reused, by ensuring they always
- * "increase" (mod INT_MAX):
- */
return stid;
out_free:
kmem_cache_free(slab, stid);
@@ -1057,6 +1041,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
return openlockstateid(stid);
}
+/*
+ * As the sc_free callback of a delegation, this may be called by
+ * nfs4_put_stid from within nfsd_break_one_deleg.
+ * Since nfsd_break_one_deleg is called with the flc->flc_lock held,
+ * this function must never sleep.
+ */
static void nfs4_free_deleg(struct nfs4_stid *stid)
{
struct nfs4_delegation *dp = delegstateid(stid);
@@ -1184,7 +1174,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
&nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
dp->dl_cb_fattr.ncf_file_modified = false;
- dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
@@ -1386,7 +1375,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
struct nfs4_client *clp = dp->dl_stid.sc_client;
WARN_ON(!list_empty(&dp->dl_recall_lru));
- WARN_ON_ONCE(!(dp->dl_stid.sc_status &
+ WARN_ON_ONCE(dp->dl_stid.sc_client->cl_minorversion > 0 &&
+ !(dp->dl_stid.sc_status &
(SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED)));
trace_nfsd_stid_revoke(&dp->dl_stid);
@@ -1660,6 +1650,14 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
free_ol_stateid_reaplist(&reaplist);
}
+static bool nfs4_openowner_unhashed(struct nfs4_openowner *oo)
+{
+ lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
+
+ return list_empty(&oo->oo_owner.so_strhash) &&
+ list_empty(&oo->oo_perclient);
+}
+
static void unhash_openowner_locked(struct nfs4_openowner *oo)
{
struct nfs4_client *clp = oo->oo_owner.so_client;
@@ -1909,113 +1907,145 @@ gen_sessionid(struct nfsd4_session *ses)
*/
#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
+static struct shrinker *nfsd_slot_shrinker;
+static DEFINE_SPINLOCK(nfsd_session_list_lock);
+static LIST_HEAD(nfsd_session_list);
+/* The sum of "target_slots-1" on every session. The shrinker can push this
+ * down, though it can take a little while for the memory to actually
+ * be freed. The "-1" is because we can never free slot 0 while the
+ * session is active.
+ */
+static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
+
static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
{
int i;
- for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
- free_svc_cred(&ses->se_slots[i]->sl_cred);
- kfree(ses->se_slots[i]);
+ if (from >= ses->se_fchannel.maxreqs)
+ return;
+
+ for (i = from; i < ses->se_fchannel.maxreqs; i++) {
+ struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+
+ /*
+ * Save the seqid in case we reactivate this slot.
+	 * This will never require a memory allocation, so the GFP
+	 * flag is irrelevant.
+ */
+ xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
+ free_svc_cred(&slot->sl_cred);
+ kfree(slot);
+ }
+ ses->se_fchannel.maxreqs = from;
+ if (ses->se_target_maxslots > from) {
+ int new_target = from ?: 1;
+ atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
+ ses->se_target_maxslots = new_target;
}
}
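
free_session_slots() parks each freed slot's seqid in the xarray as a value entry (a tagged integer that needs no allocation), so the sequence history survives even after the slot memory is released. A short sketch of the reactivation side, which the nfsd4_sequence() hunk further down performs when it grows the table again; old and slot are illustrative locals.

	void *old = xa_load(&ses->se_slots, i);

	if (xa_is_value(old))		/* slot was freed earlier; only its seqid remains */
		slot->sl_seqid = xa_to_value(old);
	/* Storing a real pointer replaces the value entry. */
	if (xa_is_err(xa_store(&ses->se_slots, i, slot, GFP_NOWAIT)))
		kfree(slot);
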
-/*
- * We don't actually need to cache the rpc and session headers, so we
- * can allocate a little less for each slot:
+/**
+ * reduce_session_slots - reduce the target max-slots of a session if possible
+ * @ses: The session to affect
+ * @dec: how much to decrease the target by
+ *
+ * This interface can be used by a shrinker to reduce the target max-slots
+ * for a session so that some slots can eventually be freed.
+ * It uses spin_trylock() as it may be called in a context where another
+ * spinlock is held that has a dependency on client_lock. As shrinkers are
+ * best-effort, skipping a session if client_lock is already held has no
+ * great cost.
+ *
+ * Return value:
+ * The number of slots that the target was reduced by.
*/
-static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
+static int
+reduce_session_slots(struct nfsd4_session *ses, int dec)
{
- u32 size;
+ struct nfsd_net *nn = net_generic(ses->se_client->net,
+ nfsd_net_id);
+ int ret = 0;
- if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ)
- size = 0;
- else
- size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
- return size + sizeof(struct nfsd4_slot);
+ if (ses->se_target_maxslots <= 1)
+ return ret;
+ if (!spin_trylock(&nn->client_lock))
+ return ret;
+ ret = min(dec, ses->se_target_maxslots-1);
+ ses->se_target_maxslots -= ret;
+ atomic_sub(ret, &nfsd_total_target_slots);
+ ses->se_slot_gen += 1;
+ if (ses->se_slot_gen == 0) {
+ int i;
+ ses->se_slot_gen = 1;
+ for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+ struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+ slot->sl_generation = 0;
+ }
+ }
+ spin_unlock(&nn->client_lock);
+ return ret;
}
-/*
- * XXX: If we run out of reserved DRC memory we could (up to a point)
- * re-negotiate active sessions and reduce their slot usage to make
- * room for new connections. For now we just fail the create session.
- */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
+static struct nfsd4_slot *nfsd4_alloc_slot(struct nfsd4_channel_attrs *fattrs,
+ int index, gfp_t gfp)
{
- u32 slotsize = slot_bytes(ca);
- u32 num = ca->maxreqs;
- unsigned long avail, total_avail;
- unsigned int scale_factor;
+ struct nfsd4_slot *slot;
+ size_t size;
- spin_lock(&nfsd_drc_lock);
- if (nfsd_drc_max_mem > nfsd_drc_mem_used)
- total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
- else
- /* We have handed out more space than we chose in
- * set_max_drc() to allow. That isn't really a
- * problem as long as that doesn't make us think we
- * have lots more due to integer overflow.
- */
- total_avail = 0;
- avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
/*
- * Never use more than a fraction of the remaining memory,
- * unless it's the only way to give this client a slot.
- * The chosen fraction is either 1/8 or 1/number of threads,
- * whichever is smaller. This ensures there are adequate
- * slots to support multiple clients per thread.
- * Give the client one slot even if that would require
- * over-allocation--it is better than failure.
+ * The RPC and NFS session headers are never saved in
+ * the slot reply cache buffer.
*/
- scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
-
- avail = clamp_t(unsigned long, avail, slotsize,
- total_avail/scale_factor);
- num = min_t(int, num, avail / slotsize);
- num = max_t(int, num, 1);
- nfsd_drc_mem_used += num * slotsize;
- spin_unlock(&nfsd_drc_lock);
-
- return num;
-}
-
-static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
-{
- int slotsize = slot_bytes(ca);
+ size = fattrs->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ ?
+ 0 : fattrs->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
- spin_lock(&nfsd_drc_lock);
- nfsd_drc_mem_used -= slotsize * ca->maxreqs;
- spin_unlock(&nfsd_drc_lock);
+ slot = kzalloc(struct_size(slot, sl_data, size), gfp);
+ if (!slot)
+ return NULL;
+ slot->sl_index = index;
+ return slot;
}
static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
struct nfsd4_channel_attrs *battrs)
{
int numslots = fattrs->maxreqs;
- int slotsize = slot_bytes(fattrs);
struct nfsd4_session *new;
+ struct nfsd4_slot *slot;
int i;
- BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
- > PAGE_SIZE);
-
- new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
return NULL;
- /* allocate each struct nfsd4_slot and data cache in one piece */
- for (i = 0; i < numslots; i++) {
- new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
- if (!new->se_slots[i])
- goto out_free;
- }
+ xa_init(&new->se_slots);
- memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
- memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));
+ slot = nfsd4_alloc_slot(fattrs, 0, GFP_KERNEL);
+ if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
+ goto out_free;
+ for (i = 1; i < numslots; i++) {
+ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+ slot = nfsd4_alloc_slot(fattrs, i, gfp);
+ if (!slot)
+ break;
+ if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) {
+ kfree(slot);
+ break;
+ }
+ }
+ fattrs->maxreqs = i;
+ memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+ new->se_target_maxslots = i;
+ atomic_add(i - 1, &nfsd_total_target_slots);
+ new->se_cb_slot_avail = ~0U;
+ new->se_cb_highest_slot = min(battrs->maxreqs - 1,
+ NFSD_BC_SLOT_TABLE_SIZE - 1);
+ spin_lock_init(&new->se_lock);
return new;
out_free:
- while (i--)
- kfree(new->se_slots[i]);
+ kfree(slot);
+ xa_destroy(&new->se_slots);
kfree(new);
return NULL;
}
@@ -2121,17 +2151,47 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
static void __free_session(struct nfsd4_session *ses)
{
- free_session_slots(ses);
+ free_session_slots(ses, 0);
+ xa_destroy(&ses->se_slots);
kfree(ses);
}
static void free_session(struct nfsd4_session *ses)
{
nfsd4_del_conns(ses);
- nfsd4_put_drc_mem(&ses->se_fchannel);
__free_session(ses);
}
+static unsigned long
+nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
+{
+ unsigned long cnt = atomic_read(&nfsd_total_target_slots);
+
+ return cnt ? cnt : SHRINK_EMPTY;
+}
+
+static unsigned long
+nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
+{
+ struct nfsd4_session *ses;
+ unsigned long scanned = 0;
+ unsigned long freed = 0;
+
+ spin_lock(&nfsd_session_list_lock);
+ list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
+ freed += reduce_session_slots(ses, 1);
+ scanned += 1;
+ if (scanned >= sc->nr_to_scan) {
+ /* Move starting point for next scan */
+ list_move(&nfsd_session_list, &ses->se_all_sessions);
+ break;
+ }
+ }
+ spin_unlock(&nfsd_session_list_lock);
+ sc->nr_scanned = scanned;
+ return freed;
+}
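
nfsd_slot_count() and nfsd_slot_scan() follow the usual shrinker contract: count reports how many objects could plausibly be freed (or SHRINK_EMPTY), and scan trims up to nr_to_scan of them, returning how many it actually freed. The registration of nfsd_slot_shrinker is not visible in this hunk; a sketch of how such a pair is typically wired up, assuming the shrinker_alloc()/shrinker_register() API and an illustrative name string.

	nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-session-slots");
	if (!nfsd_slot_shrinker)
		return -ENOMEM;
	nfsd_slot_shrinker->count_objects = nfsd_slot_count;
	nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
	shrinker_register(nfsd_slot_shrinker);
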
+
static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
{
int idx;
@@ -2142,17 +2202,24 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
INIT_LIST_HEAD(&new->se_conns);
- new->se_cb_seq_nr = 1;
- new->se_flags = cses->flags;
+ atomic_set(&new->se_ref, 0);
+ new->se_dead = false;
new->se_cb_prog = cses->callback_prog;
new->se_cb_sec = cses->cb_sec;
- atomic_set(&new->se_ref, 0);
+
+ for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx)
+ new->se_cb_seq_nr[idx] = 1;
+
idx = hash_sessionid(&new->se_sessionid);
list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
spin_lock(&clp->cl_lock);
list_add(&new->se_perclnt, &clp->cl_sessions);
spin_unlock(&clp->cl_lock);
+ spin_lock(&nfsd_session_list_lock);
+ list_add_tail(&new->se_all_sessions, &nfsd_session_list);
+ spin_unlock(&nfsd_session_list_lock);
+
{
struct sockaddr *sa = svc_addr(rqstp);
/*
@@ -2222,6 +2289,9 @@ unhash_session(struct nfsd4_session *ses)
spin_lock(&ses->se_client->cl_lock);
list_del(&ses->se_perclnt);
spin_unlock(&ses->se_client->cl_lock);
+ spin_lock(&nfsd_session_list_lock);
+ list_del(&ses->se_all_sessions);
+ spin_unlock(&nfsd_session_list_lock);
}
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -2239,21 +2309,16 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
return 1;
}
-/*
- * XXX Should we use a slab cache ?
- * This type of memory management is somewhat inefficient, but we use it
- * anyway since SETCLIENTID is not a common operation.
- */
static struct nfs4_client *alloc_client(struct xdr_netobj name,
struct nfsd_net *nn)
{
struct nfs4_client *clp;
int i;
- if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) {
+ if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients &&
+ atomic_read(&nn->nfsd_courtesy_clients) > 0)
mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
- return NULL;
- }
+
clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL)
return NULL;
@@ -2362,8 +2427,12 @@ unhash_client_locked(struct nfs4_client *clp)
}
list_del_init(&clp->cl_lru);
spin_lock(&clp->cl_lock);
- list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ spin_lock(&nfsd_session_list_lock);
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) {
list_del_init(&ses->se_hash);
+ list_del_init(&ses->se_all_sessions);
+ }
+ spin_unlock(&nfsd_session_list_lock);
spin_unlock(&clp->cl_lock);
}
@@ -2685,6 +2754,7 @@ static const char *cb_state2str(int state)
static int client_info_show(struct seq_file *m, void *v)
{
struct inode *inode = file_inode(m->file);
+ struct nfsd4_session *ses;
struct nfs4_client *clp;
u64 clid;
@@ -2721,6 +2791,16 @@ static int client_info_show(struct seq_file *m, void *v)
seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
seq_printf(m, "admin-revoked states: %d\n",
atomic_read(&clp->cl_admin_revoked));
+ spin_lock(&clp->cl_lock);
+ seq_printf(m, "session slots:");
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+ seq_printf(m, "\nsession target slots:");
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ seq_printf(m, " %u", ses->se_target_maxslots);
+ spin_unlock(&clp->cl_lock);
+ seq_puts(m, "\n");
+
drop_client(clp);
return 0;
@@ -2873,6 +2953,21 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
return 0;
}
+static char *nfs4_show_deleg_type(u32 dl_type)
+{
+ switch (dl_type) {
+ case OPEN_DELEGATE_READ:
+ return "r";
+ case OPEN_DELEGATE_WRITE:
+ return "w";
+ case OPEN_DELEGATE_READ_ATTRS_DELEG:
+ return "ra";
+ case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ return "wa";
+ }
+ return "?";
+}
+
static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_delegation *ds;
@@ -2886,8 +2981,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
nfs4_show_stateid(s, &st->sc_stateid);
seq_puts(s, ": { type: deleg, ");
- seq_printf(s, "access: %s",
- ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+ seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type));
/* XXX: lease time, whether it's being recalled. */
@@ -3076,7 +3170,6 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
- clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
drop_client(clp);
}
@@ -3107,7 +3200,6 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
struct nfs4_delegation *dp =
container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
- clear_and_wake_up_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
nfs4_put_stid(&dp->dl_stid);
}
@@ -3128,11 +3220,15 @@ static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
struct nfs4_delegation *dp =
container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
- if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
+ if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags))
return;
+
/* set to proper status when nfsd4_cb_getattr_done runs */
ncf->ncf_cb_status = NFS4ERR_IO;
+ /* ensure that wake_bit is done when RUNNING is cleared */
+ set_bit(NFSD4_CALLBACK_WAKE, &ncf->ncf_getattr.cb_flags);
+
refcount_inc(&dp->dl_stid.sc_count);
nfsd4_run_cb(&ncf->ncf_getattr);
}
@@ -3160,7 +3256,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = ktime_get_boottime_seconds();
- clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
@@ -3487,7 +3582,7 @@ static bool client_has_state(struct nfs4_client *clp)
#endif
|| !list_empty(&clp->cl_delegations)
|| !list_empty(&clp->cl_sessions)
- || !list_empty(&clp->async_copies);
+ || nfsd4_has_active_async_copies(clp);
}
static __be32 copy_impl_id(struct nfs4_client *clp,
@@ -3525,6 +3620,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__func__, rqstp, exid, exid->clname.len, exid->clname.data,
addr_str, exid->flags, exid->spa_how);
+ exid->server_impl_name = kasprintf(GFP_KERNEL, "%s %s %s %s",
+ utsname()->sysname, utsname()->release,
+ utsname()->version, utsname()->machine);
+ if (!exid->server_impl_name)
+ return nfserr_jukebox;
+
if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
return nfserr_inval;
@@ -3662,6 +3763,23 @@ out_copy:
exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
nfsd4_set_ex_flags(conf, exid);
+ exid->nii_domain.len = sizeof("kernel.org") - 1;
+ exid->nii_domain.data = "kernel.org";
+
+ /*
+ * Note that RFC 8881 places no length limit on
+ * nii_name, but this implementation permits no
+ * more than NFS4_OPAQUE_LIMIT bytes.
+ */
+ exid->nii_name.len = strlen(exid->server_impl_name);
+ if (exid->nii_name.len > NFS4_OPAQUE_LIMIT)
+ exid->nii_name.len = NFS4_OPAQUE_LIMIT;
+ exid->nii_name.data = exid->server_impl_name;
+
+ /* just send zeros - the date is in nii_name */
+ exid->nii_time.tv_sec = 0;
+ exid->nii_time.tv_nsec = 0;
+
dprintk("nfsd4_exchange_id seqid %d flags %x\n",
conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
status = nfs_ok;
@@ -3678,10 +3796,18 @@ out_nolock:
return status;
}
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+void
+nfsd4_exchange_id_release(union nfsd4_op_u *u)
+{
+ struct nfsd4_exchange_id *exid = &u->exchange_id;
+
+ kfree(exid->server_impl_name);
+}
+
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
{
/* The slot is in use, and no response has been sent. */
- if (slot_inuse) {
+ if (flags & NFSD4_SLOT_INUSE) {
if (seqid == slot_seqid)
return nfserr_jukebox;
else
@@ -3690,6 +3816,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
/* Note unsigned 32-bit arithmetic handles wraparound: */
if (likely(seqid == slot_seqid + 1))
return nfs_ok;
+ if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+ return nfs_ok;
if (seqid == slot_seqid)
return nfserr_replay_cache;
return nfserr_seq_misordered;
@@ -3748,17 +3876,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
- /*
- * Note decreasing slot size below client's request may make it
- * difficult for client to function correctly, whereas
- * decreasing the number of slots will (just?) affect
- * performance. When short on memory we therefore prefer to
- * decrease number of slots instead of their size. Clients that
- * request larger slots than they need will get poor results:
- * Note that we always allow at least one slot, because our
- * accounting is soft and provides no guarantees either way.
- */
- ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
return nfs_ok;
}
@@ -3836,11 +3953,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
return status;
status = check_backchannel_attrs(&cr_ses->back_channel);
if (status)
- goto out_release_drc_mem;
+ goto out_err;
status = nfserr_jukebox;
new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
if (!new)
- goto out_release_drc_mem;
+ goto out_err;
conn = alloc_conn_from_crses(rqstp, cr_ses);
if (!conn)
goto out_free_session;
@@ -3911,6 +4028,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
cr_ses->flags &= ~SESSION4_PERSIST;
/* Upshifting from TCP to RDMA is not supported */
cr_ses->flags &= ~SESSION4_RDMA;
+ /* Report the correct number of backchannel slots */
+ cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1;
init_session(rqstp, new, conf, cr_ses);
nfsd4_get_session_locked(new);
@@ -3931,7 +4050,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
return status;
out_expired_error:
- old = NULL;
/*
* Revert the slot seq_nr change so the server will process
* the client's resend instead of returning a cached response.
@@ -3946,12 +4064,9 @@ out_cache_error:
out_free_conn:
spin_unlock(&nn->client_lock);
free_conn(conn);
- if (old)
- expire_client(old);
out_free_session:
__free_session(new);
-out_release_drc_mem:
- nfsd4_put_drc_mem(&cr_ses->fore_channel);
+out_err:
return status;
}
@@ -4249,17 +4364,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (seq->slotid >= session->se_fchannel.maxreqs)
goto out_put_session;
- slot = session->se_slots[seq->slotid];
+ slot = xa_load(&session->se_slots, seq->slotid);
dprintk("%s: slotid %d\n", __func__, seq->slotid);
- /* We do not negotiate the number of slots yet, so set the
- * maxslots to the session maxreqs which is used to encode
- * sr_highest_slotid and the sr_target_slot id to maxslots */
- seq->maxslots = session->se_fchannel.maxreqs;
-
trace_nfsd_slot_seqid_sequence(clp, seq, slot);
- status = check_slot_seqid(seq->seqid, slot->sl_seqid,
- slot->sl_flags & NFSD4_SLOT_INUSE);
+ status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
if (status == nfserr_replay_cache) {
status = nfserr_seq_misordered;
if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4284,6 +4393,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out_put_session;
+ if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+ slot->sl_generation == session->se_slot_gen &&
+ seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduced maxreqs */
+ free_session_slots(session, session->se_target_maxslots);
+
buflen = (seq->cachethis) ?
session->se_fchannel.maxresp_cached :
session->se_fchannel.maxresp_sz;
@@ -4291,12 +4406,14 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfserr_rep_too_big;
if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
goto out_put_session;
- svc_reserve(rqstp, buflen);
+ svc_reserve_auth(rqstp, buflen);
status = nfs_ok;
- /* Success! bump slot seqid */
+ /* Success! accept new slot seqid */
slot->sl_seqid = seq->seqid;
+ slot->sl_flags &= ~NFSD4_SLOT_REUSED;
slot->sl_flags |= NFSD4_SLOT_INUSE;
+ slot->sl_generation = session->se_slot_gen;
if (seq->cachethis)
slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
else
@@ -4306,7 +4423,51 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->session = session;
cstate->clp = clp;
+ /*
+ * If the client ever uses the highest available slot,
+ * gently try to allocate another 20%. This allows
+ * fairly quick growth without grossly over-shooting what
+ * the client might use.
+ */
+ if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+ session->se_target_maxslots >= session->se_fchannel.maxreqs &&
+ session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
+ int s = session->se_fchannel.maxreqs;
+ int cnt = DIV_ROUND_UP(s, 5);
+ void *prev_slot;
+
+ do {
+ /*
+ * GFP_NOWAIT both allows allocation under a
+ * spinlock, and only succeeds if there is
+ * plenty of memory.
+ */
+ slot = nfsd4_alloc_slot(&session->se_fchannel, s,
+ GFP_NOWAIT);
+ prev_slot = xa_load(&session->se_slots, s);
+ if (xa_is_value(prev_slot) && slot) {
+ slot->sl_seqid = xa_to_value(prev_slot);
+ slot->sl_flags |= NFSD4_SLOT_REUSED;
+ }
+ if (slot &&
+ !xa_is_err(xa_store(&session->se_slots, s, slot,
+ GFP_NOWAIT))) {
+ s += 1;
+ session->se_fchannel.maxreqs = s;
+ atomic_add(s - session->se_target_maxslots,
+ &nfsd_total_target_slots);
+ session->se_target_maxslots = s;
+ } else {
+ kfree(slot);
+ slot = NULL;
+ }
+ } while (slot && --cnt > 0);
+ }
+
out:
+ seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+ seq->target_maxslots = session->se_target_maxslots;
+
switch (clp->cl_cb_state) {
case NFSD4_CB_DOWN:
seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
@@ -4658,8 +4819,8 @@ out:
static unsigned long
nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
- int count;
struct nfsd_net *nn = shrink->private_data;
+ long count;
count = atomic_read(&nn->nfsd_courtesy_clients);
if (!count)
@@ -4710,7 +4871,7 @@ static void init_nfs4_replay(struct nfs4_replay *rp)
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
- atomic_set(&rp->rp_locked, RP_UNLOCKED);
+ rp->rp_locked = RP_UNLOCKED;
}
static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
@@ -4718,9 +4879,9 @@ static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
{
if (!nfsd4_has_session(cstate)) {
wait_var_event(&so->so_replay.rp_locked,
- atomic_cmpxchg(&so->so_replay.rp_locked,
- RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
- if (atomic_read(&so->so_replay.rp_locked) == RP_UNHASHED)
+ cmpxchg(&so->so_replay.rp_locked,
+ RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
+ if (so->so_replay.rp_locked == RP_UNHASHED)
return -EAGAIN;
cstate->replay_owner = nfs4_get_stateowner(so);
}
@@ -4733,9 +4894,7 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
if (so != NULL) {
cstate->replay_owner = NULL;
- atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED);
- smp_mb__after_atomic();
- wake_up_var(&so->so_replay.rp_locked);
+ store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED);
nfs4_put_stateowner(so);
}
}
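
The replay-owner lock is now a plain integer driven by cmpxchg() with wait_var_event() and store_release_wake_up(), rather than an atomic_t. A compact sketch of the handshake these hunks implement; rp stands for the nfs4_replay being claimed.

	/* Acquire: wait until we win the cmpxchg or the owner is unhashed. */
	wait_var_event(&rp->rp_locked,
		       cmpxchg(&rp->rp_locked, RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
	if (rp->rp_locked == RP_UNHASHED)
		return -EAGAIN;		/* owner went away while we waited */

	/* Release: publish the unlocked state and wake any waiters. */
	store_release_wake_up(&rp->rp_locked, RP_UNLOCKED);
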
@@ -4975,6 +5134,12 @@ retry:
spin_lock(&oo->oo_owner.so_client->cl_lock);
spin_lock(&fp->fi_lock);
+ if (nfs4_openowner_unhashed(oo)) {
+ mutex_unlock(&stp->st_mutex);
+ stp = NULL;
+ goto out_unlock;
+ }
+
retstp = nfsd4_find_existing_open(fp, open);
if (retstp)
goto out_unlock;
@@ -5034,9 +5199,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
* Some threads with a reference might be waiting for rp_locked,
* so tell them to stop waiting.
*/
- atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
- smp_mb__after_atomic();
- wake_up_var(&oo->oo_owner.so_replay.rp_locked);
+ store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
release_all_access(s);
@@ -5255,6 +5418,11 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
+ bool queued;
+
+ if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags))
+ return;
+
/*
* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
@@ -5263,7 +5431,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
* we know it's safe to take a reference.
*/
refcount_inc(&dp->dl_stid.sc_count);
- WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
+ queued = nfsd4_run_cb(&dp->dl_recall);
+ WARN_ON_ONCE(!queued);
+ if (!queued)
+ refcount_dec(&dp->dl_stid.sc_count);
}
/* Called from break_lease() with flc_lock held. */
@@ -5437,7 +5608,7 @@ retry:
static inline __be32
nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
{
- if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+ if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type))
return nfserr_openmode;
else
return nfs_ok;
@@ -5669,8 +5840,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
-static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
- int flag)
+static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp)
{
struct file_lease *fl;
@@ -5679,7 +5849,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
return NULL;
fl->fl_lmops = &nfsd_lease_mng_ops;
fl->c.flc_flags = FL_DELEG;
- fl->c.flc_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+ fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK;
fl->c.flc_owner = (fl_owner_t)dp;
fl->c.flc_pid = current->tgid;
fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
@@ -5790,17 +5960,30 @@ nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf)
return 0;
}
+#ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS
+static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
+{
+ return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS;
+}
+#else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */
+static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
+{
+ return false;
+}
+#endif /* CONFIG NFSD_V4_DELEG_TIMESTAMPS */
+
static struct nfs4_delegation *
nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *parent)
{
- int status = 0;
+ bool deleg_ts = nfsd4_want_deleg_timestamps(open);
struct nfs4_client *clp = stp->st_stid.sc_client;
struct nfs4_file *fp = stp->st_stid.sc_file;
struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
struct nfs4_delegation *dp;
struct nfsd_file *nf = NULL;
struct file_lease *fl;
+ int status = 0;
u32 dl_type;
/*
@@ -5825,7 +6008,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
*/
if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
nf = find_rw_file(fp);
- dl_type = NFS4_OPEN_DELEGATE_WRITE;
+ dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE;
}
/*
@@ -5834,12 +6017,21 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
*/
if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
nf = find_readable_file(fp);
- dl_type = NFS4_OPEN_DELEGATE_READ;
+ dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ;
}
if (!nf)
return ERR_PTR(-EAGAIN);
+ /*
+ * File delegations and associated locks cannot be recovered if the
+ * export is from an NFS proxy server.
+ */
+ if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) {
+ nfsd_file_put(nf);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
if (nfs4_delegation_exists(clp, fp))
@@ -5866,7 +6058,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (!dp)
goto out_delegees;
- fl = nfs4_alloc_init_lease(dp, dl_type);
+ fl = nfs4_alloc_init_lease(dp);
if (!fl)
goto out_clnt_odstate;
@@ -5923,20 +6115,20 @@ out_delegees:
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
{
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
if (status == -EAGAIN)
open->op_why_no_deleg = WND4_CONTENTION;
else {
open->op_why_no_deleg = WND4_RESOURCE;
switch (open->op_deleg_want) {
- case NFS4_SHARE_WANT_READ_DELEG:
- case NFS4_SHARE_WANT_WRITE_DELEG:
- case NFS4_SHARE_WANT_ANY_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
break;
- case NFS4_SHARE_WANT_CANCEL:
+ case OPEN4_SHARE_ACCESS_WANT_CANCEL:
open->op_why_no_deleg = WND4_CANCELLED;
break;
- case NFS4_SHARE_WANT_NO_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
WARN_ON_ONCE(1);
}
}
@@ -5957,7 +6149,7 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh,
path.dentry = file_dentry(nf->nf_file);
rc = vfs_getattr(&path, stat,
- (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
+ (STATX_MODE | STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
AT_STATX_SYNC_AS_STAT);
nfsd_file_put(nf);
@@ -5992,13 +6184,14 @@ static void
nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *currentfh)
{
- struct nfs4_delegation *dp;
struct nfs4_openowner *oo = openowner(stp->st_stateowner);
+ bool deleg_ts = nfsd4_want_deleg_timestamps(open);
struct nfs4_client *clp = stp->st_stid.sc_client;
struct svc_fh *parent = NULL;
- int cb_up;
- int status = 0;
+ struct nfs4_delegation *dp;
struct kstat stat;
+ int status = 0;
+ int cb_up;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false;
@@ -6039,21 +6232,22 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
destroy_delegation(dp);
goto out_no_deleg;
}
- open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
+ open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG :
+ OPEN_DELEGATE_WRITE;
dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
- dp->dl_cb_fattr.ncf_initial_cinfo =
- nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
+ dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat);
trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
} else {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG :
+ OPEN_DELEGATE_READ;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
}
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
+ open->op_delegate_type = OPEN_DELEGATE_NONE;
if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
- open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
+ open->op_delegate_type != OPEN_DELEGATE_NONE) {
dprintk("NFSD: WARNING: refusing delegation reclaim\n");
open->op_recall = true;
}
@@ -6067,21 +6261,32 @@ out_no_deleg:
static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
struct nfs4_delegation *dp)
{
- if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
- dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
- open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
- } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
- dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
- open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+ if (deleg_is_write(dp->dl_type)) {
+ if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
+ } else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+ }
}
/* Otherwise the client must be confused wanting a delegation
* it already has, therefore we don't return
- * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
+ * OPEN_DELEGATE_NONE_EXT and reason.
*/
}
+/* Are we returning only a delegation stateid? */
+static bool open_xor_delegation(struct nfsd4_open *open)
+{
+ if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
+ return false;
+ /* Did we actually get a delegation? */
+ if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type))
+ return false;
+ return true;
+}
+
/**
* nfsd4_process_open2 - finish open processing
* @rqstp: the RPC transaction being executed
@@ -6127,6 +6332,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (!stp) {
stp = init_open_stateid(fp, open);
+ if (!stp) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+
if (!open->op_stp)
new_stp = true;
}
@@ -6162,8 +6372,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
mutex_unlock(&stp->st_mutex);
if (nfsd4_has_session(&resp->cstate)) {
- if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
open->op_why_no_deleg = WND4_NOT_WANTED;
goto nodeleg;
}
@@ -6174,12 +6384,23 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* OPEN succeeds even if we fail.
*/
nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
+
+ /*
+ * If there is an existing open stateid, it must be updated and
+ * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new
+ * open stateid would have to be created.
+ */
+ if (new_stp && open_xor_delegation(open)) {
+ memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid));
+ open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID;
+ release_open_stateid(stp);
+ }
nodeleg:
status = nfs_ok;
trace_nfsd_open(&stp->st_stid.sc_stateid);
out:
/* 4.1 client trying to upgrade/downgrade delegation? */
- if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
+ if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp &&
open->op_deleg_want)
nfsd4_deleg_xgrade_none_ext(open, dp);
@@ -6190,7 +6411,7 @@ out:
/*
* To finish the open response, we just need to set the rflags.
*/
- open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
+ open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
if (nfsd4_has_session(&resp->cstate))
open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
@@ -6562,6 +6783,7 @@ nfs4_laundromat(struct nfsd_net *nn)
_free_cpntf_state_locked(nn, cps);
}
spin_unlock(&nn->s2s_cp_lock);
+ nfsd4_async_copy_reaper(nn);
nfs4_get_client_reaplist(nn, &reaplist, &lt);
nfs4_process_client_reaplist(&reaplist);
@@ -6666,38 +6888,34 @@ deleg_reaper(struct nfsd_net *nn)
{
struct list_head *pos, *next;
struct nfs4_client *clp;
- LIST_HEAD(cblist);
spin_lock(&nn->client_lock);
list_for_each_safe(pos, next, &nn->client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
- if (clp->cl_state != NFSD4_ACTIVE ||
- list_empty(&clp->cl_delegations) ||
- atomic_read(&clp->cl_delegs_in_recall) ||
- test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
- (ktime_get_boottime_seconds() -
- clp->cl_ra_time < 5)) {
+
+ if (clp->cl_state != NFSD4_ACTIVE)
+ continue;
+ if (list_empty(&clp->cl_delegations))
+ continue;
+ if (atomic_read(&clp->cl_delegs_in_recall))
+ continue;
+ if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags))
+ continue;
+ if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5)
+ continue;
+ if (clp->cl_cb_state != NFSD4_CB_UP)
continue;
- }
- list_add(&clp->cl_ra_cblist, &cblist);
/* release in nfsd4_cb_recall_any_release */
kref_get(&clp->cl_nfsdfs.cl_ref);
- set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
clp->cl_ra_time = ktime_get_boottime_seconds();
- }
- spin_unlock(&nn->client_lock);
-
- while (!list_empty(&cblist)) {
- clp = list_first_entry(&cblist, struct nfs4_client,
- cl_ra_cblist);
- list_del_init(&clp->cl_ra_cblist);
clp->cl_ra->ra_keep = 0;
clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
BIT(RCA4_TYPE_MASK_WDATA_DLG);
trace_nfsd_cb_recall_any(clp->cl_ra);
nfsd4_run_cb(&clp->cl_ra->ra_cb);
}
+ spin_unlock(&nn->client_lock);
}
static void
@@ -6862,7 +7080,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
*/
statusmask |= SC_STATUS_REVOKED;
- statusmask |= SC_STATUS_ADMIN_REVOKED;
+ statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
@@ -7517,9 +7735,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
return status;
- status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG,
- SC_STATUS_REVOKED | SC_STATUS_FREEABLE,
- &s, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn);
if (status)
goto out;
dp = delegstateid(s);
@@ -7627,7 +7843,7 @@ nfsd4_lm_notify(struct file_lock *fl)
if (queue) {
trace_nfsd_cb_notify_lock(lo, nbl);
- nfsd4_run_cb(&nbl->nbl_cb);
+ nfsd4_try_run_cb(&nbl->nbl_cb);
}
}
@@ -7926,7 +8142,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
- struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
@@ -7943,12 +8158,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (check_lock_length(lock->lk_offset, lock->lk_length))
return nfserr_inval;
- if ((status = fh_verify(rqstp, &cstate->current_fh,
- S_IFREG, NFSD_MAY_LOCK))) {
- dprintk("NFSD: nfsd4_lock: permission denied!\n");
+ status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
+ if (status != nfs_ok)
return status;
+ if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) {
+ status = nfserr_notsupp;
+ goto out;
}
- sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
@@ -8001,9 +8217,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate) ||
- exportfs_lock_op_is_async(sb->s_export_op))
- flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
@@ -8014,9 +8227,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate) ||
- exportfs_lock_op_is_async(sb->s_export_op))
- flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
@@ -8036,15 +8246,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
- /*
- * Most filesystems with their own ->lock operations will block
- * the nfsd thread waiting to acquire the lock. That leads to
- * deadlocks (we don't want every nfsd thread tied up waiting
- * for file locks), so don't attempt blocking lock notifications
- * on those filesystems:
- */
- if (!exportfs_lock_op_is_async(sb->s_export_op))
- flags &= ~FL_SLEEP;
+ if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) &&
+ nfsd4_has_session(cstate) &&
+ locks_can_async_lock(nf->nf_file->f_op))
+ flags |= FL_SLEEP;
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {
@@ -8299,6 +8504,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_lock_range;
goto put_stateid;
}
+ if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) {
+ status = nfserr_notsupp;
+ goto put_file;
+ }
+
file_lock = locks_alloc_lock();
if (!file_lock) {
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
@@ -8694,7 +8904,6 @@ skip_grace:
}
/* initialization to perform when the nfsd service is started: */
-
int
nfs4_state_start(void)
{
@@ -8704,6 +8913,15 @@ nfs4_state_start(void)
if (ret)
return ret;
+ nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
+ if (!nfsd_slot_shrinker) {
+ rhltable_destroy(&nfs4_file_rhltable);
+ return -ENOMEM;
+ }
+ nfsd_slot_shrinker->count_objects = nfsd_slot_count;
+ nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
+ shrinker_register(nfsd_slot_shrinker);
+
set_max_delegations();
return 0;
}
@@ -8745,6 +8963,7 @@ void
nfs4_state_shutdown(void)
{
rhltable_destroy(&nfs4_file_rhltable);
+ shrinker_free(nfsd_slot_shrinker);
}
static void
@@ -8862,11 +9081,82 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
}
/**
+ * set_cb_time - vet and set the timespec for a cb_getattr update
+ * @cb: timestamp from the CB_GETATTR response
+ * @orig: original timestamp in the inode
+ * @now: current time
+ *
+ * Given a timestamp in a CB_GETATTR response, check it against the
+ * current timestamp in the inode and the current time. Returns true
+ * if the inode's timestamp needs to be updated, and false otherwise.
+ * @cb may also be changed if the timestamp needs to be clamped.
+ */
+static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig,
+ const struct timespec64 *now)
+{
+
+ /*
+ * "When the time presented is before the original time, then the
+ * update is ignored." Also no need to update if there is no change.
+ */
+ if (timespec64_compare(cb, orig) <= 0)
+ return false;
+
+ /*
+ * "When the time presented is in the future, the server can either
+ * clamp the new time to the current time, or it may
+ * return NFS4ERR_DELAY to the client, allowing it to retry."
+ */
+ if (timespec64_compare(cb, now) > 0) {
+ /* clamp it */
+ *cb = *now;
+ }
+
+ return true;
+}
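For illustration, a minimal, self-contained sketch of the vet-and-clamp rule documented above, written as ordinary userspace C with struct timespec standing in for timespec64 (not part of the patch):

#include <stdbool.h>
#include <time.h>

static int ts_cmp(const struct timespec *a, const struct timespec *b)
{
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec < b->tv_sec ? -1 : 1;
	if (a->tv_nsec != b->tv_nsec)
		return a->tv_nsec < b->tv_nsec ? -1 : 1;
	return 0;
}

/* Mirrors set_cb_time(): ignore stale times, clamp future times to now. */
static bool vet_and_clamp(struct timespec *cb, const struct timespec *orig,
			  const struct timespec *now)
{
	if (ts_cmp(cb, orig) <= 0)
		return false;
	if (ts_cmp(cb, now) > 0)
		*cb = *now;
	return true;
}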
+
+static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp)
+{
+ struct inode *inode = d_inode(dentry);
+ struct timespec64 now = current_time(inode);
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+ struct iattr attrs = { };
+ int ret;
+
+ if (deleg_attrs_deleg(dp->dl_type)) {
+ struct timespec64 atime = inode_get_atime(inode);
+ struct timespec64 mtime = inode_get_mtime(inode);
+
+ attrs.ia_atime = ncf->ncf_cb_atime;
+ attrs.ia_mtime = ncf->ncf_cb_mtime;
+
+ if (set_cb_time(&attrs.ia_atime, &atime, &now))
+ attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+
+ if (set_cb_time(&attrs.ia_mtime, &mtime, &now)) {
+ attrs.ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET;
+ attrs.ia_ctime = attrs.ia_mtime;
+ }
+ } else {
+ attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME;
+ attrs.ia_mtime = attrs.ia_ctime = now;
+ }
+
+ if (!attrs.ia_valid)
+ return 0;
+
+ attrs.ia_valid |= ATTR_DELEG;
+ inode_lock(inode);
+ ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
+ inode_unlock(inode);
+ return ret;
+}
+
+/**
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
* @dentry: dentry of inode to be checked for a conflict
- * @modified: return true if file was modified
- * @size: new size of file if modified is true
+ * @pdp: returned WRITE delegation, if one was found
*
* This function is called when there is a conflict between a write
* delegation and a change/size GETATTR from another client. The server
@@ -8876,25 +9166,24 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* 18.7.4.
*
* Returns 0 if there is no conflict; otherwise an nfs_stat
- * code is returned.
+ * code is returned. If @pdp is set to a non-NULL value, then the
+ * caller must put the reference.
*/
__be32
nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
- bool *modified, u64 *size)
+ struct nfs4_delegation **pdp)
{
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file_lock_context *ctx;
struct nfs4_delegation *dp = NULL;
struct file_lease *fl;
- struct iattr attrs;
struct nfs4_cb_fattr *ncf;
struct inode *inode = d_inode(dentry);
- *modified = false;
ctx = locks_inode_context(inode);
if (!ctx)
- return 0;
+ return nfs_ok;
#define NON_NFSD_LEASE ((void *)1)
@@ -8929,8 +9218,8 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
nfs4_cb_getattr(&dp->dl_cb_fattr);
spin_unlock(&ctx->flc_lock);
- wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY,
- TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
+ wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING,
+ TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
if (ncf->ncf_cb_status) {
/* Recall delegation only if client didn't respond */
status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
@@ -8950,20 +9239,16 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
* not update the file's metadata with the client's
* modified size
*/
- attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
- attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG;
- inode_lock(inode);
- err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
- inode_unlock(inode);
+ err = cb_getattr_update_times(dentry, dp);
if (err) {
status = nfserrno(err);
goto out_status;
}
ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
- *size = ncf->ncf_cur_fsize;
- *modified = true;
+ *pdp = dp;
+ return nfs_ok;
}
- status = 0;
+ status = nfs_ok;
out_status:
nfs4_put_stid(&dp->dl_stid);
return status;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f118921250c3..3afcdbed6e14 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -55,6 +55,7 @@
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
+#include "nfs4xdr_gen.h"
#include "trace.h"
@@ -520,6 +521,26 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
*umask = mask & S_IRWXUGO;
iattr->ia_valid |= ATTR_MODE;
}
+ if (bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) {
+ fattr4_time_deleg_access access;
+
+ if (!xdrgen_decode_fattr4_time_deleg_access(argp->xdr, &access))
+ return nfserr_bad_xdr;
+ iattr->ia_atime.tv_sec = access.seconds;
+ iattr->ia_atime.tv_nsec = access.nseconds;
+ iattr->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET | ATTR_DELEG;
+ }
+ if (bmval[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+ fattr4_time_deleg_modify modify;
+
+ if (!xdrgen_decode_fattr4_time_deleg_modify(argp->xdr, &modify))
+ return nfserr_bad_xdr;
+ iattr->ia_mtime.tv_sec = modify.seconds;
+ iattr->ia_mtime.tv_nsec = modify.nseconds;
+ iattr->ia_ctime.tv_sec = modify.seconds;
+ iattr->ia_ctime.tv_nsec = modify.nseconds;
+ iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET | ATTR_DELEG;
+ }
/* request sanity: did attrlist4 contain the expected number of words? */
if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos)
@@ -1066,13 +1087,13 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
return nfs_ok;
if (!argp->minorversion)
return nfserr_bad_xdr;
- switch (w & NFS4_SHARE_WANT_MASK) {
- case NFS4_SHARE_WANT_NO_PREFERENCE:
- case NFS4_SHARE_WANT_READ_DELEG:
- case NFS4_SHARE_WANT_WRITE_DELEG:
- case NFS4_SHARE_WANT_ANY_DELEG:
- case NFS4_SHARE_WANT_NO_DELEG:
- case NFS4_SHARE_WANT_CANCEL:
+ switch (w & NFS4_SHARE_WANT_TYPE_MASK) {
+ case OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE:
+ case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_CANCEL:
break;
default:
return nfserr_bad_xdr;
@@ -1884,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
return nfserr_bad_xdr;
seq->seqid = be32_to_cpup(p++);
seq->slotid = be32_to_cpup(p++);
- seq->maxslots = be32_to_cpup(p++);
+ /* sa_highest_slotid counts from 0 but maxslots counts from 1 ... */
+ seq->maxslots = be32_to_cpup(p++) + 1;
seq->cachethis = be32_to_cpup(p);
seq->status_flags = 0;
@@ -2542,7 +2564,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
/* Sessions make the DRC unnecessary: */
if (argp->minorversion)
cachethis = false;
- svc_reserve(argp->rqstp, max_reply + readbytes);
+ svc_reserve_auth(argp->rqstp, max_reply + readbytes);
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
argp->splice_ok = nfsd_read_splice_ok(argp->rqstp);
@@ -2652,13 +2674,10 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
strlen = end - str;
if (strlen) {
- p = xdr_reserve_space(xdr, strlen + 4);
- if (!p)
+ if (xdr_stream_encode_opaque(xdr, str, strlen) < 0)
return nfserr_resource;
- p = xdr_encode_opaque(p, str, strlen);
count++;
- }
- else
+ } else
end++;
if (found_esc)
end = next;
@@ -2699,7 +2718,6 @@ static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr,
const struct path *path)
{
struct path cur = *path;
- __be32 *p;
struct dentry **components = NULL;
unsigned int ncomponents = 0;
__be32 err = nfserr_jukebox;
@@ -2730,24 +2748,19 @@ static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr,
components[ncomponents++] = cur.dentry;
cur.dentry = dget_parent(cur.dentry);
}
+
err = nfserr_resource;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+ if (xdr_stream_encode_u32(xdr, ncomponents) != XDR_UNIT)
goto out_free;
- *p++ = cpu_to_be32(ncomponents);
-
while (ncomponents) {
struct dentry *dentry = components[ncomponents - 1];
- unsigned int len;
spin_lock(&dentry->d_lock);
- len = dentry->d_name.len;
- p = xdr_reserve_space(xdr, len + 4);
- if (!p) {
+ if (xdr_stream_encode_opaque(xdr, dentry->d_name.name,
+ dentry->d_name.len) < 0) {
spin_unlock(&dentry->d_lock);
goto out_free;
}
- p = xdr_encode_opaque(p, dentry->d_name.name, len);
dprintk("/%pd", dentry);
spin_unlock(&dentry->d_lock);
dput(dentry);
@@ -2827,11 +2840,11 @@ static __be32 nfsd4_encode_nfsace4(struct xdr_stream *xdr, struct svc_rqst *rqst
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
static inline __be32
nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
- void *context, int len)
+ const struct lsm_context *context)
{
__be32 *p;
- p = xdr_reserve_space(xdr, len + 4 + 4 + 4);
+ p = xdr_reserve_space(xdr, context->len + 4 + 4 + 4);
if (!p)
return nfserr_resource;
@@ -2841,13 +2854,13 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
*/
*p++ = cpu_to_be32(0); /* lfs */
*p++ = cpu_to_be32(0); /* pi */
- p = xdr_encode_opaque(p, context, len);
+ p = xdr_encode_opaque(p, context->context, context->len);
return 0;
}
#else
static inline __be32
nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
- void *context, int len)
+ struct lsm_context *context)
{ return 0; }
#endif
@@ -2928,10 +2941,9 @@ struct nfsd4_fattr_args {
struct kstat stat;
struct kstatfs statfs;
struct nfs4_acl *acl;
- u64 size;
+ u64 change_attr;
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- void *context;
- int contextlen;
+ struct lsm_context context;
#endif
u32 rdattr_err;
bool contextsupport;
@@ -3028,7 +3040,6 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr,
const struct nfsd4_fattr_args *args)
{
const struct svc_export *exp = args->exp;
- u64 c;
if (unlikely(exp->ex_flags & NFSEXP_V4ROOT)) {
u32 flush_time = convert_to_wallclock(exp->cd->flush_time);
@@ -3039,15 +3050,13 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr,
return nfserr_resource;
return nfs_ok;
}
-
- c = nfsd4_change_attribute(&args->stat, d_inode(args->dentry));
- return nfsd4_encode_changeid4(xdr, c);
+ return nfsd4_encode_changeid4(xdr, args->change_attr);
}
static __be32 nfsd4_encode_fattr4_size(struct xdr_stream *xdr,
const struct nfsd4_fattr_args *args)
{
- return nfsd4_encode_uint64_t(xdr, args->size);
+ return nfsd4_encode_uint64_t(xdr, args->stat.size);
}
static __be32 nfsd4_encode_fattr4_fsid(struct xdr_stream *xdr,
@@ -3382,12 +3391,28 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr,
return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]);
}
+/*
+ * Copied from generic_remap_checks/generic_remap_file_range_prep.
+ *
+ * These generic functions use the file system's s_blocksize, but
+ * individual file systems aren't required to use
+ * generic_remap_file_range_prep. Until there is a mechanism for
+ * determining a particular file system's (or file's) clone block
+ * size, this is the best NFSD can do.
+ */
+static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ struct inode *inode = d_inode(args->dentry);
+
+ return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize);
+}
+
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr,
const struct nfsd4_fattr_args *args)
{
- return nfsd4_encode_security_label(xdr, args->rqstp,
- args->context, args->contextlen);
+ return nfsd4_encode_security_label(xdr, args->rqstp, &args->context);
}
#endif
@@ -3399,6 +3424,56 @@ static __be32 nfsd4_encode_fattr4_xattr_support(struct xdr_stream *xdr,
return nfsd4_encode_bool(xdr, err == 0);
}
+#define NFSD_OA_SHARE_ACCESS (BIT(OPEN_ARGS_SHARE_ACCESS_READ) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WRITE) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_BOTH))
+
+#define NFSD_OA_SHARE_DENY (BIT(OPEN_ARGS_SHARE_DENY_NONE) | \
+ BIT(OPEN_ARGS_SHARE_DENY_READ) | \
+ BIT(OPEN_ARGS_SHARE_DENY_WRITE) | \
+ BIT(OPEN_ARGS_SHARE_DENY_BOTH))
+
+#define NFSD_OA_SHARE_ACCESS_WANT (BIT(OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
+
+#define NFSD_OA_OPEN_CLAIM (BIT(OPEN_ARGS_OPEN_CLAIM_NULL) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_PREVIOUS) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV)| \
+ BIT(OPEN_ARGS_OPEN_CLAIM_FH) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH))
+
+#define NFSD_OA_CREATE_MODE (BIT(OPEN_ARGS_CREATEMODE_UNCHECKED4) | \
+ BIT(OPEN_ARGS_CREATE_MODE_GUARDED) | \
+ BIT(OPEN_ARGS_CREATEMODE_EXCLUSIVE4) | \
+ BIT(OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1))
+
+static uint32_t oa_share_access = NFSD_OA_SHARE_ACCESS;
+static uint32_t oa_share_deny = NFSD_OA_SHARE_DENY;
+static uint32_t oa_share_access_want = NFSD_OA_SHARE_ACCESS_WANT;
+static uint32_t oa_open_claim = NFSD_OA_OPEN_CLAIM;
+static uint32_t oa_create_mode = NFSD_OA_CREATE_MODE;
+
+static const struct open_arguments4 nfsd_open_arguments = {
+ .oa_share_access = { .count = 1, .element = &oa_share_access },
+ .oa_share_deny = { .count = 1, .element = &oa_share_deny },
+ .oa_share_access_want = { .count = 1, .element = &oa_share_access_want },
+ .oa_open_claim = { .count = 1, .element = &oa_open_claim },
+ .oa_create_mode = { .count = 1, .element = &oa_create_mode },
+};
+
+static __be32 nfsd4_encode_fattr4_open_arguments(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ if (!xdrgen_encode_fattr4_open_arguments(xdr, &nfsd_open_arguments))
+ return nfserr_resource;
+ return nfs_ok;
+}
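As a sketch of how a consumer could interpret the advertised OPEN_ARGUMENTS bitmaps (assuming the xdrgen bitmap4 {count, element[]} layout used above; the helper name is hypothetical and not part of the patch):

/* Does the server advertise support for delegated timestamps on OPEN? */
static bool oa_advertises_deleg_timestamps(const struct open_arguments4 *oa)
{
	if (oa->oa_share_access_want.count < 1)
		return false;
	return oa->oa_share_access_want.element[0] &
	       BIT(OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS);
}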
+
static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
[FATTR4_SUPPORTED_ATTRS] = nfsd4_encode_fattr4_supported_attrs,
[FATTR4_TYPE] = nfsd4_encode_fattr4_type,
@@ -3487,7 +3562,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
[FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop,
[FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat,
[FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop,
- [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4__noop,
+ [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize,
[FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop,
[FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop,
@@ -3499,6 +3574,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
[FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop,
[FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support,
+ [FATTR4_OPEN_ARGUMENTS] = nfsd4_encode_fattr4_open_arguments,
};
/*
@@ -3512,11 +3588,12 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
int ignore_crossmnt)
{
DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+ struct nfs4_delegation *dp = NULL;
struct nfsd4_fattr_args args;
struct svc_fh *tempfh = NULL;
int starting_len = xdr->buf->len;
- __be32 *attrlen_p, status;
- int attrlen_offset;
+ unsigned int attrlen_offset;
+ __be32 attrlen, status;
u32 attrmask[3];
int err;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
@@ -3526,8 +3603,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
.dentry = dentry,
};
unsigned long bit;
- bool file_modified = false;
- u64 size = 0;
WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@@ -3538,7 +3613,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
args.ignore_crossmnt = (ignore_crossmnt != 0);
args.acl = NULL;
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- args.context = NULL;
+ args.context.context = NULL;
#endif
/*
@@ -3555,10 +3630,12 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
if (status)
goto out;
}
- args.size = 0;
- if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
- status = nfsd4_deleg_getattr_conflict(rqstp, dentry,
- &file_modified, &size);
+ if ((attrmask[0] & (FATTR4_WORD0_CHANGE |
+ FATTR4_WORD0_SIZE)) ||
+ (attrmask[1] & (FATTR4_WORD1_TIME_ACCESS |
+ FATTR4_WORD1_TIME_MODIFY |
+ FATTR4_WORD1_TIME_METADATA))) {
+ status = nfsd4_deleg_getattr_conflict(rqstp, dentry, &dp);
if (status)
goto out;
}
@@ -3566,12 +3643,27 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
err = vfs_getattr(&path, &args.stat,
STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
AT_STATX_SYNC_AS_STAT);
+ if (dp) {
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+
+ if (ncf->ncf_file_modified) {
+ ++ncf->ncf_initial_cinfo;
+ args.stat.size = ncf->ncf_cur_fsize;
+ if (!timespec64_is_epoch(&ncf->ncf_cb_mtime))
+ args.stat.mtime = ncf->ncf_cb_mtime;
+ }
+ args.change_attr = ncf->ncf_initial_cinfo;
+
+ if (!timespec64_is_epoch(&ncf->ncf_cb_atime))
+ args.stat.atime = ncf->ncf_cb_atime;
+
+ nfs4_put_stid(&dp->dl_stid);
+ } else {
+ args.change_attr = nfsd4_change_attribute(&args.stat);
+ }
+
if (err)
goto out_nfserr;
- if (file_modified)
- args.size = size;
- else
- args.size = args.stat.size;
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
@@ -3616,7 +3708,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
err = security_inode_getsecctx(d_inode(dentry),
- &args.context, &args.contextlen);
+ &args.context);
else
err = -EOPNOTSUPP;
args.contextsupport = (err == 0);
@@ -3637,8 +3729,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
/* attr_vals */
attrlen_offset = xdr->buf->len;
- attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
- if (!attrlen_p)
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
goto out_resource;
bitmap_from_arr32(attr_bitmap, attrmask,
ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
@@ -3648,13 +3739,14 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
if (status != nfs_ok)
goto out;
}
- *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+ attrlen = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+ write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, XDR_UNIT);
status = nfs_ok;
out:
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- if (args.context)
- security_release_secctx(args.context, args.contextlen);
+ if (args.context.context)
+ security_release_secctx(&args.context);
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
kfree(args.acl);
if (tempfh) {
@@ -3737,7 +3829,9 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name,
__be32 nfserr;
int ignore_crossmnt = 0;
- dentry = lookup_positive_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
+ dentry = lookup_one_positive_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, namlen),
+ cd->rd_fhp->fh_dentry);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
@@ -3767,7 +3861,7 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name,
nfserr = nfserrno(err);
goto out_put;
}
- nfserr = check_nfsd_access(exp, cd->rd_rqstp);
+ nfserr = check_nfsd_access(exp, cd->rd_rqstp, false);
if (nfserr)
goto out_put;
@@ -4238,18 +4332,20 @@ nfsd4_encode_open_delegation4(struct xdr_stream *xdr, struct nfsd4_open *open)
if (xdr_stream_encode_u32(xdr, open->op_delegate_type) != XDR_UNIT)
return nfserr_resource;
switch (open->op_delegate_type) {
- case NFS4_OPEN_DELEGATE_NONE:
+ case OPEN_DELEGATE_NONE:
status = nfs_ok;
break;
- case NFS4_OPEN_DELEGATE_READ:
+ case OPEN_DELEGATE_READ:
+ case OPEN_DELEGATE_READ_ATTRS_DELEG:
/* read */
status = nfsd4_encode_open_read_delegation4(xdr, open);
break;
- case NFS4_OPEN_DELEGATE_WRITE:
+ case OPEN_DELEGATE_WRITE:
+ case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
/* write */
status = nfsd4_encode_open_write_delegation4(xdr, open);
break;
- case NFS4_OPEN_DELEGATE_NONE_EXT:
+ case OPEN_DELEGATE_NONE_EXT:
/* od_whynone */
status = nfsd4_encode_open_none_delegation4(xdr, open);
break;
@@ -4326,6 +4422,15 @@ static __be32 nfsd4_encode_splice_read(
__be32 nfserr;
/*
+ * Splice read doesn't work if encoding has already wandered
+ * into the XDR buf's page array.
+ */
+ if (unlikely(xdr->buf->page_len)) {
+ WARN_ON_ONCE(1);
+ return nfserr_serverfault;
+ }
+
+ /*
* Make sure there is room at the end of buf->head for
* svcxdr_encode_opaque_pages() to create a tail buffer
* to XDR-pad the payload.
@@ -4407,25 +4512,23 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_compoundargs *argp = resp->rqstp->rq_argp;
struct nfsd4_read *read = &u->read;
struct xdr_stream *xdr = resp->xdr;
- int starting_len = xdr->buf->len;
bool splice_ok = argp->splice_ok;
+ unsigned int eof_offset;
unsigned long maxcount;
+ __be32 wire_data[2];
struct file *file;
- __be32 *p;
if (nfserr)
return nfserr;
+
+ eof_offset = xdr->buf->len;
file = read->rd_nf->nf_file;
- p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
- if (!p) {
+ /* Reserve space for the eof flag and byte count */
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2))) {
WARN_ON_ONCE(splice_ok);
return nfserr_resource;
}
- if (resp->xdr->buf->page_len && splice_ok) {
- WARN_ON_ONCE(1);
- return nfserr_serverfault;
- }
xdr_commit_encode(xdr);
maxcount = min_t(unsigned long, read->rd_length,
@@ -4436,12 +4539,13 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
else
nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
if (nfserr) {
- xdr_truncate_encode(xdr, starting_len);
+ xdr_truncate_encode(xdr, eof_offset);
return nfserr;
}
- p = xdr_encode_bool(p, read->rd_eof);
- *p = cpu_to_be32(read->rd_length);
+ wire_data[0] = read->rd_eof ? xdr_one : xdr_zero;
+ wire_data[1] = cpu_to_be32(read->rd_length);
+ write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2);
return nfs_ok;
}
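The conversion above follows a reserve-then-backfill pattern that recurs throughout these encoder changes: remember a byte offset into xdr->buf, reserve the XDR units now, and write the final value with write_bytes_to_xdr_buf() once the variable-length payload is known. A condensed sketch of the pattern (illustrative only, not code from the patch):

static __be32 encode_with_backfill(struct xdr_stream *xdr, u32 final_value)
{
	unsigned int offset = xdr->buf->len;	/* offset, not a pointer */
	__be32 wire;

	/* Reserve the slot before encoding the variable-length data. */
	if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
		return nfserr_resource;

	/* ... encode payload that may shift or grow the buffer ... */

	/* Backfill the reserved slot once the value is known. */
	wire = cpu_to_be32(final_value);
	write_bytes_to_xdr_buf(xdr->buf, offset, &wire, XDR_UNIT);
	return nfs_ok;
}

Offsets are used instead of the pointer returned by xdr_reserve_space() because later encoding can relocate the underlying buffer pages, leaving a saved pointer stale.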
@@ -4450,25 +4554,21 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
struct nfsd4_readlink *readlink = &u->readlink;
- __be32 *p, *maxcount_p, zero = xdr_zero;
+ __be32 *p, wire_count, zero = xdr_zero;
struct xdr_stream *xdr = resp->xdr;
- int length_offset = xdr->buf->len;
+ unsigned int length_offset;
int maxcount, status;
- maxcount_p = xdr_reserve_space(xdr, XDR_UNIT);
- if (!maxcount_p)
+ /* linktext4.count */
+ length_offset = xdr->buf->len;
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
return nfserr_resource;
- maxcount = PAGE_SIZE;
+ /* linktext4.data */
+ maxcount = PAGE_SIZE;
p = xdr_reserve_space(xdr, maxcount);
if (!p)
return nfserr_resource;
- /*
- * XXX: By default, vfs_readlink() will truncate symlinks if they
- * would overflow the buffer. Is this kosher in NFSv4? If not, one
- * easy fix is: if vfs_readlink() precisely fills the buffer, assume
- * that truncation occurred, and return NFS4ERR_RESOURCE.
- */
nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp,
(char *)p, &maxcount);
if (nfserr == nfserr_isdir)
@@ -4481,7 +4581,9 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfserrno(status);
goto out_err;
}
- *maxcount_p = cpu_to_be32(maxcount);
+
+ wire_count = cpu_to_be32(maxcount);
+ write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, XDR_UNIT);
xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount));
write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero,
xdr_pad_size(maxcount));
@@ -4616,14 +4718,42 @@ nfsd4_encode_rpcsec_gss_info(struct xdr_stream *xdr,
}
static __be32
-nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
+nfsd4_encode_secinfo4(struct xdr_stream *xdr, rpc_authflavor_t pf,
+ u32 *supported)
+{
+ struct rpcsec_gss_info info;
+ __be32 status;
+
+ if (rpcauth_get_gssinfo(pf, &info) == 0) {
+ (*supported)++;
+
+ /* flavor */
+ status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS);
+ if (status != nfs_ok)
+ return status;
+ /* flavor_info */
+ status = nfsd4_encode_rpcsec_gss_info(xdr, &info);
+ if (status != nfs_ok)
+ return status;
+ } else if (pf < RPC_AUTH_MAXFLAVOR) {
+ (*supported)++;
+
+ /* flavor */
+ status = nfsd4_encode_uint32_t(xdr, pf);
+ if (status != nfs_ok)
+ return status;
+ }
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_SECINFO4resok(struct xdr_stream *xdr, struct svc_export *exp)
{
u32 i, nflavs, supported;
struct exp_flavor_info *flavs;
struct exp_flavor_info def_flavs[2];
- static bool report = true;
- __be32 *flavorsp;
- __be32 status;
+ unsigned int count_offset;
+ __be32 status, wire_count;
if (exp->ex_nflavors) {
flavs = exp->ex_flavors;
@@ -4645,43 +4775,20 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
}
}
- supported = 0;
- flavorsp = xdr_reserve_space(xdr, XDR_UNIT);
- if (!flavorsp)
+ count_offset = xdr->buf->len;
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
return nfserr_resource;
- for (i = 0; i < nflavs; i++) {
- rpc_authflavor_t pf = flavs[i].pseudoflavor;
- struct rpcsec_gss_info info;
-
- if (rpcauth_get_gssinfo(pf, &info) == 0) {
- supported++;
-
- /* flavor */
- status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS);
- if (status != nfs_ok)
- return status;
- /* flavor_info */
- status = nfsd4_encode_rpcsec_gss_info(xdr, &info);
- if (status != nfs_ok)
- return status;
- } else if (pf < RPC_AUTH_MAXFLAVOR) {
- supported++;
-
- /* flavor */
- status = nfsd4_encode_uint32_t(xdr, pf);
- if (status != nfs_ok)
- return status;
- } else {
- if (report)
- pr_warn("NFS: SECINFO: security flavor %u "
- "is not supported\n", pf);
- }
+ for (i = 0, supported = 0; i < nflavs; i++) {
+ status = nfsd4_encode_secinfo4(xdr, flavs[i].pseudoflavor,
+ &supported);
+ if (status != nfs_ok)
+ return status;
}
- if (nflavs != supported)
- report = false;
- *flavorsp = cpu_to_be32(supported);
+ wire_count = cpu_to_be32(supported);
+ write_bytes_to_xdr_buf(xdr->buf, count_offset, &wire_count,
+ XDR_UNIT);
return 0;
}
@@ -4692,7 +4799,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_secinfo *secinfo = &u->secinfo;
struct xdr_stream *xdr = resp->xdr;
- return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp);
+ return nfsd4_encode_SECINFO4resok(xdr, secinfo->si_exp);
}
static __be32
@@ -4702,7 +4809,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name;
struct xdr_stream *xdr = resp->xdr;
- return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp);
+ return nfsd4_encode_SECINFO4resok(xdr, secinfo->sin_exp);
}
static __be32
@@ -4826,6 +4933,25 @@ nfsd4_encode_server_owner4(struct xdr_stream *xdr, struct svc_rqst *rqstp)
}
static __be32
+nfsd4_encode_nfs_impl_id4(struct xdr_stream *xdr, struct nfsd4_exchange_id *exid)
+{
+ __be32 status;
+
+ /* nii_domain */
+ status = nfsd4_encode_opaque(xdr, exid->nii_domain.data,
+ exid->nii_domain.len);
+ if (status != nfs_ok)
+ return status;
+ /* nii_name */
+ status = nfsd4_encode_opaque(xdr, exid->nii_name.data,
+ exid->nii_name.len);
+ if (status != nfs_ok)
+ return status;
+ /* nii_time */
+ return nfsd4_encode_nfstime4(xdr, &exid->nii_time);
+}
+
+static __be32
nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
@@ -4859,8 +4985,11 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
if (nfserr != nfs_ok)
return nfserr;
/* eir_server_impl_id<1> */
- if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT)
return nfserr_resource;
+ nfserr = nfsd4_encode_nfs_impl_id4(xdr, exid);
+ if (nfserr != nfs_ok)
+ return nfserr;
return nfs_ok;
}
@@ -4955,7 +5084,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
if (nfserr != nfs_ok)
return nfserr;
/* sr_target_highest_slotid */
- nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+ nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
if (nfserr != nfs_ok)
return nfserr;
/* sr_status_flags */
@@ -5283,17 +5412,20 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
struct file *file = read->rd_nf->nf_file;
struct xdr_stream *xdr = resp->xdr;
bool splice_ok = argp->splice_ok;
+ unsigned int offset_offset;
+ __be32 nfserr, wire_count;
unsigned long maxcount;
- __be32 nfserr, *p;
+ __be64 wire_offset;
- /* Content type, offset, byte count */
- p = xdr_reserve_space(xdr, 4 + 8 + 4);
- if (!p)
+ if (xdr_stream_encode_u32(xdr, NFS4_CONTENT_DATA) != XDR_UNIT)
return nfserr_io;
- if (resp->xdr->buf->page_len && splice_ok) {
- WARN_ON_ONCE(splice_ok);
- return nfserr_serverfault;
- }
+
+ offset_offset = xdr->buf->len;
+
+ /* Reserve space for the byte offset and count */
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 3)))
+ return nfserr_io;
+ xdr_commit_encode(xdr);
maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
@@ -5305,10 +5437,12 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
if (nfserr)
return nfserr;
- *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
- p = xdr_encode_hyper(p, read->rd_offset);
- *p = cpu_to_be32(read->rd_length);
-
+ wire_offset = cpu_to_be64(read->rd_offset);
+ write_bytes_to_xdr_buf(xdr->buf, offset_offset, &wire_offset,
+ XDR_UNIT * 2);
+ wire_count = cpu_to_be32(read->rd_length);
+ write_bytes_to_xdr_buf(xdr->buf, offset_offset + XDR_UNIT * 2,
+ &wire_count, XDR_UNIT);
return nfs_ok;
}
@@ -5319,16 +5453,17 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_read *read = &u->read;
struct file *file = read->rd_nf->nf_file;
struct xdr_stream *xdr = resp->xdr;
- int starting_len = xdr->buf->len;
+ unsigned int eof_offset;
+ __be32 wire_data[2];
u32 segments = 0;
- __be32 *p;
if (nfserr)
return nfserr;
- /* eof flag, segment count */
- p = xdr_reserve_space(xdr, 4 + 4);
- if (!p)
+ eof_offset = xdr->buf->len;
+
+ /* Reserve space for the eof flag and segment count */
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2)))
return nfserr_io;
xdr_commit_encode(xdr);
@@ -5338,15 +5473,16 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfsd4_encode_read_plus_data(resp, read);
if (nfserr) {
- xdr_truncate_encode(xdr, starting_len);
+ xdr_truncate_encode(xdr, eof_offset);
return nfserr;
}
segments++;
out:
- p = xdr_encode_bool(p, read->rd_eof);
- *p = cpu_to_be32(segments);
+ wire_data[0] = read->rd_eof ? xdr_one : xdr_zero;
+ wire_data[1] = cpu_to_be32(segments);
+ write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2);
return nfserr;
}
@@ -5747,15 +5883,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
struct nfs4_stateowner *so = resp->cstate.replay_owner;
struct svc_rqst *rqstp = resp->rqstp;
const struct nfsd4_operation *opdesc = op->opdesc;
- int post_err_offset;
+ unsigned int op_status_offset;
nfsd4_enc encoder;
- __be32 *p;
- p = xdr_reserve_space(xdr, 8);
- if (!p)
+ if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
+ goto release;
+ op_status_offset = xdr->buf->len;
+ if (!xdr_reserve_space(xdr, XDR_UNIT))
goto release;
- *p++ = cpu_to_be32(op->opnum);
- post_err_offset = xdr->buf->len;
if (op->opnum == OP_ILLEGAL)
goto status;
@@ -5796,20 +5931,21 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
* bug if we had to do this on a non-idempotent op:
*/
warn_on_nonidempotent_op(op);
- xdr_truncate_encode(xdr, post_err_offset);
+ xdr_truncate_encode(xdr, op_status_offset + XDR_UNIT);
}
if (so) {
- int len = xdr->buf->len - post_err_offset;
+ int len = xdr->buf->len - (op_status_offset + XDR_UNIT);
so->so_replay.rp_status = op->status;
so->so_replay.rp_buflen = len;
- read_bytes_from_xdr_buf(xdr->buf, post_err_offset,
+ read_bytes_from_xdr_buf(xdr->buf, op_status_offset + XDR_UNIT,
so->so_replay.rp_buf, len);
}
status:
op->status = nfsd4_map_status(op->status,
resp->cstate.minorversion);
- *p = op->status;
+ write_bytes_to_xdr_buf(xdr->buf, op_status_offset,
+ &op->status, XDR_UNIT);
release:
if (opdesc && opdesc->op_release)
opdesc->op_release(&op->u);
diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c
new file mode 100644
index 000000000000..a17b5d8e60b3
--- /dev/null
+++ b/fs/nfsd/nfs4xdr_gen.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+// Generated by xdrgen. Manual edits will be lost.
+// XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x
+// XDR specification modification time: Mon Oct 14 09:10:13 2024
+
+#include <linux/sunrpc/svc.h>
+
+#include "nfs4xdr_gen.h"
+
+static bool __maybe_unused
+xdrgen_decode_int64_t(struct xdr_stream *xdr, int64_t *ptr)
+{
+ return xdrgen_decode_hyper(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_uint32_t(struct xdr_stream *xdr, uint32_t *ptr)
+{
+ return xdrgen_decode_unsigned_int(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_bitmap4(struct xdr_stream *xdr, bitmap4 *ptr)
+{
+ if (xdr_stream_decode_u32(xdr, &ptr->count) < 0)
+ return false;
+ for (u32 i = 0; i < ptr->count; i++)
+ if (!xdrgen_decode_uint32_t(xdr, &ptr->element[i]))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_nfstime4(struct xdr_stream *xdr, struct nfstime4 *ptr)
+{
+ if (!xdrgen_decode_int64_t(xdr, &ptr->seconds))
+ return false;
+ if (!xdrgen_decode_uint32_t(xdr, &ptr->nseconds))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_fattr4_offline(struct xdr_stream *xdr, fattr4_offline *ptr)
+{
+ return xdrgen_decode_bool(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_arguments4(struct xdr_stream *xdr, struct open_arguments4 *ptr)
+{
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_deny))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access_want))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_open_claim))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_create_mode))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+bool
+xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr)
+{
+ return xdrgen_decode_open_arguments4(xdr, ptr);
+};
+
+bool
+xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr)
+{
+ return xdrgen_decode_nfstime4(xdr, ptr);
+};
+
+bool
+xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr)
+{
+ return xdrgen_decode_nfstime4(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_int64_t(struct xdr_stream *xdr, const int64_t value)
+{
+ return xdrgen_encode_hyper(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_uint32_t(struct xdr_stream *xdr, const uint32_t value)
+{
+ return xdrgen_encode_unsigned_int(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_bitmap4(struct xdr_stream *xdr, const bitmap4 value)
+{
+ if (xdr_stream_encode_u32(xdr, value.count) != XDR_UNIT)
+ return false;
+ for (u32 i = 0; i < value.count; i++)
+ if (!xdrgen_encode_uint32_t(xdr, value.element[i]))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_nfstime4(struct xdr_stream *xdr, const struct nfstime4 *value)
+{
+ if (!xdrgen_encode_int64_t(xdr, value->seconds))
+ return false;
+ if (!xdrgen_encode_uint32_t(xdr, value->nseconds))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_fattr4_offline(struct xdr_stream *xdr, const fattr4_offline value)
+{
+ return xdrgen_encode_bool(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_arguments4(struct xdr_stream *xdr, const struct open_arguments4 *value)
+{
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_share_deny))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access_want))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_open_claim))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_create_mode))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+bool
+xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value)
+{
+ return xdrgen_encode_open_arguments4(xdr, value);
+};
+
+bool
+xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value)
+{
+ return xdrgen_encode_nfstime4(xdr, value);
+};
+
+bool
+xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value)
+{
+ return xdrgen_encode_nfstime4(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h
new file mode 100644
index 000000000000..41a0033b7256
--- /dev/null
+++ b/fs/nfsd/nfs4xdr_gen.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
+/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */
+
+#ifndef _LINUX_XDRGEN_NFS4_1_DECL_H
+#define _LINUX_XDRGEN_NFS4_1_DECL_H
+
+#include <linux/types.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+#include <linux/sunrpc/xdrgen/_builtins.h>
+#include <linux/sunrpc/xdrgen/nfs4_1.h>
+
+bool xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr);
+bool xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value);
+
+bool xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr);
+bool xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value);
+
+bool xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr);
+bool xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value);
+
+#endif /* _LINUX_XDRGEN_NFS4_1_DECL_H */
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3adbc05ebaac..6a42cc7a845a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -48,7 +48,6 @@ enum {
NFSD_Versions,
NFSD_Ports,
NFSD_MaxBlkSize,
- NFSD_MaxConnections,
NFSD_Filecache,
NFSD_Leasetime,
NFSD_Gracetime,
@@ -68,7 +67,6 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
static ssize_t write_versions(struct file *file, char *buf, size_t size);
static ssize_t write_ports(struct file *file, char *buf, size_t size);
static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
-static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
@@ -87,7 +85,6 @@ static ssize_t (*const write_op[])(struct file *, char *, size_t) = {
[NFSD_Versions] = write_versions,
[NFSD_Ports] = write_ports,
[NFSD_MaxBlkSize] = write_maxblksize,
- [NFSD_MaxConnections] = write_maxconn,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_Gracetime] = write_gracetime,
@@ -902,44 +899,6 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
nfsd_max_blksize);
}
-/*
- * write_maxconn - Set or report the current max number of connections
- *
- * Input:
- * buf: ignored
- * size: zero
- * OR
- *
- * Input:
- * buf: C string containing an unsigned
- * integer value representing the new
- * number of max connections
- * size: non-zero length of C string in @buf
- * Output:
- * On success: passed-in buffer filled with '\n'-terminated C string
- * containing numeric value of max_connections setting
- * for this net namespace;
- * return code is the size in bytes of the string
- * On error: return code is zero or a negative errno value
- */
-static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
-{
- char *mesg = buf;
- struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
- unsigned int maxconn = nn->max_connections;
-
- if (size > 0) {
- int rv = get_uint(&mesg, &maxconn);
-
- if (rv)
- return rv;
- trace_nfsd_ctl_maxconn(netns(file), maxconn);
- nn->max_connections = maxconn;
- }
-
- return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn);
-}
-
#ifdef CONFIG_NFSD_V4
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
time64_t *time, struct nfsd_net *nn)
@@ -1372,7 +1331,6 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
- [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
@@ -1653,7 +1611,7 @@ out_unlock:
*/
int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
{
- int *nthreads, count = 0, nrpools, i, ret = -EOPNOTSUPP, rem;
+ int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem;
struct net *net = genl_info_net(info);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
const struct nlattr *attr;
@@ -1665,12 +1623,11 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
/* count number of SERVER_THREADS values */
nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
if (nla_type(attr) == NFSD_A_SERVER_THREADS)
- count++;
+ nrpools++;
}
mutex_lock(&nfsd_mutex);
- nrpools = max(count, nfsd_nrpools(net));
nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL);
if (!nthreads) {
ret = -ENOMEM;
@@ -1959,6 +1916,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info)
struct svc_serv *serv;
LIST_HEAD(permsocks);
struct nfsd_net *nn;
+ bool delete = false;
int err, rem;
mutex_lock(&nfsd_mutex);
@@ -2019,34 +1977,28 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info)
}
}
- /* For now, no removing old sockets while server is running */
- if (serv->sv_nrthreads && !list_empty(&permsocks)) {
+ /*
+ * If there are listener transports remaining on the permsocks list,
+ * it means we were asked to remove a listener.
+ */
+ if (!list_empty(&permsocks)) {
list_splice_init(&permsocks, &serv->sv_permsocks);
- spin_unlock_bh(&serv->sv_lock);
- err = -EBUSY;
- goto out_unlock_mtx;
+ delete = true;
}
+ spin_unlock_bh(&serv->sv_lock);
- /* Close the remaining sockets on the permsocks list */
- while (!list_empty(&permsocks)) {
- xprt = list_first_entry(&permsocks, struct svc_xprt, xpt_list);
- list_move(&xprt->xpt_list, &serv->sv_permsocks);
-
- /*
- * Newly-created sockets are born with the BUSY bit set. Clear
- * it if there are no threads, since nothing can pick it up
- * in that case.
- */
- if (!serv->sv_nrthreads)
- clear_bit(XPT_BUSY, &xprt->xpt_flags);
-
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- spin_unlock_bh(&serv->sv_lock);
- svc_xprt_close(xprt);
- spin_lock_bh(&serv->sv_lock);
+ /* Do not remove listeners while there are active threads. */
+ if (serv->sv_nrthreads) {
+ err = -EBUSY;
+ goto out_unlock_mtx;
}
- spin_unlock_bh(&serv->sv_lock);
+ /*
+ * Since we can't delete an arbitrary llist entry, destroy the
+ * remaining listeners and recreate the list.
+ */
+ if (delete)
+ svc_xprt_destroy_all(serv, net);
/* walk list of addrs again, open any that still don't exist */
nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
@@ -2073,6 +2025,9 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info)
xprt = svc_find_listener(serv, xcl_name, net, sa);
if (xprt) {
+ if (delete)
+ WARN_ONCE(1, "Transport type=%s already exists\n",
+ xcl_name);
svc_xprt_put(xprt);
continue;
}
@@ -2246,8 +2201,14 @@ static __net_init int nfsd_net_init(struct net *net)
NFSD_STATS_COUNTERS_NUM);
if (retval)
goto out_repcache_error;
+
memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
nn->nfsd_svcstats.program = &nfsd_programs[0];
+ if (!nfsd_proc_stat_init(net)) {
+ retval = -ENOMEM;
+ goto out_proc_error;
+ }
+
for (i = 0; i < sizeof(nn->nfsd_versions); i++)
nn->nfsd_versions[i] = nfsd_support_version(i);
for (i = 0; i < sizeof(nn->nfsd4_minorversions); i++)
@@ -2257,12 +2218,14 @@ static __net_init int nfsd_net_init(struct net *net)
nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
- nfsd_proc_stat_init(net);
#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ spin_lock_init(&nn->local_clients_lock);
INIT_LIST_HEAD(&nn->local_clients);
#endif
return 0;
+out_proc_error:
+ percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM);
out_repcache_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
@@ -2276,14 +2239,15 @@ out_export_error:
* nfsd_net_pre_exit - Disconnect localio clients from net namespace
* @net: a network namespace that is about to be destroyed
*
- * This invalidated ->net pointers held by localio clients
+ * This invalidates ->net pointers held by localio clients
* while they can still safely access nn->counter.
*/
static __net_exit void nfsd_net_pre_exit(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- nfs_uuid_invalidate_clients(&nn->local_clients);
+ nfs_localio_invalidate_clients(&nn->local_clients,
+ &nn->local_clients_lock);
}
#endif
@@ -2316,6 +2280,8 @@ static int __init init_nfsd(void)
{
int retval;
+ nfsd_debugfs_init();
+
retval = nfsd4_init_slabs();
if (retval)
return retval;
@@ -2326,12 +2292,9 @@ static int __init init_nfsd(void)
if (retval)
goto out_free_pnfs;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
- retval = create_proc_exports_entry();
- if (retval)
- goto out_free_lockd;
retval = register_pernet_subsys(&nfsd_net_ops);
if (retval < 0)
- goto out_free_exports;
+ goto out_free_lockd;
retval = register_cld_notifier();
if (retval)
goto out_free_subsys;
@@ -2340,22 +2303,26 @@ static int __init init_nfsd(void)
goto out_free_cld;
retval = register_filesystem(&nfsd_fs_type);
if (retval)
- goto out_free_all;
+ goto out_free_nfsd4;
retval = genl_register_family(&nfsd_nl_family);
if (retval)
+ goto out_free_filesystem;
+ retval = create_proc_exports_entry();
+ if (retval)
goto out_free_all;
nfsd_localio_ops_init();
return 0;
out_free_all:
+ genl_unregister_family(&nfsd_nl_family);
+out_free_filesystem:
+ unregister_filesystem(&nfsd_fs_type);
+out_free_nfsd4:
nfsd4_destroy_laundry_wq();
out_free_cld:
unregister_cld_notifier();
out_free_subsys:
unregister_pernet_subsys(&nfsd_net_ops);
-out_free_exports:
- remove_proc_entry("fs/nfs/exports", NULL);
- remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_drc_slab_free();
@@ -2363,22 +2330,24 @@ out_free_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
nfsd4_free_slabs();
+ nfsd_debugfs_exit();
return retval;
}
static void __exit exit_nfsd(void)
{
+ remove_proc_entry("fs/nfs/exports", NULL);
+ remove_proc_entry("fs/nfs", NULL);
genl_unregister_family(&nfsd_nl_family);
unregister_filesystem(&nfsd_fs_type);
nfsd4_destroy_laundry_wq();
unregister_cld_notifier();
unregister_pernet_subsys(&nfsd_net_ops);
nfsd_drc_slab_free();
- remove_proc_entry("fs/nfs/exports", NULL);
- remove_proc_entry("fs/nfs", NULL);
nfsd_lockd_shutdown();
nfsd4_free_slabs();
nfsd4_exit_pnfs();
+ nfsd_debugfs_exit();
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 4b56ba1e8e48..1bfd0b4e9af7 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -44,24 +44,14 @@ bool nfsd_support_version(int vers);
#include "stats.h"
/*
- * Maximum blocksizes supported by daemon under various circumstances.
+ * Default and maximum payload size (NFS READ or WRITE), in bytes.
+ * The default is historical, and the maximum is an implementation
+ * limit.
*/
-#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD
-/* NFSv2 is limited by the protocol specification, see RFC 1094 */
-#define NFSSVC_MAXBLKSIZE_V2 (8*1024)
-
-
-/*
- * Largest number of bytes we need to allocate for an NFS
- * call or reply. Used to control buffer sizes. We use
- * the length of v3 WRITE, READDIR and READDIR replies
- * which are an RPC header, up to 26 XDR units of reply
- * data, and some page data.
- *
- * Note that accuracy here doesn't matter too much as the
- * size is rounded up to a page size when allocating space.
- */
-#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE)
+enum {
+ NFSSVC_DEFBLKSIZE = 1 * 1024 * 1024,
+ NFSSVC_MAXBLKSIZE = RPCSVC_MAXPAYLOAD,
+};
struct readdir_cd {
__be32 err; /* 0, nfserr, or nfserr_eof */
@@ -88,9 +78,6 @@ struct nfsd_genl_rqstp {
extern struct svc_program nfsd_programs[];
extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
extern struct mutex nfsd_mutex;
-extern spinlock_t nfsd_drc_lock;
-extern unsigned long nfsd_drc_max_mem;
-extern unsigned long nfsd_drc_mem_used;
extern atomic_t nfsd_th_cnt; /* number of available threads */
extern const struct seq_operations nfs_exports_op;
@@ -159,6 +146,16 @@ void nfsd_reset_versions(struct nfsd_net *nn);
int nfsd_create_serv(struct net *net);
void nfsd_destroy_serv(struct net *net);
+#ifdef CONFIG_DEBUG_FS
+void nfsd_debugfs_init(void);
+void nfsd_debugfs_exit(void);
+#else
+static inline void nfsd_debugfs_init(void) {}
+static inline void nfsd_debugfs_exit(void) {}
+#endif
+
+extern bool nfsd_disable_splice_read __read_mostly;
+
extern int nfsd_max_blksize;
static inline int nfsd_v4client(struct svc_rqst *rq)
@@ -458,7 +455,10 @@ enum {
(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS | \
- FATTR4_WORD2_XATTR_SUPPORT)
+ FATTR4_WORD2_XATTR_SUPPORT | \
+ FATTR4_WORD2_TIME_DELEG_ACCESS | \
+ FATTR4_WORD2_TIME_DELEG_MODIFY | \
+ FATTR4_WORD2_OPEN_ARGUMENTS)
extern const u32 nfsd_suppattrs[3][3];
@@ -528,7 +528,10 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
#endif
#define NFSD_WRITEABLE_ATTRS_WORD2 \
(FATTR4_WORD2_MODE_UMASK \
- | MAYBE_FATTR4_WORD2_SECURITY_LABEL)
+ | MAYBE_FATTR4_WORD2_SECURITY_LABEL \
+ | FATTR4_WORD2_TIME_DELEG_ACCESS \
+ | FATTR4_WORD2_TIME_DELEG_MODIFY \
+ )
#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
NFSD_WRITEABLE_ATTRS_WORD0
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 40ad58a6a036..aef474f1b84b 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -222,7 +222,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net,
cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
put_cred(override_creds(new));
- put_cred(new);
} else {
error = nfsd_setuser_and_check_port(rqstp, cred, exp);
if (error)
@@ -320,6 +319,7 @@ __fh_verify(struct svc_rqst *rqstp,
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_export *exp = NULL;
+ bool may_bypass_gss = false;
struct dentry *dentry;
__be32 error;
@@ -362,13 +362,12 @@ __fh_verify(struct svc_rqst *rqstp,
if (error)
goto out;
- /*
- * pseudoflavor restrictions are not enforced on NLM,
- * which clients virtually always use auth_sys for,
- * even while using RPCSEC_GSS for NFS.
- */
- if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
- goto skip_pseudoflavor_check;
+ if ((access & NFSD_MAY_NLM) && (exp->ex_flags & NFSEXP_NOAUTHNLM))
+ /* NLM is allowed to fully bypass authentication */
+ goto out;
+
+ if (access & NFSD_MAY_BYPASS_GSS)
+ may_bypass_gss = true;
/*
* Clients may expect to be able to use auth_sys during mount,
* even if they use gss for everything else; see section 2.3.2
@@ -376,13 +375,15 @@ __fh_verify(struct svc_rqst *rqstp,
*/
if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT
&& exp->ex_path.dentry == dentry)
- goto skip_pseudoflavor_check;
+ may_bypass_gss = true;
- error = check_nfsd_access(exp, rqstp);
+ error = check_nfsd_access(exp, rqstp, may_bypass_gss);
if (error)
goto out;
+ /* During a LOCALIO call, fh_verify is called with a NULL rqstp */
+ if (rqstp)
+ svc_xprt_set_valid(rqstp->rq_xprt);
-skip_pseudoflavor_check:
/* Finally, check access permissions. */
error = nfsd_permission(cred, exp, dentry, access);
out:
@@ -667,20 +668,18 @@ out_negative:
__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
- struct inode *inode;
struct kstat stat;
__be32 err;
if (fhp->fh_no_wcc || fhp->fh_pre_saved)
return nfs_ok;
- inode = d_inode(fhp->fh_dentry);
err = fh_getattr(fhp, &stat);
if (err)
return err;
if (v4)
- fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
+ fhp->fh_pre_change = nfsd4_change_attribute(&stat);
fhp->fh_pre_mtime = stat.mtime;
fhp->fh_pre_ctime = stat.ctime;
@@ -697,7 +696,6 @@ __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp)
__be32 fh_fill_post_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
- struct inode *inode = d_inode(fhp->fh_dentry);
__be32 err;
if (fhp->fh_no_wcc)
@@ -713,7 +711,7 @@ __be32 fh_fill_post_attrs(struct svc_fh *fhp)
fhp->fh_post_saved = true;
if (v4)
fhp->fh_post_change =
- nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+ nfsd4_change_attribute(&fhp->fh_post_attr);
return nfs_ok;
}
@@ -770,7 +768,7 @@ char * SVCFH_fmt(struct svc_fh *fhp)
struct knfsd_fh *fh = &fhp->fh_handle;
static char buf[2+1+1+64*3+1];
- if (fh->fh_size < 0 || fh->fh_size> 64)
+ if (fh->fh_size > 64)
return "bad-fh";
sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw);
return buf;
@@ -804,7 +802,14 @@ enum fsid_source fsid_source(const struct svc_fh *fhp)
return FSIDSOURCE_DEV;
}
-/*
+/**
+ * nfsd4_change_attribute - Generate an NFSv4 change_attribute value
+ * @stat: inode attributes
+ *
+ * Caller must fill in @stat before calling, typically by invoking
+ * vfs_getattr() with STATX_MODE, STATX_CTIME, and STATX_CHANGE_COOKIE.
+ * Returns an unsigned 64-bit changeid4 value (RFC 8881 Section 3.2).
+ *
* We could use i_version alone as the change attribute. However, i_version
* can go backwards on a regular file after an unclean shutdown. On its own
* that doesn't necessarily cause a problem, but if i_version goes backwards
@@ -821,13 +826,13 @@ enum fsid_source fsid_source(const struct svc_fh *fhp)
* assume that the new change attr is always logged to stable storage in some
* fashion before the results can be seen.
*/
-u64 nfsd4_change_attribute(const struct kstat *stat, const struct inode *inode)
+u64 nfsd4_change_attribute(const struct kstat *stat)
{
u64 chattr;
if (stat->result_mask & STATX_CHANGE_COOKIE) {
chattr = stat->change_cookie;
- if (S_ISREG(inode->i_mode) &&
+ if (S_ISREG(stat->mode) &&
!(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
chattr += (u64)stat->ctime.tv_sec << 30;
chattr += stat->ctime.tv_nsec;
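For context, a minimal sketch of how a caller could obtain the attributes that nfsd4_change_attribute() now expects. The function name example_change_attr and the zero fallback are assumptions for illustration only; the in-tree callers go through fh_getattr() as shown in the hunks above.

/* Illustrative only: compute a changeid4 for an arbitrary path. */
static u64 example_change_attr(const struct path *path)
{
	struct kstat stat;

	if (vfs_getattr(path, &stat,
			STATX_MODE | STATX_CTIME | STATX_CHANGE_COOKIE,
			AT_STATX_SYNC_AS_STAT))
		return 0;	/* illustrative fallback, not what nfsd does */
	return nfsd4_change_attribute(&stat);
}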
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 5b7394801dc4..5103c2f4d225 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -267,7 +267,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
return true;
}
-#ifdef CONFIG_CRC32
/**
* knfsd_fh_hash - calculate the crc32 hash for the filehandle
* @fh - pointer to filehandle
@@ -279,12 +278,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
{
return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size);
}
-#else
-static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
-{
- return 0;
-}
-#endif
/**
* fh_clear_pre_post_attrs - Reset pre/post attributes
@@ -297,8 +290,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
fhp->fh_pre_saved = false;
}
-u64 nfsd4_change_attribute(const struct kstat *stat,
- const struct inode *inode);
+u64 nfsd4_change_attribute(const struct kstat *stat);
__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp);
__be32 fh_fill_post_attrs(struct svc_fh *fhp);
__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6dda081eb24c..c10fa8128a8a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -10,6 +10,7 @@
#include "cache.h"
#include "xdr.h"
#include "vfs.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -54,7 +55,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp)
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd_attrstat *resp = rqstp->rq_resp;
- dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
+ trace_nfsd_vfs_getattr(rqstp, &argp->fh);
fh_copy(&resp->fh, &argp->fh);
resp->status = fh_verify(rqstp, &resp->fh, 0,
@@ -211,7 +212,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
SVCFH_fmt(&argp->fh),
argp->count, argp->offset);
- argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
+ argp->count = min_t(u32, argp->count, NFS_MAXDATA);
argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
resp->pages = rqstp->rq_next_page;
@@ -250,17 +251,14 @@ nfsd_proc_write(struct svc_rqst *rqstp)
struct nfsd_writeargs *argp = rqstp->rq_argp;
struct nfsd_attrstat *resp = rqstp->rq_resp;
unsigned long cnt = argp->len;
- unsigned int nvecs;
dprintk("nfsd: WRITE %s %u bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nvecs = svc_fill_write_vector(rqstp, &argp->payload);
-
- resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
- argp->offset, rqstp->rq_vec, nvecs,
- &cnt, NFS_DATA_SYNC, NULL);
+ fh_copy(&resp->fh, &argp->fh);
+ resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
+ &argp->payload, &cnt, NFS_DATA_SYNC, NULL);
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
@@ -292,9 +290,6 @@ nfsd_proc_create(struct svc_rqst *rqstp)
int hosterr;
dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size);
- dprintk("nfsd: CREATE %s %.*s\n",
- SVCFH_fmt(dirfhp), argp->len, argp->name);
-
/* First verify the parent file handle */
resp->status = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
if (resp->status != nfs_ok)
@@ -312,7 +307,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
}
inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT);
- dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
+ dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(argp->name, argp->len),
+ dirfhp->fh_dentry);
if (IS_ERR(dchild)) {
resp->status = nfserrno(PTR_ERR(dchild));
goto out_unlock;
@@ -331,7 +327,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/
resp->status = nfserr_acces;
if (!newfhp->fh_dentry) {
- printk(KERN_WARNING
+ printk(KERN_WARNING
"nfsd_proc_create: file handle not verified\n");
goto out_unlock;
}
@@ -445,9 +441,6 @@ nfsd_proc_remove(struct svc_rqst *rqstp)
struct nfsd_diropargs *argp = rqstp->rq_argp;
struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: REMOVE %s %.*s\n", SVCFH_fmt(&argp->fh),
- argp->len, argp->name);
-
/* Unlink. -SIFDIR means file must not be a directory */
resp->status = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR,
argp->name, argp->len);
@@ -462,11 +455,6 @@ nfsd_proc_rename(struct svc_rqst *rqstp)
struct nfsd_renameargs *argp = rqstp->rq_argp;
struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: RENAME %s %.*s -> \n",
- SVCFH_fmt(&argp->ffh), argp->flen, argp->fname);
- dprintk("nfsd: -> %s %.*s\n",
- SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname);
-
resp->status = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen,
&argp->tfh, argp->tname, argp->tlen);
fh_put(&argp->ffh);
@@ -481,13 +469,6 @@ nfsd_proc_link(struct svc_rqst *rqstp)
struct nfsd_linkargs *argp = rqstp->rq_argp;
struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: LINK %s ->\n",
- SVCFH_fmt(&argp->ffh));
- dprintk("nfsd: %s %.*s\n",
- SVCFH_fmt(&argp->tfh),
- argp->tlen,
- argp->tname);
-
resp->status = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen,
&argp->ffh);
fh_put(&argp->ffh);
@@ -519,10 +500,6 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
goto out;
}
- dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n",
- SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
- argp->tlen, argp->tname);
-
fh_init(&newfh, NFS_FHSIZE);
resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
argp->tname, &attrs, &newfh);
@@ -548,8 +525,6 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp)
.na_iattr = &argp->attrs,
};
- dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
-
if (resp->fh.fh_dentry) {
printk(KERN_WARNING
"nfsd_proc_mkdir: response already verified??\n");
@@ -578,8 +553,6 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp)
struct nfsd_diropargs *argp = rqstp->rq_argp;
struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: RMDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
-
resp->status = nfsd_unlink(rqstp, &argp->fh, S_IFDIR,
argp->name, argp->len);
fh_put(&argp->fh);
@@ -615,9 +588,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp)
struct nfsd_readdirres *resp = rqstp->rq_resp;
loff_t offset;
- dprintk("nfsd: READDIR %s %d bytes at %d\n",
- SVCFH_fmt(&argp->fh),
- argp->count, argp->cookie);
+ trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie);
nfsd_init_dirlist_pages(rqstp, resp, argp->count);
@@ -642,8 +613,6 @@ nfsd_proc_statfs(struct svc_rqst *rqstp)
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd_statfsres *resp = rqstp->rq_resp;
- dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh));
-
resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats,
NFSD_MAY_BYPASS_GSS_ON_ROOT);
fh_put(&argp->fh);
@@ -739,7 +708,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_argzero = sizeof(struct nfsd_readargs),
.pc_ressize = sizeof(struct nfsd_readres),
.pc_cachetype = RC_NOCACHE,
- .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
+ .pc_xdrressize = ST+AT+1+NFS_MAXDATA/4,
.pc_name = "READ",
},
[NFSPROC_WRITECACHE] = {
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 49e2f32102ab..82b0111ac469 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -70,16 +70,6 @@ static __be32 nfsd_init_request(struct svc_rqst *,
*/
DEFINE_MUTEX(nfsd_mutex);
-/*
- * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
- * nfsd_drc_max_pages limits the total amount of memory available for
- * version 4.1 DRC caches.
- * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
- */
-DEFINE_SPINLOCK(nfsd_drc_lock);
-unsigned long nfsd_drc_max_mem;
-unsigned long nfsd_drc_mem_used;
-
#if IS_ENABLED(CONFIG_NFS_LOCALIO)
static const struct svc_version *localio_versions[] = {
[1] = &localio_version1,
@@ -214,32 +204,32 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change
return 0;
}
-bool nfsd_serv_try_get(struct net *net) __must_hold(rcu)
+bool nfsd_net_try_get(struct net *net) __must_hold(rcu)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- return (nn && percpu_ref_tryget_live(&nn->nfsd_serv_ref));
+ return (nn && percpu_ref_tryget_live(&nn->nfsd_net_ref));
}
-void nfsd_serv_put(struct net *net) __must_hold(rcu)
+void nfsd_net_put(struct net *net) __must_hold(rcu)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- percpu_ref_put(&nn->nfsd_serv_ref);
+ percpu_ref_put(&nn->nfsd_net_ref);
}
-static void nfsd_serv_done(struct percpu_ref *ref)
+static void nfsd_net_done(struct percpu_ref *ref)
{
- struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref);
+ struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref);
- complete(&nn->nfsd_serv_confirm_done);
+ complete(&nn->nfsd_net_confirm_done);
}
-static void nfsd_serv_free(struct percpu_ref *ref)
+static void nfsd_net_free(struct percpu_ref *ref)
{
- struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref);
+ struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref);
- complete(&nn->nfsd_serv_free_done);
+ complete(&nn->nfsd_net_free_done);
}
/*
@@ -406,13 +396,13 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred)
if (ret)
goto out_filecache;
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ nfsd4_ssc_init_umount_work(nn);
+#endif
ret = nfs4_state_start_net(net);
if (ret)
goto out_reply_cache;
-#ifdef CONFIG_NFSD_V4_2_INTER_SSC
- nfsd4_ssc_init_umount_work(nn);
-#endif
nn->nfsd_net_up = true;
return 0;
@@ -436,6 +426,10 @@ static void nfsd_shutdown_net(struct net *net)
if (!nn->nfsd_net_up)
return;
+
+ percpu_ref_kill_and_confirm(&nn->nfsd_net_ref, nfsd_net_done);
+ wait_for_completion(&nn->nfsd_net_confirm_done);
+
nfsd_export_flush(net);
nfs4_state_shutdown_net(net);
nfsd_reply_cache_shutdown(nn);
@@ -444,7 +438,10 @@ static void nfsd_shutdown_net(struct net *net)
lockd_down(net);
nn->lockd_up = false;
}
- percpu_ref_exit(&nn->nfsd_serv_ref);
+
+ wait_for_completion(&nn->nfsd_net_free_done);
+ percpu_ref_exit(&nn->nfsd_net_ref);
+
nn->nfsd_net_up = false;
nfsd_shutdown_generic();
}
@@ -526,11 +523,6 @@ void nfsd_destroy_serv(struct net *net)
lockdep_assert_held(&nfsd_mutex);
- percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref, nfsd_serv_done);
- wait_for_completion(&nn->nfsd_serv_confirm_done);
- wait_for_completion(&nn->nfsd_serv_free_done);
- /* percpu_ref_exit is called in nfsd_shutdown_net */
-
spin_lock(&nfsd_notifier_lock);
nn->nfsd_serv = NULL;
spin_unlock(&nfsd_notifier_lock);
@@ -575,27 +567,6 @@ void nfsd_reset_versions(struct nfsd_net *nn)
}
}
-/*
- * Each session guarantees a negotiated per slot memory cache for replies
- * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
- * NFSv4.1 server might want to use more memory for a DRC than a machine
- * with mutiple services.
- *
- * Impose a hard limit on the number of pages for the DRC which varies
- * according to the machines free pages. This is of course only a default.
- *
- * For now this is a #defined shift which could be under admin control
- * in the future.
- */
-static void set_max_drc(void)
-{
- #define NFSD_DRC_SIZE_SHIFT 7
- nfsd_drc_max_mem = (nr_free_buffer_pages()
- >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
- nfsd_drc_mem_used = 0;
- dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
-}
-
static int nfsd_get_default_max_blksize(void)
{
struct sysinfo i;
@@ -611,7 +582,7 @@ static int nfsd_get_default_max_blksize(void)
*/
target >>= 12;
- ret = NFSSVC_MAXBLKSIZE;
+ ret = NFSSVC_DEFBLKSIZE;
while (ret > target && ret >= 8*1024*2)
ret /= 2;
return ret;
@@ -652,12 +623,12 @@ int nfsd_create_serv(struct net *net)
if (nn->nfsd_serv)
return 0;
- error = percpu_ref_init(&nn->nfsd_serv_ref, nfsd_serv_free,
+ error = percpu_ref_init(&nn->nfsd_net_ref, nfsd_net_free,
0, GFP_KERNEL);
if (error)
return error;
- init_completion(&nn->nfsd_serv_free_done);
- init_completion(&nn->nfsd_serv_confirm_done);
+ init_completion(&nn->nfsd_net_free_done);
+ init_completion(&nn->nfsd_net_confirm_done);
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
@@ -668,7 +639,6 @@ int nfsd_create_serv(struct net *net)
if (serv == NULL)
return -ENOMEM;
- serv->sv_maxconn = nn->max_connections;
error = svc_bind(serv, net);
if (error < 0) {
svc_destroy(&serv);
@@ -678,7 +648,6 @@ int nfsd_create_serv(struct net *net)
nn->nfsd_serv = serv;
spin_unlock(&nfsd_notifier_lock);
- set_max_drc();
/* check if the notifier is already set */
if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
register_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -954,11 +923,7 @@ nfsd(void *vrqstp)
* The main request loop
*/
while (!svc_thread_should_stop(rqstp)) {
- /* Update sv_maxconn if it has changed */
- rqstp->rq_server->sv_maxconn = nn->max_connections;
-
svc_recv(rqstp);
-
nfsd_file_net_dispose(nn);
}
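As a side note, the renamed nfsd_net_try_get()/nfsd_net_put() pair is meant to be used under the RCU read lock, per the __must_hold(rcu) annotations above. A minimal sketch of the expected calling pattern; the function name and the work done while the reference is held are assumptions.

static bool example_with_nfsd_net(struct net *net)
{
	bool ok = false;

	rcu_read_lock();
	if (nfsd_net_try_get(net)) {
		/* per-net nfsd state stays pinned until the matching put */
		ok = true;
		nfsd_net_put(net);
	}
	rcu_read_unlock();
	return ok;
}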
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 5777f40c7353..fc262ceafca9 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -336,7 +336,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
/* opaque data */
if (xdr_stream_decode_u32(xdr, &args->len) < 0)
return false;
- if (args->len > NFSSVC_MAXBLKSIZE_V2)
+ if (args->len > NFS_MAXDATA)
return false;
return xdr_stream_subsegment(xdr, &args->payload, args->len);
@@ -540,7 +540,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
p = xdr_reserve_space(xdr, XDR_UNIT * 5);
if (!p)
return false;
- *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2);
+ *p++ = cpu_to_be32(NFS_MAXDATA);
*p++ = cpu_to_be32(stat->f_bsize);
*p++ = cpu_to_be32(stat->f_blocks);
*p++ = cpu_to_be32(stat->f_bfree);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 35b3564c065f..1995bca158b8 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -64,15 +64,36 @@ typedef struct {
refcount_t cs_count;
} copy_stateid_t;
+struct nfsd4_referring_call {
+ struct list_head __list;
+
+ u32 rc_sequenceid;
+ u32 rc_slotid;
+};
+
+struct nfsd4_referring_call_list {
+ struct list_head __list;
+
+ struct nfs4_sessionid rcl_sessionid;
+ int __nr_referring_calls;
+ struct list_head rcl_referring_calls;
+};
+
struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
+#define NFSD4_CALLBACK_RUNNING (0)
+#define NFSD4_CALLBACK_WAKE (1)
+#define NFSD4_CALLBACK_REQUEUE (2)
+ unsigned long cb_flags;
const struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
int cb_seq_status;
int cb_status;
- bool cb_need_restart;
- bool cb_holds_slot;
+ int cb_held_slot;
+
+ int cb_nr_referring_call_list;
+ struct list_head cb_referring_call_list;
};
struct nfsd4_callback_ops {
@@ -137,24 +158,36 @@ struct nfs4_cpntf_state {
time64_t cpntf_time; /* last time stateid used */
};
+/*
+ * RFC 7862 Section 4.8 states:
+ *
+ * | A copy offload stateid will be valid until either (A) the client
+ * | or server restarts or (B) the client returns the resource by
+ * | issuing an OFFLOAD_CANCEL operation or the client replies to a
+ * | CB_OFFLOAD operation.
+ *
+ * Because a client might not reply to a CB_OFFLOAD, or a reply
+ * might get lost due to connection loss, NFSD purges async copy
+ * state after a short period to prevent it from accumulating
+ * over time.
+ */
+#define NFSD_COPY_INITIAL_TTL 10
+
struct nfs4_cb_fattr {
struct nfsd4_callback ncf_getattr;
u32 ncf_cb_status;
- u32 ncf_cb_bmap[1];
/* from CB_GETATTR reply */
u64 ncf_cb_change;
u64 ncf_cb_fsize;
+ struct timespec64 ncf_cb_mtime;
+ struct timespec64 ncf_cb_atime;
- unsigned long ncf_cb_flags;
bool ncf_file_modified;
u64 ncf_initial_cinfo;
u64 ncf_cur_fsize;
};
-/* bits for ncf_cb_flags */
-#define CB_GETATTR_BUSY 0
-
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -182,8 +215,8 @@ struct nfs4_delegation {
struct list_head dl_perclnt;
struct list_head dl_recall_lru; /* delegation recalled */
struct nfs4_clnt_odstate *dl_clnt_odstate;
- u32 dl_type;
time64_t dl_time;
+ u32 dl_type;
/* For recall: */
int dl_retries;
struct nfsd4_callback dl_recall;
@@ -193,6 +226,22 @@ struct nfs4_delegation {
struct nfs4_cb_fattr dl_cb_fattr;
};
+static inline bool deleg_is_read(u32 dl_type)
+{
+ return (dl_type == OPEN_DELEGATE_READ || dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG);
+}
+
+static inline bool deleg_is_write(u32 dl_type)
+{
+ return (dl_type == OPEN_DELEGATE_WRITE || dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG);
+}
+
+static inline bool deleg_attrs_deleg(u32 dl_type)
+{
+ return dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG ||
+ dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG;
+}
+
#define cb_to_delegation(cb) \
container_of(cb, struct nfs4_delegation, dl_recall)
@@ -213,8 +262,11 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
return container_of(s, struct nfs4_delegation, dl_stid);
}
-/* Maximum number of slots per session. 160 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION 160
+/* Maximum number of slots per session. This is for sanity-check only.
+ * It could be increased if we had a mechanism to shut down misbehaving clients.
+ * A large number can be needed to get good throughput on high-latency servers.
+ */
+#define NFSD_MAX_SLOTS_PER_SESSION 2048
/* Maximum session per slot cache size */
#define NFSD_SLOT_CACHE_SIZE 2048
/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
@@ -226,12 +278,15 @@ struct nfsd4_slot {
u32 sl_seqid;
__be32 sl_status;
struct svc_cred sl_cred;
+ u32 sl_index;
u32 sl_datalen;
u16 sl_opcnt;
+ u16 sl_generation;
#define NFSD4_SLOT_INUSE (1 << 0)
#define NFSD4_SLOT_CACHETHIS (1 << 1)
#define NFSD4_SLOT_INITIALIZED (1 << 2)
#define NFSD4_SLOT_CACHED (1 << 3)
+#define NFSD4_SLOT_REUSED (1 << 4)
u8 sl_flags;
char sl_data[];
};
@@ -290,6 +345,9 @@ struct nfsd4_conn {
unsigned char cn_flags;
};
+/* Maximum number of slots that nfsd will use in the backchannel */
+#define NFSD_BC_SLOT_TABLE_SIZE (sizeof(u32) * 8)
+
/*
* Representation of a v4.1+ session. These are refcounted in a similar fashion
* to the nfs4_client. References are only taken when the server is actively
@@ -297,20 +355,23 @@ struct nfsd4_conn {
*/
struct nfsd4_session {
atomic_t se_ref;
+ spinlock_t se_lock;
+ u32 se_cb_slot_avail; /* bitmap of available slots */
+ u32 se_cb_highest_slot; /* highest slot client wants */
+ u32 se_cb_prog;
struct list_head se_hash; /* hash by sessionid */
struct list_head se_perclnt;
-/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */
-#define NFS4_SESSION_DEAD 0x010
- u32 se_flags;
+ struct list_head se_all_sessions;/* global list of sessions */
struct nfs4_client *se_client;
struct nfs4_sessionid se_sessionid;
struct nfsd4_channel_attrs se_fchannel;
- struct nfsd4_channel_attrs se_bchannel;
struct nfsd4_cb_sec se_cb_sec;
struct list_head se_conns;
- u32 se_cb_prog;
- u32 se_cb_seq_nr;
- struct nfsd4_slot *se_slots[]; /* forward channel slots */
+ u32 se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
+ struct xarray se_slots; /* forward channel slots */
+ u16 se_slot_gen;
+ bool se_dead;
+ u32 se_target_maxslots;
};
/* formatted contents of nfs4_sessionid */
@@ -409,7 +470,6 @@ struct nfs4_client {
#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
-#define NFSD4_CLIENT_CB_RECALL_ANY (6)
unsigned long cl_flags;
struct workqueue_struct *cl_callback_wq;
@@ -443,9 +503,6 @@ struct nfs4_client {
*/
struct dentry *cl_nfsd_info_dentry;
- /* for nfs41 callbacks */
- /* We currently support a single back channel with a single slot */
- unsigned long cl_cb_slot_busy;
struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
/* wait here for slots */
struct net *net;
@@ -458,7 +515,6 @@ struct nfs4_client {
struct nfsd4_cb_recall_any *cl_ra;
time64_t cl_ra_time;
- struct list_head cl_ra_cblist;
};
/* struct nfs4_client_reset
@@ -491,7 +547,7 @@ struct nfs4_replay {
unsigned int rp_buflen;
char *rp_buf;
struct knfsd_fh rp_openfh;
- atomic_t rp_locked;
+ int rp_locked;
char rp_ibuf[NFSD4_REPLAY_ISIZE];
};
@@ -737,11 +793,24 @@ extern __be32 nfs4_check_open_reclaim(struct nfs4_client *);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
+extern void nfsd41_cb_referring_call(struct nfsd4_callback *cb,
+ struct nfs4_sessionid *sessionid,
+ u32 slotid, u32 seqno);
+extern void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb);
extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
extern bool nfsd4_run_cb(struct nfsd4_callback *cb);
+
+static inline void nfsd4_try_run_cb(struct nfsd4_callback *cb)
+{
+ if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags))
+ WARN_ON_ONCE(!nfsd4_run_cb(cb));
+}
+
extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
+void nfsd4_async_copy_reaper(struct nfsd_net *nn);
+bool nfsd4_has_active_async_copies(struct nfs4_client *clp);
extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
struct xdr_netobj princhash, struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
@@ -784,5 +853,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
}
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
- struct dentry *dentry, bool *file_modified, u64 *size);
+ struct dentry *dentry, struct nfs4_delegation **pdp);
#endif /* NFSD4_STATE_H */
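The backchannel now tracks up to NFSD_BC_SLOT_TABLE_SIZE (32) slots with the se_cb_slot_avail bitmap and a per-slot sequence number in se_cb_seq_nr[]. A rough sketch of how a slot might be claimed and released under se_lock follows; the real selection logic lives in nfs4callback.c and may differ, and the helper names here are assumptions.

/* Illustrative only: claim the lowest available backchannel slot. */
static int example_bc_slot_get(struct nfsd4_session *ses)
{
	int slot = -1;

	spin_lock(&ses->se_lock);
	if (ses->se_cb_slot_avail) {
		slot = ffs(ses->se_cb_slot_avail) - 1;
		if (slot > (int)ses->se_cb_highest_slot)
			slot = -1;	/* client offered fewer slots */
		else
			ses->se_cb_slot_avail &= ~BIT(slot);
	}
	spin_unlock(&ses->se_lock);
	return slot;
}

static void example_bc_slot_put(struct nfsd4_session *ses, int slot)
{
	spin_lock(&ses->se_lock);
	ses->se_cb_slot_avail |= BIT(slot);
	spin_unlock(&ses->se_lock);
}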
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index bb22893f1157..f7eaf95e20fc 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -73,11 +73,11 @@ static int nfsd_show(struct seq_file *seq, void *v)
DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
-void nfsd_proc_stat_init(struct net *net)
+struct proc_dir_entry *nfsd_proc_stat_init(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
+ return svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
}
void nfsd_proc_stat_shutdown(struct net *net)
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 04aacb6c36e2..e4efb0e4e56d 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -10,7 +10,7 @@
#include <uapi/linux/nfsd/stats.h>
#include <linux/percpu_counter.h>
-void nfsd_proc_stat_init(struct net *net);
+struct proc_dir_entry *nfsd_proc_stat_init(struct net *net);
void nfsd_proc_stat_shutdown(struct net *net);
static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index b8470d4cbe99..3c5505ef5e3a 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -11,6 +11,7 @@
#include <linux/tracepoint.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
+#include <trace/misc/fs.h>
#include <trace/misc/nfs.h>
#include <trace/misc/sunrpc.h>
@@ -18,22 +19,40 @@
#include "nfsfh.h"
#include "xdr4.h"
-#define NFSD_TRACE_PROC_RES_FIELDS \
+#define NFSD_TRACE_PROC_CALL_FIELDS(r) \
+ __field(unsigned int, netns_ino) \
+ __field(u32, xid) \
+ __sockaddr(server, (r)->rq_xprt->xpt_locallen) \
+ __sockaddr(client, (r)->rq_xprt->xpt_remotelen)
+
+#define NFSD_TRACE_PROC_CALL_ASSIGNMENTS(r) \
+ do { \
+ struct svc_xprt *xprt = (r)->rq_xprt; \
+ __entry->netns_ino = SVC_NET(r)->ns.inum; \
+ __entry->xid = be32_to_cpu((r)->rq_xid); \
+ __assign_sockaddr(server, &xprt->xpt_local, \
+ xprt->xpt_locallen); \
+ __assign_sockaddr(client, &xprt->xpt_remote, \
+ xprt->xpt_remotelen); \
+ } while (0)
+
+#define NFSD_TRACE_PROC_RES_FIELDS(r) \
__field(unsigned int, netns_ino) \
__field(u32, xid) \
__field(unsigned long, status) \
- __array(unsigned char, server, sizeof(struct sockaddr_in6)) \
- __array(unsigned char, client, sizeof(struct sockaddr_in6))
+ __sockaddr(server, (r)->rq_xprt->xpt_locallen) \
+ __sockaddr(client, (r)->rq_xprt->xpt_remotelen)
-#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \
+#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(r, error) \
do { \
- __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \
- __entry->xid = be32_to_cpu(rqstp->rq_xid); \
+ struct svc_xprt *xprt = (r)->rq_xprt; \
+ __entry->netns_ino = SVC_NET(r)->ns.inum; \
+ __entry->xid = be32_to_cpu((r)->rq_xid); \
__entry->status = be32_to_cpu(error); \
- memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \
- rqstp->rq_xprt->xpt_locallen); \
- memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \
- rqstp->rq_xprt->xpt_remotelen); \
+ __assign_sockaddr(server, &xprt->xpt_local, \
+ xprt->xpt_locallen); \
+ __assign_sockaddr(client, &xprt->xpt_remote, \
+ xprt->xpt_remotelen); \
} while (0);
DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
@@ -79,7 +98,7 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
{ NFSD_MAY_READ, "READ" }, \
{ NFSD_MAY_SATTR, "SATTR" }, \
{ NFSD_MAY_TRUNC, "TRUNC" }, \
- { NFSD_MAY_LOCK, "LOCK" }, \
+ { NFSD_MAY_NLM, "NLM" }, \
{ NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \
{ NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \
{ NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \
@@ -145,14 +164,14 @@ TRACE_EVENT(nfsd_compound_decode_err,
),
TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status),
TP_STRUCT__entry(
- NFSD_TRACE_PROC_RES_FIELDS
+ NFSD_TRACE_PROC_RES_FIELDS(rqstp)
__field(u32, args_opcnt)
__field(u32, resp_opcnt)
__field(u32, opnum)
),
TP_fast_assign(
- NFSD_TRACE_PROC_RES_ASSIGNMENTS(status)
+ NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status)
__entry->args_opcnt = args_opcnt;
__entry->resp_opcnt = resp_opcnt;
@@ -163,7 +182,7 @@ TRACE_EVENT(nfsd_compound_decode_err,
__entry->opnum, __entry->status)
);
-TRACE_EVENT(nfsd_compound_encode_err,
+DECLARE_EVENT_CLASS(nfsd_compound_err_class,
TP_PROTO(
const struct svc_rqst *rqstp,
u32 opnum,
@@ -171,12 +190,12 @@ TRACE_EVENT(nfsd_compound_encode_err,
),
TP_ARGS(rqstp, opnum, status),
TP_STRUCT__entry(
- NFSD_TRACE_PROC_RES_FIELDS
+ NFSD_TRACE_PROC_RES_FIELDS(rqstp)
__field(u32, opnum)
),
TP_fast_assign(
- NFSD_TRACE_PROC_RES_ASSIGNMENTS(status)
+ NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status)
__entry->opnum = opnum;
),
@@ -184,6 +203,18 @@ TRACE_EVENT(nfsd_compound_encode_err,
__entry->opnum, __entry->status)
);
+#define DEFINE_NFSD_COMPOUND_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_compound_err_class, nfsd_compound_##name##_err, \
+ TP_PROTO( \
+ const struct svc_rqst *rqstp, \
+ u32 opnum, \
+ __be32 status \
+ ), \
+ TP_ARGS(rqstp, opnum, status))
+
+DEFINE_NFSD_COMPOUND_ERR_EVENT(op);
+DEFINE_NFSD_COMPOUND_ERR_EVENT(encode);
+
#define show_fs_file_type(x) \
__print_symbolic(x, \
{ S_IFLNK, "LNK" }, \
@@ -439,6 +470,8 @@ DEFINE_NFSD_IO_EVENT(write_start);
DEFINE_NFSD_IO_EVENT(write_opened);
DEFINE_NFSD_IO_EVENT(write_io_done);
DEFINE_NFSD_IO_EVENT(write_done);
+DEFINE_NFSD_IO_EVENT(commit_start);
+DEFINE_NFSD_IO_EVENT(commit_done);
DECLARE_EVENT_CLASS(nfsd_err_class,
TP_PROTO(struct svc_rqst *rqstp,
@@ -614,7 +647,6 @@ DEFINE_STATEID_EVENT(open);
DEFINE_STATEID_EVENT(deleg_read);
DEFINE_STATEID_EVENT(deleg_write);
DEFINE_STATEID_EVENT(deleg_return);
-DEFINE_STATEID_EVENT(deleg_recall);
DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
TP_PROTO(u32 seqid, const stateid_t *stp),
@@ -792,6 +824,14 @@ DEFINE_EVENT(nfsd_cs_slot_class, nfsd_##name, \
DEFINE_CS_SLOT_EVENT(slot_seqid_conf);
DEFINE_CS_SLOT_EVENT(slot_seqid_unconf);
+#define show_nfs_slot_flags(val) \
+ __print_flags(val, "|", \
+ { NFSD4_SLOT_INUSE, "INUSE" }, \
+ { NFSD4_SLOT_CACHETHIS, "CACHETHIS" }, \
+ { NFSD4_SLOT_INITIALIZED, "INITIALIZED" }, \
+ { NFSD4_SLOT_CACHED, "CACHED" }, \
+ { NFSD4_SLOT_REUSED, "REUSED" })
+
TRACE_EVENT(nfsd_slot_seqid_sequence,
TP_PROTO(
const struct nfs4_client *clp,
@@ -802,10 +842,11 @@ TRACE_EVENT(nfsd_slot_seqid_sequence,
TP_STRUCT__entry(
__field(u32, seqid)
__field(u32, slot_seqid)
+ __field(u32, slot_index)
+ __field(unsigned long, slot_flags)
__field(u32, cl_boot)
__field(u32, cl_id)
__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
- __field(bool, in_use)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
@@ -814,11 +855,13 @@ TRACE_EVENT(nfsd_slot_seqid_sequence,
clp->cl_cb_conn.cb_addrlen);
__entry->seqid = seq->seqid;
__entry->slot_seqid = slot->sl_seqid;
+ __entry->slot_index = seq->slotid;
+ __entry->slot_flags = slot->sl_flags;
),
- TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u (%sin use)",
+ TP_printk("addr=%pISpc client %08x:%08x idx=%u seqid=%u slot_seqid=%u flags=%s",
__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
- __entry->seqid, __entry->slot_seqid,
- __entry->in_use ? "" : "not "
+ __entry->slot_index, __entry->seqid, __entry->slot_seqid,
+ show_nfs_slot_flags(__entry->slot_flags)
)
);
@@ -1028,6 +1071,7 @@ DEFINE_CLID_EVENT(confirmed_r);
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
{ 1 << NFSD_FILE_PENDING, "PENDING" }, \
{ 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \
+ { 1 << NFSD_FILE_RECENT, "RECENT" }, \
{ 1 << NFSD_FILE_GC, "GC" })
DECLARE_EVENT_CLASS(nfsd_file_class,
@@ -1306,6 +1350,7 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_aged);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed);
DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class,
@@ -1335,6 +1380,7 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \
TP_ARGS(removed, remaining))
DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
+DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_recent);
DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
TRACE_EVENT(nfsd_file_close,
@@ -1591,7 +1637,7 @@ DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
__entry->cl_id = clp->cl_clientid.cl_id;
__entry->cb = cb;
__entry->opcode = cb->cb_ops ? cb->cb_ops->opcode : _CB_NULL;
- __entry->need_restart = cb->cb_need_restart;
+ __entry->need_restart = test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
clp->cl_cb_conn.cb_addrlen)
),
@@ -1685,7 +1731,7 @@ TRACE_EVENT(nfsd_cb_free_slot,
__entry->cl_id = sid->clientid.cl_id;
__entry->seqno = sid->sequence;
__entry->reserved = sid->reserved;
- __entry->slot_seqno = session->se_cb_seq_nr;
+ __entry->slot_seqno = session->se_cb_seq_nr[cb->cb_held_slot];
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
" sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",
@@ -2232,7 +2278,7 @@ TRACE_EVENT(nfsd_copy_done,
)
);
-TRACE_EVENT(nfsd_copy_async_done,
+DECLARE_EVENT_CLASS(nfsd_copy_async_done_class,
TP_PROTO(
const struct nfsd4_copy *copy
),
@@ -2301,6 +2347,268 @@ TRACE_EVENT(nfsd_copy_async_done,
)
);
+#define DEFINE_COPY_ASYNC_DONE_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_async_done_class, \
+ nfsd_copy_async_##name, \
+ TP_PROTO(const struct nfsd4_copy *copy), \
+ TP_ARGS(copy))
+
+DEFINE_COPY_ASYNC_DONE_EVENT(done);
+DEFINE_COPY_ASYNC_DONE_EVENT(cancel);
+
+TRACE_EVENT(nfsd_vfs_setattr,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const struct iattr *iap,
+ const struct timespec64 *guardtime
+ ),
+ TP_ARGS(rqstp, fhp, iap, guardtime),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __field(s64, gtime_tv_sec)
+ __field(u32, gtime_tv_nsec)
+ __field(unsigned int, ia_valid)
+ __field(loff_t, ia_size)
+ __field(uid_t, ia_uid)
+ __field(gid_t, ia_gid)
+ __field(umode_t, ia_mode)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->gtime_tv_sec = guardtime ? guardtime->tv_sec : 0;
+ __entry->gtime_tv_nsec = guardtime ? guardtime->tv_nsec : 0;
+ __entry->ia_valid = iap->ia_valid;
+ __entry->ia_size = iap->ia_size;
+ __entry->ia_uid = __kuid_val(iap->ia_uid);
+ __entry->ia_gid = __kgid_val(iap->ia_gid);
+ __entry->ia_mode = iap->ia_mode;
+ ),
+ TP_printk(
+ "xid=0x%08x fh_hash=0x%08x ia_valid=%s ia_size=%llu ia_mode=0%o ia_uid=%u ia_gid=%u guard_time=%lld.%u",
+ __entry->xid, __entry->fh_hash, show_ia_valid_flags(__entry->ia_valid),
+ __entry->ia_size, __entry->ia_mode, __entry->ia_uid, __entry->ia_gid,
+ __entry->gtime_tv_sec, __entry->gtime_tv_nsec
+ )
+)
+
+TRACE_EVENT(nfsd_vfs_lookup,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const char *name,
+ unsigned int len
+ ),
+ TP_ARGS(rqstp, fhp, name, len),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __string_len(name, name, len)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x name=%s",
+ __entry->xid, __entry->fh_hash, __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_create,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ umode_t type,
+ const char *name,
+ unsigned int len
+ ),
+ TP_ARGS(rqstp, fhp, type, name, len),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __field(umode_t, type)
+ __string_len(name, name, len)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->type = type;
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x type=%s name=%s",
+ __entry->xid, __entry->fh_hash,
+ show_fs_file_type(__entry->type), __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_symlink,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const char *name,
+ unsigned int namelen,
+ const char *target
+ ),
+ TP_ARGS(rqstp, fhp, name, namelen, target),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __string_len(name, name, namelen)
+ __string(target, target)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __assign_str(name);
+ __assign_str(target);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x name=%s target=%s",
+ __entry->xid, __entry->fh_hash,
+ __get_str(name), __get_str(target)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_link,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *sfhp,
+ const struct svc_fh *tfhp,
+ const char *name,
+ unsigned int namelen
+ ),
+ TP_ARGS(rqstp, sfhp, tfhp, name, namelen),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, sfh_hash)
+ __field(u32, tfh_hash)
+ __string_len(name, name, namelen)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle);
+ __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle);
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x src_fh=0x%08x tgt_fh=0x%08x name=%s",
+ __entry->xid, __entry->sfh_hash, __entry->tfh_hash,
+ __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_unlink,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const char *name,
+ unsigned int len
+ ),
+ TP_ARGS(rqstp, fhp, name, len),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __string_len(name, name, len)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x name=%s",
+ __entry->xid, __entry->fh_hash,
+ __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_rename,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *sfhp,
+ const struct svc_fh *tfhp,
+ const char *source,
+ unsigned int sourcelen,
+ const char *target,
+ unsigned int targetlen
+ ),
+ TP_ARGS(rqstp, sfhp, tfhp, source, sourcelen, target, targetlen),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, sfh_hash)
+ __field(u32, tfh_hash)
+ __string_len(source, source, sourcelen)
+ __string_len(target, target, targetlen)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle);
+ __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle);
+ __assign_str(source);
+ __assign_str(target);
+ ),
+ TP_printk("xid=0x%08x sfh_hash=0x%08x tfh_hash=0x%08x source=%s target=%s",
+ __entry->xid, __entry->sfh_hash, __entry->tfh_hash,
+ __get_str(source), __get_str(target)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_readdir,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ u32 count,
+ u64 offset
+ ),
+ TP_ARGS(rqstp, fhp, count, offset),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __field(u32, count)
+ __field(u64, offset)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->count = count;
+ __entry->offset = offset;
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu count=%u",
+ __entry->xid, __entry->fh_hash,
+ __entry->offset, __entry->count
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_vfs_getattr_class,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp
+ ),
+ TP_ARGS(rqstp, fhp),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x",
+ __entry->xid, __entry->fh_hash
+ )
+);
+
+#define DEFINE_NFSD_VFS_GETATTR_EVENT(__name) \
+DEFINE_EVENT(nfsd_vfs_getattr_class, __name, \
+ TP_PROTO( \
+ const struct svc_rqst *rqstp, \
+ const struct svc_fh *fhp \
+ ), \
+ TP_ARGS(rqstp, fhp))
+
+DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_getattr);
+DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_statfs);
+
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 22325b590e17..cd689df2ca5d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -31,11 +31,11 @@
#include <linux/exportfs.h>
#include <linux/writeback.h>
#include <linux/security.h>
+#include <linux/sunrpc/xdr.h>
#include "xdr3.h"
#ifdef CONFIG_NFSD_V4
-#include "../internal.h"
#include "acl.h"
#include "idmap.h"
#include "xdr4.h"
@@ -48,6 +48,8 @@
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
+bool nfsd_disable_splice_read __read_mostly;
+
/**
* nfserrno - Map Linux errnos to NFS errnos
* @errno: POSIX(-ish) error code to be mapped
@@ -72,7 +74,6 @@ nfserrno (int errno)
{ nfserr_acces, -EACCES },
{ nfserr_exist, -EEXIST },
{ nfserr_xdev, -EXDEV },
- { nfserr_mlink, -EMLINK },
{ nfserr_nodev, -ENODEV },
{ nfserr_notdir, -ENOTDIR },
{ nfserr_isdir, -EISDIR },
@@ -246,7 +247,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry;
int host_err;
- dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
+ trace_nfsd_vfs_lookup(rqstp, fhp, name, len);
dparent = fhp->fh_dentry;
exp = exp_get(fhp->fh_export);
@@ -266,7 +267,8 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
} else {
- dentry = lookup_one_len_unlocked(name, dparent, len);
+ dentry = lookup_one_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, len), dparent);
host_err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_nfserr;
@@ -321,7 +323,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
if (err)
return err;
- err = check_nfsd_access(exp, rqstp);
+ err = check_nfsd_access(exp, rqstp, false);
if (err)
goto out;
/*
@@ -501,6 +503,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
bool size_change = (iap->ia_valid & ATTR_SIZE);
int retries;
+ trace_nfsd_vfs_setattr(rqstp, fhp, iap, guardtime);
+
if (iap->ia_valid & ATTR_SIZE) {
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
ftype = S_IFREG;
@@ -861,8 +865,7 @@ int nfsd_open_break_lease(struct inode *inode, int access)
* N.B. After this call fhp needs an fh_put
*/
static int
-__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
- int may_flags, struct file **filp)
+__nfsd_open(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp)
{
struct path path;
struct inode *inode;
@@ -903,11 +906,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
goto out;
}
- if (may_flags & NFSD_MAY_64BIT_COOKIE)
- file->f_mode |= FMODE_64BITHASH;
- else
- file->f_mode |= FMODE_32BITHASH;
-
*filp = file;
out:
return host_err;
@@ -930,14 +928,14 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
* directories, but we never have and it doesn't seem to have
* caused anyone a problem. If we were to change this, note
* also that our filldir callbacks would need a variant of
- * lookup_one_len that doesn't check permissions.
+ * lookup_one_positive_unlocked() that doesn't check permissions.
*/
if (type == S_IFREG)
may_flags |= NFSD_MAY_OWNER_OVERRIDE;
retry:
err = fh_verify(rqstp, fhp, type, may_flags);
if (!err) {
- host_err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ host_err = __nfsd_open(fhp, type, may_flags, filp);
if (host_err == -EOPENSTALE && !retried) {
retried = true;
fh_put(fhp);
@@ -950,7 +948,6 @@ retry:
/**
* nfsd_open_verified - Open a regular file for the filecache
- * @rqstp: RPC request
* @fhp: NFS filehandle of the file to open
* @may_flags: internal permission flags
* @filp: OUT: open "struct file *"
@@ -958,10 +955,9 @@ retry:
* Returns zero on success, or a negative errno value.
*/
int
-nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
- struct file **filp)
+nfsd_open_verified(struct svc_fh *fhp, int may_flags, struct file **filp)
{
- return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
+ return __nfsd_open(fhp, S_IFREG, may_flags, filp);
}
/*
@@ -1091,23 +1087,23 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned long v, total;
struct iov_iter iter;
loff_t ppos = offset;
- struct page *page;
ssize_t host_err;
+ size_t len;
v = 0;
total = *count;
while (total) {
- page = *(rqstp->rq_next_page++);
- rqstp->rq_vec[v].iov_base = page_address(page) + base;
- rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base);
- total -= rqstp->rq_vec[v].iov_len;
+ len = min_t(size_t, total, PAGE_SIZE - base);
+ bvec_set_page(&rqstp->rq_bvec[v], *(rqstp->rq_next_page++),
+ len, base);
+ total -= len;
++v;
base = 0;
}
- WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec));
+ WARN_ON_ONCE(v > rqstp->rq_maxpages);
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
- iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count);
+ iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count);
host_err = vfs_iter_read(file, &iter, &ppos, 0);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
@@ -1149,11 +1145,27 @@ static int wait_for_concurrent_writes(struct file *file)
return err;
}
+/**
+ * nfsd_vfs_write - write data to an already-open file
+ * @rqstp: RPC execution context
+ * @fhp: File handle of file to write into
+ * @nf: An open file matching @fhp
+ * @offset: Byte offset of start
+ * @payload: xdr_buf containing the write payload
+ * @cnt: IN: number of bytes to write, OUT: number of bytes actually written
+ * @stable: An NFS stable_how value
+ * @verf: NFS WRITE verifier
+ *
+ * Upon return, caller must invoke fh_put on @fhp.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
+ */
__be32
-nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
- loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int stable,
- __be32 *verf)
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file *nf, loff_t offset,
+ const struct xdr_buf *payload, unsigned long *cnt,
+ int stable, __be32 *verf)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file *file = nf->nf_file;
@@ -1168,6 +1180,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
unsigned int pflags = current->flags;
rwf_t flags = 0;
bool restore_flags = false;
+ unsigned int nvecs;
trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
@@ -1195,7 +1208,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
if (stable && !fhp->fh_use_wgather)
flags |= RWF_SYNC;
- iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt);
+ nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload);
+ iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt);
since = READ_ONCE(file->f_wb_err);
if (verf)
nfsd_copy_write_verifier(verf, nn);
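The write path above no longer builds rq_vec kvecs; the decoded payload stays in its xdr_buf and is converted to bio_vecs in place with xdr_buf_to_bvec(). A minimal sketch of the same conversion outside nfsd; the function name and the absence of error handling are assumptions.

static ssize_t example_write_xdr_buf(struct file *file, loff_t pos,
				     struct bio_vec *bvecs, unsigned int maxvecs,
				     const struct xdr_buf *payload)
{
	struct iov_iter iter;
	unsigned int nvecs;

	nvecs = xdr_buf_to_bvec(bvecs, maxvecs, payload);
	iov_iter_bvec(&iter, ITER_SOURCE, bvecs, nvecs, payload->len);
	return vfs_iter_write(file, &iter, &pos, 0);
}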
@@ -1246,6 +1260,8 @@ out_nfserr:
*/
bool nfsd_read_splice_ok(struct svc_rqst *rqstp)
{
+ if (nfsd_disable_splice_read)
+ return false;
switch (svc_auth_flavor(rqstp)) {
case RPC_AUTH_GSS_KRB5I:
case RPC_AUTH_GSS_KRB5P:
@@ -1293,14 +1309,24 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
return err;
}
-/*
- * Write data to a file.
- * The stable flag requests synchronous writes.
- * N.B. After this call fhp needs an fh_put
+/**
+ * nfsd_write - open a file and write data to it
+ * @rqstp: RPC execution context
+ * @fhp: File handle of file to write into; nfsd_write() may modify it
+ * @offset: Byte offset of start
+ * @payload: xdr_buf containing the write payload
+ * @cnt: IN: number of bytes to write, OUT: number of bytes actually written
+ * @stable: An NFS stable_how value
+ * @verf: NFS WRITE verifier
+ *
+ * Upon return, caller must invoke fh_put on @fhp.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt, int stable,
+ const struct xdr_buf *payload, unsigned long *cnt, int stable,
__be32 *verf)
{
struct nfsd_file *nf;
@@ -1312,8 +1338,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
if (err)
goto out;
- err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec,
- vlen, cnt, stable, verf);
+ err = nfsd_vfs_write(rqstp, fhp, nf, offset, payload, cnt,
+ stable, verf);
nfsd_file_put(nf);
out:
trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
@@ -1349,6 +1375,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
loff_t start, end;
struct nfsd_net *nn;
+ trace_nfsd_commit_start(rqstp, fhp, offset, count);
+
/*
* Convert the client-provided (offset, count) range to a
* (start, end) range. If the client-provided range falls
@@ -1387,6 +1415,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
} else
nfsd_copy_write_verifier(verf, nn);
+ trace_nfsd_commit_done(rqstp, fhp, offset, count);
return err;
}
@@ -1470,7 +1499,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct inode *dirp;
struct iattr *iap = attrs->na_iattr;
__be32 err;
- int host_err;
+ int host_err = 0;
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
@@ -1497,28 +1526,15 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
- host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
- if (!host_err && unlikely(d_unhashed(dchild))) {
- struct dentry *d;
- d = lookup_one_len(dchild->d_name.name,
- dchild->d_parent,
- dchild->d_name.len);
- if (IS_ERR(d)) {
- host_err = PTR_ERR(d);
- break;
- }
- if (unlikely(d_is_negative(d))) {
- dput(d);
- err = nfserr_serverfault;
- goto out;
- }
+ dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
+ if (IS_ERR(dchild)) {
+ host_err = PTR_ERR(dchild);
+ } else if (d_is_negative(dchild)) {
+ err = nfserr_serverfault;
+ goto out;
+ } else if (unlikely(dchild != resfhp->fh_dentry)) {
dput(resfhp->fh_dentry);
- resfhp->fh_dentry = dget(d);
- err = fh_update(resfhp);
- dput(dchild);
- dchild = d;
- if (err)
- goto out;
+ resfhp->fh_dentry = dget(dchild);
}
break;
case S_IFCHR:
@@ -1539,7 +1555,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
out:
- dput(dchild);
+ if (!IS_ERR(dchild))
+ dput(dchild);
return err;
out_nfserr:
@@ -1562,6 +1579,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 err;
int host_err;
+ trace_nfsd_vfs_create(rqstp, fhp, type, fname, flen);
+
if (isdotent(fname, flen))
return nfserr_exist;
@@ -1576,7 +1595,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
return nfserrno(host_err);
inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
- dchild = lookup_one_len(fname, dentry, flen);
+ dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
host_err = PTR_ERR(dchild);
if (IS_ERR(dchild)) {
err = nfserrno(host_err);
@@ -1662,6 +1681,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 err, cerr;
int host_err;
+ trace_nfsd_vfs_symlink(rqstp, fhp, fname, flen, path);
+
err = nfserr_noent;
if (!flen || path[0] == '\0')
goto out;
@@ -1681,7 +1702,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
- dnew = lookup_one_len(fname, dentry, flen);
+ dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
if (IS_ERR(dnew)) {
err = nfserrno(PTR_ERR(dnew));
inode_unlock(dentry->d_inode);
@@ -1708,9 +1729,17 @@ out:
return err;
}
-/*
- * Create a hardlink
- * N.B. After this call _both_ ffhp and tfhp need an fh_put
+/**
+ * nfsd_link - create a link
+ * @rqstp: RPC transaction context
+ * @ffhp: the file handle of the directory where the new link is to be created
+ * @name: the filename of the new link
+ * @len: the length of @name in octets
+ * @tfhp: the file handle of an existing file object
+ *
+ * After this call _both_ ffhp and tfhp need an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
@@ -1718,9 +1747,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
{
struct dentry *ddir, *dnew, *dold;
struct inode *dirp;
+ int type;
__be32 err;
int host_err;
+ trace_nfsd_vfs_link(rqstp, ffhp, tfhp, name, len);
+
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
@@ -1737,19 +1769,19 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
if (isdotent(name, len))
goto out;
+ err = nfs_ok;
+ type = d_inode(tfhp->fh_dentry)->i_mode & S_IFMT;
host_err = fh_want_write(tfhp);
- if (host_err) {
- err = nfserrno(host_err);
+ if (host_err)
goto out;
- }
ddir = ffhp->fh_dentry;
dirp = d_inode(ddir);
inode_lock_nested(dirp, I_MUTEX_PARENT);
- dnew = lookup_one_len(name, ddir, len);
+ dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(name, len), ddir);
if (IS_ERR(dnew)) {
- err = nfserrno(PTR_ERR(dnew));
+ host_err = PTR_ERR(dnew);
goto out_unlock;
}
@@ -1765,17 +1797,26 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
fh_fill_post_attrs(ffhp);
inode_unlock(dirp);
if (!host_err) {
- err = nfserrno(commit_metadata(ffhp));
- if (!err)
- err = nfserrno(commit_metadata(tfhp));
- } else {
- err = nfserrno(host_err);
+ host_err = commit_metadata(ffhp);
+ if (!host_err)
+ host_err = commit_metadata(tfhp);
}
+
dput(dnew);
out_drop_write:
fh_drop_write(tfhp);
+ if (host_err == -EBUSY) {
+ /*
+ * See RFC 8881 Section 18.9.4 para 1-2: NFSv4 LINK
+ * wants a status unique to the object type.
+ */
+ if (type != S_IFDIR)
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
+ }
out:
- return err;
+ return err != nfs_ok ? err : nfserrno(host_err);
out_dput:
dput(dnew);
@@ -1804,9 +1845,19 @@ nfsd_has_cached_files(struct dentry *dentry)
return ret;
}
-/*
- * Rename a file
- * N.B. After this call _both_ ffhp and tfhp need an fh_put
+/**
+ * nfsd_rename - rename a directory entry
+ * @rqstp: RPC transaction context
+ * @ffhp: the file handle of parent directory containing the entry to be renamed
+ * @fname: the filename of directory entry to be renamed
+ * @flen: the length of @fname in octets
+ * @tfhp: the file handle of parent directory to contain the renamed entry
+ * @tname: the filename of the new entry
+ * @tlen: the length of @tname in octets
+ *
+ * After this call _both_ ffhp and tfhp need an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
@@ -1814,10 +1865,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
{
struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap;
struct inode *fdir, *tdir;
+ int type = S_IFDIR;
__be32 err;
int host_err;
bool close_cached = false;
+ trace_nfsd_vfs_rename(rqstp, ffhp, tfhp, fname, flen, tname, tlen);
+
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
goto out;
@@ -1860,7 +1914,7 @@ retry:
if (err != nfs_ok)
goto out_unlock;
- odentry = lookup_one_len(fname, fdentry, flen);
+ odentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), fdentry);
host_err = PTR_ERR(odentry);
if (IS_ERR(odentry))
goto out_nfserr;
@@ -1871,11 +1925,14 @@ retry:
host_err = -EINVAL;
if (odentry == trap)
goto out_dput_old;
+ type = d_inode(odentry)->i_mode & S_IFMT;
- ndentry = lookup_one_len(tname, tdentry, tlen);
+ ndentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(tname, tlen), tdentry);
host_err = PTR_ERR(ndentry);
if (IS_ERR(ndentry))
goto out_dput_old;
+ if (d_inode(ndentry))
+ type = d_inode(ndentry)->i_mode & S_IFMT;
host_err = -ENOTEMPTY;
if (ndentry == trap)
goto out_dput_new;
@@ -1913,7 +1970,18 @@ retry:
out_dput_old:
dput(odentry);
out_nfserr:
- err = nfserrno(host_err);
+ if (host_err == -EBUSY) {
+ /*
+ * See RFC 8881 Section 18.26.4 para 1-3: NFSv4 RENAME
+ * wants a status unique to the object type.
+ */
+ if (type != S_IFDIR)
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
+ } else {
+ err = nfserrno(host_err);
+ }
if (!close_cached) {
fh_fill_post_attrs(ffhp);
@@ -1940,9 +2008,17 @@ out:
return err;
}
-/*
- * Unlink a file or directory
- * N.B. After this call fhp needs an fh_put
+/**
+ * nfsd_unlink - remove a directory entry
+ * @rqstp: RPC transaction context
+ * @fhp: the file handle of the parent directory to be modified
+ * @type: enforced file type of the object to be removed
+ * @fname: the name of the directory entry to be removed
+ * @flen: length of @fname in octets
+ *
+ * After this call fhp needs an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
@@ -1954,6 +2030,8 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
__be32 err;
int host_err;
+ trace_nfsd_vfs_unlink(rqstp, fhp, fname, flen);
+
err = nfserr_acces;
if (!flen || isdotent(fname, flen))
goto out;
@@ -1969,7 +2047,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dirp = d_inode(dentry);
inode_lock_nested(dirp, I_MUTEX_PARENT);
- rdentry = lookup_one_len(fname, dentry, flen);
+ rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
host_err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
goto out_unlock;
@@ -2016,15 +2094,17 @@ out_drop_write:
fh_drop_write(fhp);
out_nfserr:
if (host_err == -EBUSY) {
- /* name is mounted-on. There is no perfect
- * error status.
+ /*
+ * See RFC 8881 Section 18.25.4 para 4: NFSv4 REMOVE
+ * wants a status unique to the object type.
*/
- err = nfserr_file_open;
- } else {
- err = nfserrno(host_err);
+ if (type != S_IFDIR)
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
}
out:
- return err;
+ return err != nfs_ok ? err : nfserrno(host_err);
out_unlock:
inode_unlock(dirp);
goto out_drop_write;
@@ -2174,13 +2254,15 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
loff_t offset = *offsetp;
int may_flags = NFSD_MAY_READ;
- if (fhp->fh_64bit_cookies)
- may_flags |= NFSD_MAY_64BIT_COOKIE;
-
err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
if (err)
goto out;
+ if (fhp->fh_64bit_cookies)
+ file->f_mode |= FMODE_64BITHASH;
+ else
+ file->f_mode |= FMODE_32BITHASH;
+
offset = vfs_llseek(file, offset, SEEK_SET);
if (offset < 0) {
err = nfserrno((int)offset);
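
With this hunk the directory-cookie width is carried in the struct file rather than in an NFSD-private may_flags bit, so the decision is visible to the underlying filesystem. A sketch under that assumption; the helper name below is made up:

/*
 * Sketch, not from this patch: a filesystem's readdir path can key its
 * cookie encoding off the f_mode bits NFSD sets above.
 */
static inline bool dir_wants_64bit_cookies(const struct file *file)
{
	return file->f_mode & FMODE_64BITHASH;
}
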
@@ -2238,6 +2320,8 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in
{
__be32 err;
+ trace_nfsd_vfs_statfs(rqstp, fhp);
+
err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
if (!err) {
struct path path = {
@@ -2512,7 +2596,7 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
(acc & NFSD_MAY_EXEC)? " exec" : "",
(acc & NFSD_MAY_SATTR)? " sattr" : "",
(acc & NFSD_MAY_TRUNC)? " trunc" : "",
- (acc & NFSD_MAY_LOCK)? " lock" : "",
+ (acc & NFSD_MAY_NLM)? " nlm" : "",
(acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
inode->i_mode,
IS_IMMUTABLE(inode)? " immut" : "",
@@ -2537,16 +2621,6 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
return nfserr_perm;
- if (acc & NFSD_MAY_LOCK) {
- /* If we cannot rely on authentication in NLM requests,
- * just allow locks, otherwise require read permission, or
- * ownership
- */
- if (exp->ex_flags & NFSEXP_NOAUTHNLM)
- return 0;
- else
- acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
- }
/*
* The file owner always gets access permission for accesses that
* would normally be checked at open time. This is to make
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 3ff146522556..eff04959606f 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -20,7 +20,7 @@
#define NFSD_MAY_READ 0x004 /* == MAY_READ */
#define NFSD_MAY_SATTR 0x008
#define NFSD_MAY_TRUNC 0x010
-#define NFSD_MAY_LOCK 0x020
+#define NFSD_MAY_NLM 0x020 /* request is from lockd */
#define NFSD_MAY_MASK 0x03f
/* extra hints to permission and open routines: */
@@ -114,8 +114,8 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp,
- int may_flags, struct file **filp);
+int nfsd_open_verified(struct svc_fh *fhp, int may_flags,
+ struct file **filp);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
unsigned long *count,
@@ -128,13 +128,13 @@ bool nfsd_read_splice_ok(struct svc_rqst *rqstp);
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long *count,
u32 *eof);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
- struct kvec *, int, unsigned long *,
- int stable, __be32 *verf);
+__be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ loff_t offset, const struct xdr_buf *payload,
+ unsigned long *cnt, int stable, __be32 *verf);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_file *nf, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt,
- int stable, __be32 *verf);
+ const struct xdr_buf *payload,
+ unsigned long *cnt, int stable, __be32 *verf);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
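
The nfsd_write()/nfsd_vfs_write() prototypes above now take the payload as a const struct xdr_buf rather than a kvec array. A sketch of a caller under that assumption; example_unstable_write() is hypothetical and not part of this patch:

/*
 * Hypothetical caller: a single flat buffer is described by the head
 * kvec of an on-stack xdr_buf and written without a stability
 * guarantee (NFS_UNSTABLE).
 */
static __be32 example_unstable_write(struct svc_rqst *rqstp,
				     struct svc_fh *fhp, void *data,
				     unsigned long *cnt, __be32 *verf)
{
	struct xdr_buf payload = {
		.head[0].iov_base	= data,
		.head[0].iov_len	= *cnt,
		.len			= *cnt,
	};

	return nfsd_write(rqstp, fhp, 0, &payload, cnt, NFS_UNSTABLE, verf);
}
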
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2a21a7662e03..aa2a356da784 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -567,6 +567,7 @@ struct nfsd4_exchange_id {
struct xdr_netobj nii_domain;
struct xdr_netobj nii_name;
struct timespec64 nii_time;
+ char *server_impl_name;
};
struct nfsd4_sequence {
@@ -575,9 +576,7 @@ struct nfsd4_sequence {
u32 slotid; /* request/response */
u32 maxslots; /* request/response */
u32 cachethis; /* request */
-#if 0
u32 target_maxslots; /* response */
-#endif /* not yet */
u32 status_flags; /* response */
};
@@ -675,7 +674,12 @@ struct nfsd4_cb_offload {
struct nfsd4_callback co_cb;
struct nfsd42_write_res co_res;
__be32 co_nfserr;
+ unsigned int co_retries;
struct knfsd_fh co_fh;
+
+ struct nfs4_sessionid co_referring_sessionid;
+ u32 co_referring_slotid;
+ u32 co_referring_seqno;
};
struct nfsd4_copy {
@@ -693,12 +697,16 @@ struct nfsd4_copy {
#define NFSD4_COPY_F_SYNCHRONOUS (2)
#define NFSD4_COPY_F_COMMITTED (3)
#define NFSD4_COPY_F_COMPLETED (4)
+#define NFSD4_COPY_F_OFFLOAD_DONE (5)
/* response */
__be32 nfserr;
struct nfsd42_write_res cp_res;
struct knfsd_fh fh;
+ /* offload callback */
+ struct nfsd4_cb_offload cp_cb_offload;
+
struct nfs4_client *cp_clp;
struct nfsd_file *nf_src;
@@ -709,6 +717,7 @@ struct nfsd4_copy {
struct list_head copies;
struct task_struct *copy_task;
refcount_t refcount;
+ unsigned int cp_ttl;
struct nfsd4_ssc_umount_item *ss_nsui;
struct nfs_fh c_fh;
@@ -930,6 +939,7 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
+void nfsd4_exchange_id_release(union nfsd4_op_u *u);
extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *,
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index e8b00309c449..f4e29c0c701c 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -6,8 +6,11 @@
#define cb_compound_enc_hdr_sz 4
#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2)
+#define enc_referring_call4_sz (1 + 1)
+#define enc_referring_call_list4_sz (sessionid_sz + 1 + \
+ enc_referring_call4_sz)
#define cb_sequence_enc_sz (sessionid_sz + 4 + \
- 1 /* no referring calls list yet */)
+ enc_referring_call_list4_sz)
#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4)
#define op_enc_sz 1
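
Worked example for the CB_SEQUENCE encode size above, assuming NFS4_MAX_SESSIONID_LEN is 16 so that sessionid_sz is 4 XDR words:

/*
 *   enc_referring_call4_sz      = 1 + 1     = 2 words
 *   enc_referring_call_list4_sz = 4 + 1 + 2 = 7 words
 *   cb_sequence_enc_sz          = 4 + 4 + 7 = 15 words (60 octets)
 *
 * i.e. the encoded CB_SEQUENCE now reserves room for one referring
 * call list carrying a single referring call, instead of just the
 * zero list count encoded before.
 */
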
@@ -59,16 +62,20 @@
* 1: CB_GETATTR opcode (32-bit)
* N: file_handle
* 1: number of entries in attribute array (32-bit)
- * 1: entry 0 in attribute array (32-bit)
+ * 3: entries 0-2 in attribute array (32-bit * 3)
*/
#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \
cb_sequence_enc_sz + \
- 1 + enc_nfs4_fh_sz + 1 + 1)
+ 1 + enc_nfs4_fh_sz + 1 + 3)
/*
* 4: fattr_bitmap_maxsz
* 1: attribute array len
* 2: change attr (64-bit)
* 2: size (64-bit)
+ * 2: atime.seconds (64-bit)
+ * 1: atime.nanoseconds (32-bit)
+ * 2: mtime.seconds (64-bit)
+ * 1: mtime.nanoseconds (32-bit)
*/
#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \
- cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)
+ cb_sequence_dec_sz + 4 + 1 + 2 + 2 + 2 + 1 + 2 + 1 + op_dec_sz)
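
Worked example for the CB_GETATTR reply size above, using the per-field word counts listed in this hunk:

/*
 *   bitmap + len: 4 + 1
 *   change attr : 2
 *   size        : 2
 *   atime       : 2 + 1
 *   mtime       : 2 + 1
 *   -------------------
 *   total       : 15 XDR words of attribute data, 6 words (24 octets)
 *                 more than the previous 9, to carry atime and mtime
 *                 alongside the change attribute and size.
 */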