diff options
Diffstat (limited to 'fs/nfsd')
45 files changed, 5997 insertions, 2748 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 43b88eaf0673..9eb2e795c43c 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -4,9 +4,11 @@ config NFSD depends on INET depends on FILE_LOCKING depends on FSNOTIFY + select CRC32 select LOCKD select SUNRPC select EXPORTFS + select NFS_COMMON select NFS_ACL_SUPPORT if NFSD_V2_ACL select NFS_ACL_SUPPORT if NFSD_V3_ACL depends on MULTIUSER @@ -158,3 +160,19 @@ config NFSD_V4_SECURITY_LABEL If you do not wish to enable fine-grained security labels SELinux or Smack policies on NFSv4 files, say N. + +config NFSD_LEGACY_CLIENT_TRACKING + bool "Support legacy NFSv4 client tracking methods (DEPRECATED)" + depends on NFSD_V4 + default y + help + The NFSv4 server needs to store a small amount of information on + stable storage in order to handle state recovery after reboot. Most + modern deployments upcall to a userland daemon for this (nfsdcld), + but older NFS servers may store information directly in a + recoverydir, or spawn a process directly using a usermodehelper + upcall. + + These legacy client tracking methods have proven to be probelmatic + and will be removed in the future. Say Y here if you need support + for them in the interim. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 6fffc8f03f74..18cbd3fa7691 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -12,7 +12,8 @@ nfsd-y += trace.o nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o \ - stats.o filecache.o nfs3proc.o nfs3xdr.o + stats.o filecache.o nfs3proc.o nfs3xdr.o \ + netlink.o nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o @@ -22,3 +23,4 @@ nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o +nfsd-$(CONFIG_NFS_LOCALIO) += localio.o diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index e6beaaf4f170..93e33d1ee891 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -5,26 +5,26 @@ #include "nfsd.h" #include "auth.h" -int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) +int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp) { struct exp_flavor_info *f; struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) + if (f->pseudoflavor == cred->cr_flavor) return f->flags; } return exp->ex_flags; } -int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) +int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp) { struct group_info *rqgi; struct group_info *gi; struct cred *new; int i; - int flags = nfsexp_flags(rqstp, exp); + int flags = nfsexp_flags(cred, exp); /* discard any old override before preparing the new set */ revert_creds(get_cred(current_real_cred())); @@ -32,10 +32,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) if (!new) return -ENOMEM; - new->fsuid = rqstp->rq_cred.cr_uid; - new->fsgid = rqstp->rq_cred.cr_gid; + new->fsuid = cred->cr_uid; + new->fsgid = cred->cr_gid; - rqgi = rqstp->rq_cred.cr_group_info; + rqgi = cred->cr_group_info; if (flags & NFSEXP_ALLSQUASH) { new->fsuid = exp->ex_anon_uid; diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h index dbd66424f600..8c5031bbbcee 100644 --- a/fs/nfsd/auth.h +++ b/fs/nfsd/auth.h @@ -12,6 +12,6 @@ * Set the current process's fsuid/fsgid etc to those of the NFS * client user */ -int nfsd_setuser(struct svc_rqst *, struct svc_export *); +int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp); #endif /* LINUX_NFSD_AUTH_H */ diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 01d7fd108cf3..08a20e5bcf7f 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -117,12 +117,13 @@ static __be32 nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, struct iomap *iomaps, int nr_iomaps) { + struct timespec64 mtime = inode_get_mtime(inode); loff_t new_size = lcp->lc_last_wr + 1; struct iattr iattr = { .ia_valid = 0 }; int error; if (lcp->lc_mtime.tv_nsec == UTIME_NOW || - timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) + timespec64_compare(&lcp->lc_mtime, &mtime) < 0) lcp->lc_mtime = current_time(inode); iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; @@ -146,8 +147,7 @@ nfsd4_block_get_device_info_simple(struct super_block *sb, struct pnfs_block_deviceaddr *dev; struct pnfs_block_volume *b; - dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + - sizeof(struct pnfs_block_volume), GFP_KERNEL); + dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL); if (!dev) return -ENOMEM; gdp->gd_device = dev; @@ -254,8 +254,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb, const struct pr_ops *ops; int ret; - dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + - sizeof(struct pnfs_block_volume), GFP_KERNEL); + dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL); if (!dev) return -ENOMEM; gdp->gd_device = dev; @@ -327,10 +326,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode, } static void -nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls) +nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file) { struct nfs4_client *clp = ls->ls_stid.sc_client; - struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev; + struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev; bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, nfsd4_scsi_pr_key(clp), 0, true); diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 1ed2f691ebb9..ce78f74715ee 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -16,9 +16,9 @@ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, - struct nfsd4_layoutget *lgp) + const struct nfsd4_layoutget *lgp) { - struct pnfs_block_extent *b = lgp->lg_content; + const struct pnfs_block_extent *b = lgp->lg_content; int len = sizeof(__be32) + 5 * sizeof(__be64) + sizeof(__be32); __be32 *p; @@ -77,7 +77,7 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, - struct nfsd4_getdeviceinfo *gdp) + const struct nfsd4_getdeviceinfo *gdp) { struct pnfs_block_deviceaddr *dev = gdp->gd_device; int len = sizeof(__be32), ret, i; diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h index bc5166bfe46b..4e28ac8f1127 100644 --- a/fs/nfsd/blocklayoutxdr.h +++ b/fs/nfsd/blocklayoutxdr.h @@ -47,13 +47,13 @@ struct pnfs_block_volume { struct pnfs_block_deviceaddr { u32 nr_volumes; - struct pnfs_block_volume volumes[]; + struct pnfs_block_volume volumes[] __counted_by(nr_volumes); }; __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, - struct nfsd4_getdeviceinfo *gdp); + const struct nfsd4_getdeviceinfo *gdp); __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, - struct nfsd4_layoutget *lgp); + const struct nfsd4_layoutget *lgp); int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, u32 block_size); int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 66a05fefae98..bb7addef4a31 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -10,7 +10,7 @@ #define NFSCACHE_H #include <linux/sunrpc/svc.h> -#include "netns.h" +#include "nfsd.h" /* * Representation of a reply cache entry. diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index d4d3ec58047e..49aede376d86 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -40,24 +40,15 @@ #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) -static void expkey_put_work(struct work_struct *work) +static void expkey_put(struct kref *ref) { - struct svc_expkey *key = - container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); if (test_bit(CACHE_VALID, &key->h.flags) && !test_bit(CACHE_NEGATIVE, &key->h.flags)) path_put(&key->ek_path); auth_domain_put(key->ek_client); - kfree(key); -} - -static void expkey_put(struct kref *ref) -{ - struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); - - INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); - queue_rcu_work(system_wq, &key->ek_rcu_work); + kfree_rcu(key, ek_rcu); } static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) @@ -343,43 +334,37 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) static int export_stats_init(struct export_stats *stats) { stats->start_time = ktime_get_seconds(); - return nfsd_percpu_counters_init(stats->counter, EXP_STATS_COUNTERS_NUM); + return percpu_counter_init_many(stats->counter, 0, GFP_KERNEL, + EXP_STATS_COUNTERS_NUM); } static void export_stats_reset(struct export_stats *stats) { - if (stats) - nfsd_percpu_counters_reset(stats->counter, - EXP_STATS_COUNTERS_NUM); + if (stats) { + int i; + + for (i = 0; i < EXP_STATS_COUNTERS_NUM; i++) + percpu_counter_set(&stats->counter[i], 0); + } } static void export_stats_destroy(struct export_stats *stats) { if (stats) - nfsd_percpu_counters_destroy(stats->counter, - EXP_STATS_COUNTERS_NUM); + percpu_counter_destroy_many(stats->counter, + EXP_STATS_COUNTERS_NUM); } -static void svc_export_put_work(struct work_struct *work) +static void svc_export_put(struct kref *ref) { - struct svc_export *exp = - container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); - + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); export_stats_destroy(exp->ex_stats); kfree(exp->ex_stats); kfree(exp->ex_uuid); - kfree(exp); -} - -static void svc_export_put(struct kref *ref) -{ - struct svc_export *exp = container_of(ref, struct svc_export, h.ref); - - INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); - queue_rcu_work(system_wq, &exp->ex_rcu_work); + kfree_rcu(exp, ex_rcu); } static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) @@ -445,8 +430,7 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid) return -EINVAL; } - if (!inode->i_sb->s_export_op || - !inode->i_sb->s_export_op->fh_to_dentry) { + if (!exportfs_can_decode_fh(inode->i_sb->s_export_op)) { dprintk("exp_export: export of invalid fs type.\n"); return -EINVAL; } @@ -1090,10 +1074,30 @@ static struct svc_export *exp_find(struct cache_detail *cd, return exp; } +/** + * check_nfsd_access - check if access to export is allowed. + * @exp: svc_export that is being accessed. + * @rqstp: svc_rqst attempting to access @exp (will be NULL for LOCALIO). + * + * Return values: + * %nfs_ok if access is granted, or + * %nfserr_wrongsec if access is denied + */ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) { struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors; - struct svc_xprt *xprt = rqstp->rq_xprt; + struct svc_xprt *xprt; + + /* + * If rqstp is NULL, this is a LOCALIO request which will only + * ever use a filehandle/credential pair for which access has + * been affirmed (by ACCESS or OPEN NFS requests) over the + * wire. So there is no need for further checks here. + */ + if (!rqstp) + return nfs_ok; + + xprt = rqstp->rq_xprt; if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) { if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) @@ -1114,17 +1118,17 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) ok: /* legacy gss-only clients are always OK: */ if (exp->ex_client == rqstp->rq_gssclient) - return 0; + return nfs_ok; /* ip-address based client; check sec= export option: */ for (f = exp->ex_flavors; f < end; f++) { if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) - return 0; + return nfs_ok; } /* defaults in absence of sec= options: */ if (exp->ex_nflavors == 0) { if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL || rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX) - return 0; + return nfs_ok; } /* If the compound op contains a spo_must_allowed op, @@ -1134,10 +1138,10 @@ ok: */ if (nfsd4_spo_must_allow(rqstp)) - return 0; + return nfs_ok; denied: - return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec; + return nfserr_wrongsec; } /* @@ -1180,19 +1184,35 @@ gss: return gssexp; } +/** + * rqst_exp_find - Find an svc_export in the context of a rqst or similar + * @reqp: The handle to be used to suspend the request if a cache-upcall is needed + * If NULL, missing in-cache information will result in failure. + * @net: The network namespace in which the request exists + * @cl: default auth_domain to use for looking up the export + * @gsscl: an alternate auth_domain defined using deprecated gss/krb5 format. + * @fsid_type: The type of fsid to look for + * @fsidv: The actual fsid to look up in the context of either client. + * + * Perform a lookup for @cl/@fsidv in the given @net for an export. If + * none found and @gsscl specified, repeat the lookup. + * + * Returns an export, or an error pointer. + */ struct svc_export * -rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) +rqst_exp_find(struct cache_req *reqp, struct net *net, + struct auth_domain *cl, struct auth_domain *gsscl, + int fsid_type, u32 *fsidv) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct cache_detail *cd = nn->svc_export_cache; - if (rqstp->rq_client == NULL) + if (!cl) goto gss; /* First try the auth_unix client: */ - exp = exp_find(cd, rqstp->rq_client, fsid_type, - fsidv, &rqstp->rq_chandle); + exp = exp_find(cd, cl, fsid_type, fsidv, reqp); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@ -1202,10 +1222,9 @@ rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) return exp; gss: /* Otherwise, try falling back on gss client */ - if (rqstp->rq_gssclient == NULL) + if (!gsscl) return exp; - gssexp = exp_find(cd, rqstp->rq_gssclient, fsid_type, fsidv, - &rqstp->rq_chandle); + gssexp = exp_find(cd, gsscl, fsid_type, fsidv, reqp); if (PTR_ERR(gssexp) == -ENOENT) return exp; if (!IS_ERR(exp)) @@ -1236,7 +1255,9 @@ struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp) mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); - return rqst_exp_find(rqstp, FSID_NUM, fsidv); + return rqst_exp_find(&rqstp->rq_chandle, SVC_NET(rqstp), + rqstp->rq_client, rqstp->rq_gssclient, + FSID_NUM, fsidv); } /* diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 9d895570ceba..3794ae253a70 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -75,7 +75,7 @@ struct svc_export { u32 ex_layout_types; struct nfsd4_deviceid_map *ex_devid_map; struct cache_detail *cd; - struct rcu_work ex_rcu_work; + struct rcu_head ex_rcu; unsigned long ex_xprtsec_modes; struct export_stats *ex_stats; }; @@ -92,14 +92,15 @@ struct svc_expkey { u32 ek_fsid[6]; struct path ek_path; - struct rcu_work ek_rcu_work; + struct rcu_head ek_rcu; }; #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) #define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) #define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) -int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); +struct svc_cred; +int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp); __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); /* @@ -127,6 +128,8 @@ static inline struct svc_export *exp_get(struct svc_export *exp) cache_get(&exp->h); return exp; } -struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); +struct svc_export *rqst_exp_find(struct cache_req *reqp, struct net *net, + struct auth_domain *cl, struct auth_domain *gsscl, + int fsid_type, u32 *fsidv); #endif /* NFSD_EXPORT_H */ diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 6f2bcbfde45e..d19968881855 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -52,22 +52,20 @@ #define NFSD_FILE_CACHE_UP (0) /* We only care about NFSD_MAY_READ/WRITE for this cache */ -#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) +#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE|NFSD_MAY_LOCALIO) static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); +static DEFINE_PER_CPU(unsigned long, nfsd_file_allocations); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { - struct work_struct work; spinlock_t lock; struct list_head freeme; }; -static struct workqueue_struct *nfsd_filecache_wq __read_mostly; - static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; static struct list_lru nfsd_file_lru; @@ -114,7 +112,7 @@ static void nfsd_file_schedule_laundrette(void) { if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) - queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + queue_delayed_work(system_unbound_wq, &nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); } @@ -154,7 +152,7 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm) } static struct nfsd_file_mark * -nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) +nfsd_file_mark_find_or_create(struct inode *inode) { int err; struct fsnotify_mark *mark; @@ -162,8 +160,8 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) do { fsnotify_group_lock(nfsd_file_fsnotify_group); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, - nfsd_file_fsnotify_group); + mark = fsnotify_find_inode_mark(inode, + nfsd_file_fsnotify_group); if (mark) { nfm = nfsd_file_mark_get(container_of(mark, struct nfsd_file_mark, @@ -218,7 +216,9 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, if (unlikely(!nf)) return NULL; + this_cpu_inc(nfsd_file_allocations); INIT_LIST_HEAD(&nf->nf_lru); + INIT_LIST_HEAD(&nf->nf_gc); nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; nf->nf_cred = get_current_cred(); @@ -283,7 +283,7 @@ nfsd_file_free(struct nfsd_file *nf) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { nfsd_file_check_write_error(nf); - filp_close(nf->nf_file, NULL); + nfsd_filp_close(nf->nf_file); } /* @@ -322,7 +322,7 @@ nfsd_file_check_writeback(struct nfsd_file *nf) static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { + if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_add(nf); return true; } @@ -331,7 +331,7 @@ static bool nfsd_file_lru_add(struct nfsd_file *nf) static bool nfsd_file_lru_remove(struct nfsd_file *nf) { - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { + if (list_lru_del_obj(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_del(nf); return true; } @@ -390,14 +390,42 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_free(nf); } +/** + * nfsd_file_put_local - put nfsd_file reference and arm nfsd_serv_put in caller + * @nf: nfsd_file of which to put the reference + * + * First save the associated net to return to caller, then put + * the reference of the nfsd_file. + */ +struct net * +nfsd_file_put_local(struct nfsd_file *nf) +{ + struct net *net = nf->nf_net; + + nfsd_file_put(nf); + return net; +} + +/** + * nfsd_file_file - get the backing file of an nfsd_file + * @nf: nfsd_file of which to access the backing file. + * + * Return backing file for @nf. + */ +struct file * +nfsd_file_file(struct nfsd_file *nf) +{ + return nf->nf_file; +} + static void nfsd_file_dispose_list(struct list_head *dispose) { struct nfsd_file *nf; while (!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + nf = list_first_entry(dispose, struct nfsd_file, nf_gc); + list_del_init(&nf->nf_gc); nfsd_file_free(nf); } } @@ -414,14 +442,53 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) { while(!list_empty(dispose)) { struct nfsd_file *nf = list_first_entry(dispose, - struct nfsd_file, nf_lru); + struct nfsd_file, nf_gc); struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); struct nfsd_fcache_disposal *l = nn->fcache_disposal; + struct svc_serv *serv; + + spin_lock(&l->lock); + list_move_tail(&nf->nf_gc, &l->freeme); + spin_unlock(&l->lock); + + /* + * The filecache laundrette is shut down after the + * nn->nfsd_serv pointer is cleared, but before the + * svc_serv is freed. + */ + serv = nn->nfsd_serv; + if (serv) + svc_wake_up(serv); + } +} + +/** + * nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed. + * @nn: nfsd_net in which to find files to be disposed. + * + * When files held open for nfsv3 are removed from the filecache, whether + * due to memory pressure or garbage collection, they are queued to + * a per-net-ns queue. This function completes the disposal, either + * directly or by waking another nfsd thread to help with the work. + */ +void nfsd_file_net_dispose(struct nfsd_net *nn) +{ + struct nfsd_fcache_disposal *l = nn->fcache_disposal; + + if (!list_empty(&l->freeme)) { + LIST_HEAD(dispose); + int i; spin_lock(&l->lock); - list_move_tail(&nf->nf_lru, &l->freeme); + for (i = 0; i < 8 && !list_empty(&l->freeme); i++) + list_move(l->freeme.next, &dispose); spin_unlock(&l->lock); - queue_work(nfsd_filecache_wq, &l->work); + if (!list_empty(&l->freeme)) + /* Wake up another thread to share the work + * *before* doing any actual disposing. + */ + svc_wake_up(nn->nfsd_serv); + nfsd_file_dispose_list(&dispose); } } @@ -476,7 +543,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, /* Refcount went to zero. Unhash it and queue it to the dispose list */ nfsd_file_unhash(nf); - list_lru_isolate_move(lru, &nf->nf_lru, head); + list_lru_isolate(lru, &nf->nf_lru); + list_add(&nf->nf_gc, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; @@ -521,11 +589,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) return ret; } -static struct shrinker nfsd_file_shrinker = { - .scan_objects = nfsd_file_lru_scan, - .count_objects = nfsd_file_lru_count, - .seeks = 1, -}; +static struct shrinker *nfsd_file_shrinker; /** * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file @@ -555,7 +619,7 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) /* If refcount goes to 0, then put on the dispose list */ if (refcount_sub_and_test(decrement, &nf->nf_ref)) { - list_add(&nf->nf_lru, dispose); + list_add(&nf->nf_gc, dispose); trace_nfsd_file_closing(nf); } } @@ -631,43 +695,21 @@ nfsd_file_close_inode_sync(struct inode *inode) nfsd_file_queue_for_close(inode, &dispose); while (!list_empty(&dispose)) { - nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + nf = list_first_entry(&dispose, struct nfsd_file, nf_gc); + list_del_init(&nf->nf_gc); nfsd_file_free(nf); } - flush_delayed_fput(); -} - -/** - * nfsd_file_delayed_close - close unused nfsd_files - * @work: dummy - * - * Scrape the freeme list for this nfsd_net, and then dispose of them - * all. - */ -static void -nfsd_file_delayed_close(struct work_struct *work) -{ - LIST_HEAD(head); - struct nfsd_fcache_disposal *l = container_of(work, - struct nfsd_fcache_disposal, work); - - spin_lock(&l->lock); - list_splice_init(&l->freeme, &head); - spin_unlock(&l->lock); - - nfsd_file_dispose_list(&head); } static int nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, void *data) { - struct file_lock *fl = data; + struct file_lease *fl = data; /* Only close files for F_SETLEASE leases */ - if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode(file_inode(fl->fl_file)); + if (fl->c.flc_flags & FL_LEASE) + nfsd_file_close_inode(file_inode(fl->c.flc_file)); return 0; } @@ -721,37 +763,37 @@ nfsd_file_cache_init(void) goto out; ret = -ENOMEM; - nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); - if (!nfsd_filecache_wq) - goto out; - - nfsd_file_slab = kmem_cache_create("nfsd_file", - sizeof(struct nfsd_file), 0, 0, NULL); + nfsd_file_slab = KMEM_CACHE(nfsd_file, 0); if (!nfsd_file_slab) { pr_err("nfsd: unable to create nfsd_file_slab\n"); goto out_err; } - nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", - sizeof(struct nfsd_file_mark), 0, 0, NULL); + nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0); if (!nfsd_file_mark_slab) { pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); goto out_err; } - ret = list_lru_init(&nfsd_file_lru); if (ret) { pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); goto out_err; } - ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache"); - if (ret) { - pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); + nfsd_file_shrinker = shrinker_alloc(0, "nfsd-filecache"); + if (!nfsd_file_shrinker) { + ret = -ENOMEM; + pr_err("nfsd: failed to allocate nfsd_file_shrinker\n"); goto out_lru; } + nfsd_file_shrinker->count_objects = nfsd_file_lru_count; + nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan; + nfsd_file_shrinker->seeks = 1; + + shrinker_register(nfsd_file_shrinker); + ret = lease_register_notifier(&nfsd_file_lease_notifier); if (ret) { pr_err("nfsd: unable to register lease notifier: %d\n", ret); @@ -759,7 +801,7 @@ nfsd_file_cache_init(void) } nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, - FSNOTIFY_GROUP_NOFS); + 0); if (IS_ERR(nfsd_file_fsnotify_group)) { pr_err("nfsd: unable to create fsnotify group: %ld\n", PTR_ERR(nfsd_file_fsnotify_group)); @@ -776,7 +818,7 @@ out: out_notifier: lease_unregister_notifier(&nfsd_file_lease_notifier); out_shrinker: - unregister_shrinker(&nfsd_file_shrinker); + shrinker_free(nfsd_file_shrinker); out_lru: list_lru_destroy(&nfsd_file_lru); out_err: @@ -784,8 +826,6 @@ out_err: nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - destroy_workqueue(nfsd_filecache_wq); - nfsd_filecache_wq = NULL; rhltable_destroy(&nfsd_file_rhltable); goto out; } @@ -831,7 +871,6 @@ nfsd_alloc_fcache_disposal(void) l = kmalloc(sizeof(*l), GFP_KERNEL); if (!l) return NULL; - INIT_WORK(&l->work, nfsd_file_delayed_close); spin_lock_init(&l->lock); INIT_LIST_HEAD(&l->freeme); return l; @@ -840,7 +879,6 @@ nfsd_alloc_fcache_disposal(void) static void nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) { - cancel_work_sync(&l->work); nfsd_file_dispose_list(&l->freeme); kfree(l); } @@ -893,7 +931,7 @@ nfsd_file_cache_shutdown(void) return; lease_unregister_notifier(&nfsd_file_lease_notifier); - unregister_shrinker(&nfsd_file_shrinker); + shrinker_free(nfsd_file_shrinker); /* * make sure all callers of nfsd_file_lru_cb are done before * calling nfsd_file_cache_purge @@ -909,13 +947,12 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - destroy_workqueue(nfsd_filecache_wq); - nfsd_filecache_wq = NULL; rhltable_destroy(&nfsd_file_rhltable); for_each_possible_cpu(i) { per_cpu(nfsd_file_cache_hits, i) = 0; per_cpu(nfsd_file_acquisitions, i) = 0; + per_cpu(nfsd_file_allocations, i) = 0; per_cpu(nfsd_file_releases, i) = 0; per_cpu(nfsd_file_total_age, i) = 0; per_cpu(nfsd_file_evictions, i) = 0; @@ -984,12 +1021,14 @@ nfsd_file_is_cached(struct inode *inode) } static __be32 -nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, +nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, + struct svc_cred *cred, + struct auth_domain *client, + struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **pnf, bool want_gc) { unsigned char need = may_flags & NFSD_FILE_MAY_MASK; - struct net *net = SVC_NET(rqstp); struct nfsd_file *new, *nf; bool stale_retry = true; bool open_retry = true; @@ -998,8 +1037,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, int ret; retry: - status = fh_verify(rqstp, fhp, S_IFREG, - may_flags|NFSD_MAY_OWNER_OVERRIDE); + if (rqstp) { + status = fh_verify(rqstp, fhp, S_IFREG, + may_flags|NFSD_MAY_OWNER_OVERRIDE); + } else { + status = fh_verify_local(net, cred, client, fhp, S_IFREG, + may_flags|NFSD_MAY_OWNER_OVERRIDE); + } if (status != nfs_ok) return status; inode = d_inode(fhp->fh_dentry); @@ -1031,7 +1075,7 @@ retry: if (unlikely(nf)) { spin_unlock(&inode->i_lock); rcu_read_unlock(); - nfsd_file_slab_free(&new->nf_rcu); + nfsd_file_free(new); goto wait_for_construction; } nf = new; @@ -1080,7 +1124,7 @@ out: open_file: trace_nfsd_file_alloc(nf); - nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); + nf->nf_mark = nfsd_file_mark_find_or_create(inode); if (nf->nf_mark) { if (file) { get_file(file); @@ -1145,7 +1189,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, + fhp, may_flags, NULL, pnf, true); } /** @@ -1169,7 +1214,55 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, + fhp, may_flags, NULL, pnf, false); +} + +/** + * nfsd_file_acquire_local - Get a struct nfsd_file with an open file for localio + * @net: The network namespace in which to perform a lookup + * @cred: the user credential with which to validate access + * @client: the auth_domain for LOCALIO lookup + * @fhp: the NFS filehandle of the file to be opened + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * This file lookup interface provide access to a file given the + * filehandle and credential. No connection-based authorisation + * is performed and in that way it is quite different to other + * file access mediated by nfsd. It allows a kernel module such as the NFS + * client to reach across network and filesystem namespaces to access + * a file. The security implications of this should be carefully + * considered before use. + * + * The nfsd_file object returned by this API is reference-counted + * and garbage-collected. The object is retained for a few + * seconds after the final nfsd_file_put() in case the caller + * wants to re-use it. + * + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. + */ +__be32 +nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, + struct auth_domain *client, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + /* + * Save creds before calling nfsd_file_do_acquire() (which calls + * nfsd_setuser). Important because caller (LOCALIO) is from + * client context. + */ + const struct cred *save_cred = get_current_cred(); + __be32 beres; + + beres = nfsd_file_do_acquire(NULL, net, cred, client, + fhp, may_flags, NULL, pnf, true); + revert_creds(save_cred); + return beres; } /** @@ -1195,7 +1288,8 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, + fhp, may_flags, file, pnf, false); } /* @@ -1205,7 +1299,7 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, */ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long releases = 0, evictions = 0; + unsigned long allocations = 0, releases = 0, evictions = 0; unsigned long hits = 0, acquisitions = 0; unsigned int i, count = 0, buckets = 0; unsigned long lru = 0, total_age = 0; @@ -1230,6 +1324,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) for_each_possible_cpu(i) { hits += per_cpu(nfsd_file_cache_hits, i); acquisitions += per_cpu(nfsd_file_acquisitions, i); + allocations += per_cpu(nfsd_file_allocations, i); releases += per_cpu(nfsd_file_releases, i); total_age += per_cpu(nfsd_file_total_age, i); evictions += per_cpu(nfsd_file_evictions, i); @@ -1240,6 +1335,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); seq_printf(m, "acquisitions: %lu\n", acquisitions); + seq_printf(m, "allocations: %lu\n", allocations); seq_printf(m, "releases: %lu\n", releases); seq_printf(m, "evictions: %lu\n", evictions); if (releases) diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index e54165a3224f..d5db6b34ba30 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -44,6 +44,7 @@ struct nfsd_file { struct nfsd_file_mark *nf_mark; struct list_head nf_lru; + struct list_head nf_gc; struct rcu_head nf_rcu; ktime_t nf_birthtime; }; @@ -54,8 +55,11 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); +struct net *nfsd_file_put_local(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); +struct file *nfsd_file_file(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); +void nfsd_file_net_dispose(struct nfsd_net *nn); bool nfsd_file_is_cached(struct inode *inode); __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); @@ -64,5 +68,8 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **nfp); +__be32 nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, + struct auth_domain *client, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c index bb205328e043..aeb71c10ff1b 100644 --- a/fs/nfsd/flexfilelayoutxdr.c +++ b/fs/nfsd/flexfilelayoutxdr.c @@ -17,9 +17,9 @@ struct ff_idmap { __be32 nfsd4_ff_encode_layoutget(struct xdr_stream *xdr, - struct nfsd4_layoutget *lgp) + const struct nfsd4_layoutget *lgp) { - struct pnfs_ff_layout *fl = lgp->lg_content; + const struct pnfs_ff_layout *fl = lgp->lg_content; int len, mirror_len, ds_len, fh_len; __be32 *p; @@ -77,7 +77,7 @@ nfsd4_ff_encode_layoutget(struct xdr_stream *xdr, __be32 nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr, - struct nfsd4_getdeviceinfo *gdp) + const struct nfsd4_getdeviceinfo *gdp) { struct pnfs_ff_device_addr *da = gdp->gd_device; int len; diff --git a/fs/nfsd/flexfilelayoutxdr.h b/fs/nfsd/flexfilelayoutxdr.h index 8e195aeca023..6d5a1066a903 100644 --- a/fs/nfsd/flexfilelayoutxdr.h +++ b/fs/nfsd/flexfilelayoutxdr.h @@ -43,8 +43,8 @@ struct pnfs_ff_layout { }; __be32 nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr, - struct nfsd4_getdeviceinfo *gdp); + const struct nfsd4_getdeviceinfo *gdp); __be32 nfsd4_ff_encode_layoutget(struct xdr_stream *xdr, - struct nfsd4_layoutget *lgp); + const struct nfsd4_layoutget *lgp); #endif /* _NFSD_FLEXFILELAYOUTXDR_H */ diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c new file mode 100644 index 000000000000..f441cb9f74d5 --- /dev/null +++ b/fs/nfsd/localio.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * NFS server support for local clients to bypass network stack + * + * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com> + * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com> + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + * Copyright (C) 2024 NeilBrown <neilb@suse.de> + */ + +#include <linux/exportfs.h> +#include <linux/sunrpc/svcauth.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs_common.h> +#include <linux/nfslocalio.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_xdr.h> +#include <linux/string.h> + +#include "nfsd.h" +#include "vfs.h" +#include "netns.h" +#include "filecache.h" +#include "cache.h" + +static const struct nfsd_localio_operations nfsd_localio_ops = { + .nfsd_serv_try_get = nfsd_serv_try_get, + .nfsd_serv_put = nfsd_serv_put, + .nfsd_open_local_fh = nfsd_open_local_fh, + .nfsd_file_put_local = nfsd_file_put_local, + .nfsd_file_file = nfsd_file_file, +}; + +void nfsd_localio_ops_init(void) +{ + nfs_to = &nfsd_localio_ops; +} + +/** + * nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file + * + * @net: 'struct net' to get the proper nfsd_net required for LOCALIO access + * @dom: 'struct auth_domain' required for LOCALIO access + * @rpc_clnt: rpc_clnt that the client established + * @cred: cred that the client established + * @nfs_fh: filehandle to lookup + * @fmode: fmode_t to use for open + * + * This function maps a local fh to a path on a local filesystem. + * This is useful when the nfs client has the local server mounted - it can + * avoid all the NFS overhead with reads, writes and commits. + * + * On successful return, returned nfsd_file will have its nf_net member + * set. Caller (NFS client) is responsible for calling nfsd_serv_put and + * nfsd_file_put (via nfs_to_nfsd_file_put_local). + */ +struct nfsd_file * +nfsd_open_local_fh(struct net *net, struct auth_domain *dom, + struct rpc_clnt *rpc_clnt, const struct cred *cred, + const struct nfs_fh *nfs_fh, const fmode_t fmode) +{ + int mayflags = NFSD_MAY_LOCALIO; + struct svc_cred rq_cred; + struct svc_fh fh; + struct nfsd_file *localio; + __be32 beres; + + if (nfs_fh->size > NFS4_FHSIZE) + return ERR_PTR(-EINVAL); + + /* nfs_fh -> svc_fh */ + fh_init(&fh, NFS4_FHSIZE); + fh.fh_handle.fh_size = nfs_fh->size; + memcpy(fh.fh_handle.fh_raw, nfs_fh->data, nfs_fh->size); + + if (fmode & FMODE_READ) + mayflags |= NFSD_MAY_READ; + if (fmode & FMODE_WRITE) + mayflags |= NFSD_MAY_WRITE; + + svcauth_map_clnt_to_svc_cred_local(rpc_clnt, cred, &rq_cred); + + beres = nfsd_file_acquire_local(net, &rq_cred, dom, + &fh, mayflags, &localio); + if (beres) + localio = ERR_PTR(nfs_stat_to_errno(be32_to_cpu(beres))); + + fh_put(&fh); + if (rq_cred.cr_group_info) + put_group_info(rq_cred.cr_group_info); + + return localio; +} +EXPORT_SYMBOL_GPL(nfsd_open_local_fh); + +/* + * UUID_IS_LOCAL XDR functions + */ + +static __be32 localio_proc_null(struct svc_rqst *rqstp) +{ + return rpc_success; +} + +struct localio_uuidarg { + uuid_t uuid; +}; + +static __be32 localio_proc_uuid_is_local(struct svc_rqst *rqstp) +{ + struct localio_uuidarg *argp = rqstp->rq_argp; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs_uuid_is_local(&argp->uuid, &nn->local_clients, + net, rqstp->rq_client, THIS_MODULE); + + return rpc_success; +} + +static bool localio_decode_uuidarg(struct svc_rqst *rqstp, + struct xdr_stream *xdr) +{ + struct localio_uuidarg *argp = rqstp->rq_argp; + u8 uuid[UUID_SIZE]; + + if (decode_opaque_fixed(xdr, uuid, UUID_SIZE)) + return false; + import_uuid(&argp->uuid, uuid); + + return true; +} + +static const struct svc_procedure localio_procedures1[] = { + [LOCALIOPROC_NULL] = { + .pc_func = localio_proc_null, + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 0, + .pc_name = "NULL", + }, + [LOCALIOPROC_UUID_IS_LOCAL] = { + .pc_func = localio_proc_uuid_is_local, + .pc_decode = localio_decode_uuidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct localio_uuidarg), + .pc_argzero = sizeof(struct localio_uuidarg), + .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_name = "UUID_IS_LOCAL", + }, +}; + +#define LOCALIO_NR_PROCEDURES ARRAY_SIZE(localio_procedures1) +static DEFINE_PER_CPU_ALIGNED(unsigned long, + localio_count[LOCALIO_NR_PROCEDURES]); +const struct svc_version localio_version1 = { + .vs_vers = 1, + .vs_nproc = LOCALIO_NR_PROCEDURES, + .vs_proc = localio_procedures1, + .vs_dispatch = nfsd_dispatch, + .vs_count = localio_count, + .vs_xdrsize = XDR_QUADLEN(UUID_SIZE), + .vs_hidden = true, +}; diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c new file mode 100644 index 000000000000..ca54aa583530 --- /dev/null +++ b/fs/nfsd/netlink.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/nfsd.yaml */ +/* YNL-GEN kernel source */ + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include "netlink.h" + +#include <uapi/linux/nfsd_netlink.h> + +/* Common nested types */ +const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1] = { + [NFSD_A_SOCK_ADDR] = { .type = NLA_BINARY, }, + [NFSD_A_SOCK_TRANSPORT_NAME] = { .type = NLA_NUL_STRING, }, +}; + +const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1] = { + [NFSD_A_VERSION_MAJOR] = { .type = NLA_U32, }, + [NFSD_A_VERSION_MINOR] = { .type = NLA_U32, }, + [NFSD_A_VERSION_ENABLED] = { .type = NLA_FLAG, }, +}; + +/* NFSD_CMD_THREADS_SET - do */ +static const struct nla_policy nfsd_threads_set_nl_policy[NFSD_A_SERVER_SCOPE + 1] = { + [NFSD_A_SERVER_THREADS] = { .type = NLA_U32, }, + [NFSD_A_SERVER_GRACETIME] = { .type = NLA_U32, }, + [NFSD_A_SERVER_LEASETIME] = { .type = NLA_U32, }, + [NFSD_A_SERVER_SCOPE] = { .type = NLA_NUL_STRING, }, +}; + +/* NFSD_CMD_VERSION_SET - do */ +static const struct nla_policy nfsd_version_set_nl_policy[NFSD_A_SERVER_PROTO_VERSION + 1] = { + [NFSD_A_SERVER_PROTO_VERSION] = NLA_POLICY_NESTED(nfsd_version_nl_policy), +}; + +/* NFSD_CMD_LISTENER_SET - do */ +static const struct nla_policy nfsd_listener_set_nl_policy[NFSD_A_SERVER_SOCK_ADDR + 1] = { + [NFSD_A_SERVER_SOCK_ADDR] = NLA_POLICY_NESTED(nfsd_sock_nl_policy), +}; + +/* NFSD_CMD_POOL_MODE_SET - do */ +static const struct nla_policy nfsd_pool_mode_set_nl_policy[NFSD_A_POOL_MODE_MODE + 1] = { + [NFSD_A_POOL_MODE_MODE] = { .type = NLA_NUL_STRING, }, +}; + +/* Ops table for nfsd */ +static const struct genl_split_ops nfsd_nl_ops[] = { + { + .cmd = NFSD_CMD_RPC_STATUS_GET, + .dumpit = nfsd_nl_rpc_status_get_dumpit, + .flags = GENL_CMD_CAP_DUMP, + }, + { + .cmd = NFSD_CMD_THREADS_SET, + .doit = nfsd_nl_threads_set_doit, + .policy = nfsd_threads_set_nl_policy, + .maxattr = NFSD_A_SERVER_SCOPE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_THREADS_GET, + .doit = nfsd_nl_threads_get_doit, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_VERSION_SET, + .doit = nfsd_nl_version_set_doit, + .policy = nfsd_version_set_nl_policy, + .maxattr = NFSD_A_SERVER_PROTO_VERSION, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_VERSION_GET, + .doit = nfsd_nl_version_get_doit, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_LISTENER_SET, + .doit = nfsd_nl_listener_set_doit, + .policy = nfsd_listener_set_nl_policy, + .maxattr = NFSD_A_SERVER_SOCK_ADDR, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_LISTENER_GET, + .doit = nfsd_nl_listener_get_doit, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_POOL_MODE_SET, + .doit = nfsd_nl_pool_mode_set_doit, + .policy = nfsd_pool_mode_set_nl_policy, + .maxattr = NFSD_A_POOL_MODE_MODE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_POOL_MODE_GET, + .doit = nfsd_nl_pool_mode_get_doit, + .flags = GENL_CMD_CAP_DO, + }, +}; + +struct genl_family nfsd_nl_family __ro_after_init = { + .name = NFSD_FAMILY_NAME, + .version = NFSD_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = nfsd_nl_ops, + .n_split_ops = ARRAY_SIZE(nfsd_nl_ops), +}; diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h new file mode 100644 index 000000000000..8eb903f24c41 --- /dev/null +++ b/fs/nfsd/netlink.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/nfsd.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_NFSD_GEN_H +#define _LINUX_NFSD_GEN_H + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include <uapi/linux/nfsd_netlink.h> + +/* Common nested types */ +extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1]; +extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1]; + +int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info); + +extern struct genl_family nfsd_nl_family; + +#endif /* _LINUX_NFSD_GEN_H */ diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 77d4f82096c9..26f7b34d1a03 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -13,6 +13,7 @@ #include <linux/filelock.h> #include <linux/nfs4.h> #include <linux/percpu_counter.h> +#include <linux/percpu-refcount.h> #include <linux/siphash.h> #include <linux/sunrpc/stats.h> @@ -137,14 +138,11 @@ struct nfsd_net { u32 clientid_counter; u32 clverifier_counter; - struct svc_serv *nfsd_serv; - /* When a listening socket is added to nfsd, keep_active is set - * and this justifies a reference on nfsd_serv. This stops - * nfsd_serv from being freed. When the number of threads is - * set, keep_active is cleared and the reference is dropped. So - * when the last thread exits, the service will be destroyed. - */ - int keep_active; + struct svc_info nfsd_info; +#define nfsd_serv nfsd_info.serv + struct percpu_ref nfsd_serv_ref; + struct completion nfsd_serv_confirm_done; + struct completion nfsd_serv_free_done; /* * clientid and stateid data for construction of net unique COPY @@ -158,8 +156,8 @@ struct nfsd_net { /* * Version information */ - bool *nfsd_versions; - bool *nfsd4_minorversions; + bool nfsd_versions[NFSD_MAXVERS + 1]; + bool nfsd4_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1]; /* * Duplicate reply cache @@ -195,7 +193,7 @@ struct nfsd_net { /* size of cache when we saw the longest hash chain */ unsigned int longest_chain_cachesize; - struct shrinker nfsd_reply_cache_shrinker; + struct shrinker *nfsd_reply_cache_shrinker; /* tracking server-to-server copy mounts */ spinlock_t nfsd_ssc_lock; @@ -213,17 +211,27 @@ struct nfsd_net { int nfs4_max_clients; atomic_t nfsd_courtesy_clients; - struct shrinker nfsd_client_shrinker; + struct shrinker *nfsd_client_shrinker; struct work_struct nfsd_shrinker_work; + + /* last time an admin-revoke happened for NFSv4.0 */ + time64_t nfs40_last_revoke; + +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + /* Local clients to be invalidated when net is shut down */ + struct list_head local_clients; +#endif }; /* Simple check to find out if a given net was properly initialized */ #define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) -extern void nfsd_netns_free_versions(struct nfsd_net *nn); - +extern bool nfsd_support_version(int vers); extern unsigned int nfsd_net_id; +bool nfsd_serv_try_get(struct net *net); +void nfsd_serv_put(struct net *net); + void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn); void nfsd_reset_write_verifier(struct nfsd_net *nn); #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 12b2b9bc07bf..5fb202acb0fd 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -84,6 +84,8 @@ out: fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } @@ -308,8 +310,6 @@ static void nfsaclsvc_release_access(struct svc_rqst *rqstp) fh_put(&resp->fh); } -struct nfsd3_voidargs { int dummy; }; - #define ST 1 /* status*/ #define AT 21 /* attributes */ #define pAT (1+AT) /* post attributes - conditional */ diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 73adca47d373..7b5433bd3019 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -76,6 +76,8 @@ out: fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } @@ -221,8 +223,6 @@ static void nfs3svc_release_getacl(struct svc_rqst *rqstp) posix_acl_release(resp->acl_default); } -struct nfsd3_voidargs { int dummy; }; - #define ST 1 /* status*/ #define AT 21 /* attributes */ #define pAT (1+AT) /* post attributes - conditional */ diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 268ef57751c4..372bdcf5e07a 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -28,6 +28,29 @@ static int nfs3_ftypes[] = { S_IFIFO, /* NF3FIFO */ }; +static __be32 nfsd3_map_status(__be32 status) +{ + switch (status) { + case nfs_ok: + break; + case nfserr_nofilehandle: + status = nfserr_badhandle; + break; + case nfserr_wrongsec: + case nfserr_file_open: + status = nfserr_acces; + break; + case nfserr_symlink_not_dir: + status = nfserr_notdir; + break; + case nfserr_symlink: + case nfserr_wrong_type: + status = nfserr_inval; + break; + } + return status; +} + /* * NULL call. */ @@ -57,6 +80,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -71,13 +95,16 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp) struct nfsd_attrs attrs = { .na_iattr = &argp->attrs, }; + const struct timespec64 *guardtime = NULL; dprintk("nfsd: SETATTR(3) %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, - argp->check_guard, argp->guardtime); + if (argp->check_guard) + guardtime = &argp->guardtime; + resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -101,6 +128,7 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp) resp->status = nfsd_lookup(rqstp, &resp->dirfh, argp->name, argp->len, &resp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -120,6 +148,7 @@ nfsd3_proc_access(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->access = argp->access; resp->status = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -140,6 +169,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp) resp->pages = rqstp->rq_next_page++; resp->status = nfsd_readlink(rqstp, &resp->fh, page_address(*resp->pages), &resp->len); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -171,11 +201,13 @@ nfsd3_proc_read(struct svc_rqst *rqstp) * + 1 (xdr opaque byte count) = 26 */ resp->count = argp->count; - svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); + svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3) << 2) + + resp->count + 4); fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, &resp->count, &resp->eof); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -194,7 +226,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) SVCFH_fmt(&argp->fh), argp->len, (unsigned long long) argp->offset, - argp->stable? " stable" : ""); + argp->stable ? " stable" : ""); resp->status = nfserr_fbig; if (argp->offset > (u64)OFFSET_MAX || @@ -209,6 +241,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) rqstp->rq_vec, nvecs, &cnt, resp->committed, resp->verf); resp->count = cnt; + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -294,8 +327,8 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, status = nfserr_exist; break; case NFS3_CREATE_EXCLUSIVE: - if (d_inode(child)->i_mtime.tv_sec == v_mtime && - d_inode(child)->i_atime.tv_sec == v_atime && + if (inode_get_mtime_sec(d_inode(child)) == v_mtime && + inode_get_atime_sec(d_inode(child)) == v_atime && d_inode(child)->i_size == 0) { break; } @@ -356,6 +389,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp) newfhp = fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -381,6 +415,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &attrs, S_IFDIR, 0, &resp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -421,6 +456,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) argp->flen, argp->tname, &attrs, &resp->fh); kfree(argp->tname); out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -462,6 +498,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &attrs, type, rdev, &resp->fh); out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -483,6 +520,7 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -503,6 +541,7 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -525,6 +564,7 @@ nfsd3_proc_rename(struct svc_rqst *rqstp) fh_copy(&resp->tfh, &argp->tfh); resp->status = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen, &resp->tfh, argp->tname, argp->tlen); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -545,6 +585,7 @@ nfsd3_proc_link(struct svc_rqst *rqstp) fh_copy(&resp->tfh, &argp->tfh); resp->status = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen, &resp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -597,6 +638,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) /* Recycle only pages that were part of the reply */ rqstp->rq_next_page = resp->xdr.page_ptr + 1; + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -641,6 +683,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) rqstp->rq_next_page = resp->xdr.page_ptr + 1; out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -658,6 +701,7 @@ nfsd3_proc_fsstat(struct svc_rqst *rqstp) resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); fh_put(&argp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -701,6 +745,7 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp) } fh_put(&argp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -743,6 +788,7 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp) } fh_put(&argp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -770,6 +816,7 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) argp->count, resp->verf); nfsd_file_put(nf); out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f32128955ec8..a7a07470c1f8 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -295,17 +295,14 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, static bool svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args) { - __be32 *p; u32 check; if (xdr_stream_decode_bool(xdr, &check) < 0) return false; if (check) { - p = xdr_inline_decode(xdr, XDR_UNIT * 2); - if (!p) + if (!svcxdr_decode_nfstime3(xdr, &args->guardtime)) return false; args->check_guard = 1; - args->guardtime = be32_to_cpup(p); } else args->check_guard = 0; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 49a49529c6b8..127626aba7a2 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -31,6 +31,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/nfs4.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xprt.h> #include <linux/sunrpc/svc_xprt.h> @@ -44,7 +45,7 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC -static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); +static void nfsd4_mark_cb_fault(struct nfs4_client *clp); #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 @@ -84,33 +85,22 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n) static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, size_t len) { - WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); + xdr_stream_encode_uint32_array(xdr, bitmap, len); } -/* - * nfs_cb_opnum4 - * - * enum nfs_cb_opnum4 { - * OP_CB_GETATTR = 3, - * ... - * }; - */ -enum nfs_cb_opnum4 { - OP_CB_GETATTR = 3, - OP_CB_RECALL = 4, - OP_CB_LAYOUTRECALL = 5, - OP_CB_NOTIFY = 6, - OP_CB_PUSH_DELEG = 7, - OP_CB_RECALL_ANY = 8, - OP_CB_RECALLABLE_OBJ_AVAIL = 9, - OP_CB_RECALL_SLOT = 10, - OP_CB_SEQUENCE = 11, - OP_CB_WANTS_CANCELLED = 12, - OP_CB_NOTIFY_LOCK = 13, - OP_CB_NOTIFY_DEVICEID = 14, - OP_CB_OFFLOAD = 15, - OP_CB_ILLEGAL = 10044 -}; +static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs4_cb_fattr *fattr) +{ + fattr->ncf_cb_change = 0; + fattr->ncf_cb_fsize = 0; + if (bitmap[0] & FATTR4_WORD0_CHANGE) + if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0) + return -NFSERR_BAD_XDR; + if (bitmap[0] & FATTR4_WORD0_SIZE) + if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0) + return -NFSERR_BAD_XDR; + return 0; +} static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op) { @@ -358,6 +348,30 @@ encode_cb_recallany4args(struct xdr_stream *xdr, } /* + * CB_GETATTR4args + * struct CB_GETATTR4args { + * nfs_fh4 fh; + * bitmap4 attr_request; + * }; + * + * The size and change attributes are the only one + * guaranteed to be serviced by the client. + */ +static void +encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr, + struct nfs4_cb_fattr *fattr) +{ + struct nfs4_delegation *dp = + container_of(fattr, struct nfs4_delegation, dl_cb_fattr); + struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle; + + encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR); + encode_nfs_fh4(xdr, fh); + encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap)); + hdr->nops++; +} + +/* * CB_SEQUENCE4args * * struct CB_SEQUENCE4args { @@ -493,6 +507,26 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, } /* + * 20.1. Operation 3: CB_GETATTR - Get Attributes + */ +static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfsd4_callback *cb = data; + struct nfs4_cb_fattr *ncf = + container_of(cb, struct nfs4_cb_fattr, ncf_getattr); + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_getattr4args(xdr, &hdr, ncf); + encode_cb_nops(&hdr); +} + +/* * 20.2. Operation 4: CB_RECALL - Recall a Delegation */ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, @@ -548,6 +582,42 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, } /* + * 20.1. Operation 3: CB_GETATTR - Get Attributes + */ +static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfsd4_callback *cb = data; + struct nfs4_cb_compound_hdr hdr; + int status; + u32 bitmap[3] = {0}; + u32 attrlen; + struct nfs4_cb_fattr *ncf = + container_of(cb, struct nfs4_cb_fattr, ncf_getattr); + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + + status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status); + if (unlikely(status || cb->cb_status)) + return status; + if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0) + return -NFSERR_BAD_XDR; + if (xdr_stream_decode_u32(xdr, &attrlen) < 0) + return -NFSERR_BAD_XDR; + if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize))) + return -NFSERR_BAD_XDR; + status = decode_cb_fattr4(xdr, bitmap, ncf); + return status; +} + +/* * 20.2. Operation 4: CB_RECALL - Recall a Delegation */ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, @@ -698,7 +768,7 @@ static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req, const struct nfsd4_callback *cb = data; const struct nfsd4_blocked_lock *nbl = container_of(cb, struct nfsd4_blocked_lock, nbl_cb); - struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner; + struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.c.flc_owner; struct nfs4_cb_compound_hdr hdr = { .ident = 0, .minorversion = cb->cb_clp->cl_minorversion, @@ -855,6 +925,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), + PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; @@ -907,11 +978,12 @@ static int max_cb_time(struct net *net) return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ; } -static struct workqueue_struct *callback_wq; - static bool nfsd4_queue_cb(struct nfsd4_callback *cb) { - return queue_work(callback_wq, &cb->cb_work); + struct nfs4_client *clp = cb->cb_clp; + + trace_nfsd_cb_queue(clp, cb); + return queue_work(clp->cl_callback_wq, &cb->cb_work); } static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) @@ -986,7 +1058,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; } else { - if (!conn->cb_xprt) + if (!conn->cb_xprt || !ses) return -EINVAL; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; @@ -1023,18 +1095,18 @@ static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate) { if (clp->cl_cb_state != newstate) { clp->cl_cb_state = newstate; - trace_nfsd_cb_state(clp); + trace_nfsd_cb_new_state(clp); } } -static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) +static void nfsd4_mark_cb_down(struct nfs4_client *clp) { if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN); } -static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) +static void nfsd4_mark_cb_fault(struct nfs4_client *clp) { if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; @@ -1046,7 +1118,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); if (task->tk_status) - nfsd4_mark_cb_down(clp, task->tk_status); + nfsd4_mark_cb_down(clp); else nfsd4_mark_cb_state(clp, NFSD4_CB_UP); } @@ -1081,7 +1153,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp) void nfsd4_probe_callback_sync(struct nfs4_client *clp) { nfsd4_probe_callback(clp); - flush_workqueue(callback_wq); + flush_workqueue(clp->cl_callback_wq); } void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) @@ -1130,6 +1202,7 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; + trace_nfsd_cb_destroy(clp, cb); nfsd41_cb_release_slot(cb); if (cb->cb_ops && cb->cb_ops->release) cb->cb_ops->release(cb); @@ -1150,6 +1223,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) * cb_seq_status is only set in decode_cb_sequence4res, * and so will remain 1 if an rpc level failure occurs. */ + trace_nfsd_cb_rpc_prepare(clp); cb->cb_seq_status = 1; cb->cb_status = 0; if (minorversion && !nfsd41_cb_get_slot(cb, task)) @@ -1182,6 +1256,8 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback if (!cb->cb_holds_slot) goto need_restart; + /* This is the operation status code for CB_SEQUENCE */ + trace_nfsd_cb_seq_status(task, cb); switch (cb->cb_seq_status) { case 0: /* @@ -1195,13 +1271,23 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback break; case -ESERVERFAULT: ++session->se_cb_seq_nr; - fallthrough; + nfsd4_mark_cb_fault(cb->cb_clp); + ret = false; + break; case 1: + /* + * cb_seq_status remains 1 if an RPC Reply was never + * received. NFSD can't know if the client processed + * the CB_SEQUENCE operation. Ask the client to send a + * DESTROY_SESSION to recover. + */ + fallthrough; case -NFS4ERR_BADSESSION: - nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); + nfsd4_mark_cb_fault(cb->cb_clp); ret = false; - break; + goto need_restart; case -NFS4ERR_DELAY: + cb->cb_seq_status = 1; if (!rpc_restart_call(task)) goto out; @@ -1216,14 +1302,11 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback } break; default: - nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); - dprintk("%s: unprocessed error %d\n", __func__, - cb->cb_seq_status); + nfsd4_mark_cb_fault(cb->cb_clp); } - nfsd41_cb_release_slot(cb); - dprintk("%s: freed slot, new seqid=%d\n", __func__, - clp->cl_cb_session->se_cb_seq_nr); + + trace_nfsd_cb_free_slot(task, cb); if (RPC_SIGNALLED(task)) goto need_restart; @@ -1235,6 +1318,7 @@ retry_nowait: goto out; need_restart: if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { + trace_nfsd_cb_restart(clp, cb); task->tk_status = 0; cb->cb_need_restart = true; } @@ -1246,11 +1330,14 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) struct nfsd4_callback *cb = calldata; struct nfs4_client *clp = cb->cb_clp; + trace_nfsd_cb_rpc_done(clp); + if (!nfsd4_cb_sequence_done(task, cb)) return; if (cb->cb_status) { - WARN_ON_ONCE(task->tk_status); + WARN_ONCE(task->tk_status, "cb_status=%d tk_status=%d", + cb->cb_status, task->tk_status); task->tk_status = cb->cb_status; } @@ -1264,7 +1351,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) case -EIO: case -ETIMEDOUT: case -EACCES: - nfsd4_mark_cb_down(clp, task->tk_status); + nfsd4_mark_cb_down(clp); } break; default: @@ -1276,6 +1363,8 @@ static void nfsd4_cb_release(void *calldata) { struct nfsd4_callback *cb = calldata; + trace_nfsd_cb_rpc_release(cb->cb_clp); + if (cb->cb_need_restart) nfsd4_queue_cb(cb); else @@ -1289,19 +1378,6 @@ static const struct rpc_call_ops nfsd4_cb_ops = { .rpc_release = nfsd4_cb_release, }; -int nfsd4_create_callback_queue(void) -{ - callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0); - if (!callback_wq) - return -ENOMEM; - return 0; -} - -void nfsd4_destroy_callback_queue(void) -{ - destroy_workqueue(callback_wq); -} - /* must be called under the state lock */ void nfsd4_shutdown_callback(struct nfs4_client *clp) { @@ -1315,16 +1391,17 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) * client, destroy the rpc client, and stop: */ nfsd4_run_cb(&clp->cl_cb_null); - flush_workqueue(callback_wq); + flush_workqueue(clp->cl_callback_wq); nfsd41_cb_inflight_wait_complete(clp); } -/* requires cl_lock: */ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) { struct nfsd4_session *s; struct nfsd4_conn *c; + lockdep_assert_held(&clp->cl_lock); + list_for_each_entry(s, &clp->cl_sessions, se_perclnt) { list_for_each_entry(c, &s->se_conns, cn_persession) { if (c->cn_flags & NFS4_CDFC4_BACK) @@ -1336,9 +1413,9 @@ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) /* * Note there isn't a lot of locking in this code; instead we depend on - * the fact that it is run from the callback_wq, which won't run two - * work items at once. So, for example, callback_wq handles all access - * of cl_cb_client and all calls to rpc_create or rpc_shutdown_client. + * the fact that it is run from clp->cl_callback_wq, which won't run two + * work items at once. So, for example, clp->cl_callback_wq handles all + * access of cl_cb_client and all calls to rpc_create or rpc_shutdown_client. */ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) { @@ -1348,11 +1425,14 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) struct nfsd4_conn *c; int err; + trace_nfsd_cb_bc_update(clp, cb); + /* * This is either an update, or the client dying; in either case, * kill the old client: */ if (clp->cl_cb_client) { + trace_nfsd_cb_bc_shutdown(clp, cb); rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; put_cred(clp->cl_cb_cred); @@ -1364,13 +1444,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) } if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) return; + spin_lock(&clp->cl_lock); /* * Only serialized callback code is allowed to clear these * flags; main nfsd code can only set them: */ - BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)); + WARN_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)); clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); + memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); c = __nfsd4_find_backchannel(clp); if (c) { @@ -1379,12 +1461,10 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) ses = c->cn_session; } spin_unlock(&clp->cl_lock); - if (!c) - return; err = setup_callback_client(clp, &conn, ses); if (err) { - nfsd4_mark_cb_down(clp, err); + nfsd4_mark_cb_down(clp); if (c) svc_xprt_put(c->cn_xprt); return; @@ -1400,19 +1480,17 @@ nfsd4_run_cb_work(struct work_struct *work) struct rpc_clnt *clnt; int flags; - if (cb->cb_need_restart) { - cb->cb_need_restart = false; - } else { - if (cb->cb_ops && cb->cb_ops->prepare) - cb->cb_ops->prepare(cb); - } + trace_nfsd_cb_start(clp); if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) nfsd4_process_cb_update(cb); clnt = clp->cl_cb_client; - if (!clnt) { - /* Callback channel broken, or client killed; give up: */ + if (!clnt || clp->cl_state == NFSD4_COURTESY) { + /* + * Callback channel broken, client killed or + * nfs4_client in courtesy state; give up. + */ nfsd41_destroy_cb(cb); return; } @@ -1426,6 +1504,12 @@ nfsd4_run_cb_work(struct work_struct *work) return; } + if (cb->cb_need_restart) { + cb->cb_need_restart = false; + } else { + if (cb->cb_ops && cb->cb_ops->prepare) + cb->cb_ops->prepare(cb); + } cb->cb_msg.rpc_cred = clp->cl_cb_cred; flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN; rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, @@ -1441,7 +1525,6 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_resp = cb; cb->cb_ops = ops; INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); - cb->cb_seq_status = 1; cb->cb_status = 0; cb->cb_need_restart = false; cb->cb_holds_slot = false; diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index e8a80052cb1b..fbfddd3c4c94 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -25,7 +25,7 @@ static struct kmem_cache *nfs4_layout_cache; static struct kmem_cache *nfs4_layout_stateid_cache; static const struct nfsd4_callback_ops nfsd4_cb_layout_ops; -static const struct lock_manager_operations nfsd4_layouts_lm_ops; +static const struct lease_manager_operations nfsd4_layouts_lm_ops; const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { #ifdef CONFIG_NFSD_FLEXFILELAYOUT @@ -152,6 +152,23 @@ void nfsd4_setup_layout_type(struct svc_export *exp) #endif } +void nfsd4_close_layout(struct nfs4_layout_stateid *ls) +{ + struct nfsd_file *fl; + + spin_lock(&ls->ls_stid.sc_file->fi_lock); + fl = ls->ls_file; + ls->ls_file = NULL; + spin_unlock(&ls->ls_stid.sc_file->fi_lock); + + if (fl) { + if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls) + kernel_setlease(fl->nf_file, F_UNLCK, NULL, + (void **)&ls); + nfsd_file_put(fl); + } +} + static void nfsd4_free_layout_stateid(struct nfs4_stid *stid) { @@ -169,9 +186,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid) list_del_init(&ls->ls_perfile); spin_unlock(&fp->fi_lock); - if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls) - vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls); - nfsd_file_put(ls->ls_file); + nfsd4_close_layout(ls); if (ls->ls_recalled) atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls); @@ -182,27 +197,26 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid) static int nfsd4_layout_setlease(struct nfs4_layout_stateid *ls) { - struct file_lock *fl; + struct file_lease *fl; int status; if (nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls) return 0; - fl = locks_alloc_lock(); + fl = locks_alloc_lease(); if (!fl) return -ENOMEM; - locks_init_lock(fl); + locks_init_lease(fl); fl->fl_lmops = &nfsd4_layouts_lm_ops; - fl->fl_flags = FL_LAYOUT; - fl->fl_type = F_RDLCK; - fl->fl_end = OFFSET_MAX; - fl->fl_owner = ls; - fl->fl_pid = current->tgid; - fl->fl_file = ls->ls_file->nf_file; - - status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL); + fl->c.flc_flags = FL_LAYOUT; + fl->c.flc_type = F_RDLCK; + fl->c.flc_owner = ls; + fl->c.flc_pid = current->tgid; + fl->c.flc_file = ls->ls_file->nf_file; + + status = kernel_setlease(fl->c.flc_file, fl->c.flc_type, &fl, NULL); if (status) { - locks_free_lock(fl); + locks_free_lease(fl); return status; } BUG_ON(fl != NULL); @@ -236,7 +250,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops, NFSPROC4_CLNT_CB_LAYOUT); - if (parent->sc_type == NFS4_DELEG_STID) + if (parent->sc_type == SC_TYPE_DELEG) ls->ls_file = nfsd_file_get(fp->fi_deleg_file); else ls->ls_file = find_any_file(fp); @@ -250,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, } spin_lock(&clp->cl_lock); - stp->sc_type = NFS4_LAYOUT_STID; + stp->sc_type = SC_TYPE_LAYOUT; list_add(&ls->ls_perclnt, &clp->cl_lo_states); spin_unlock(&clp->cl_lock); @@ -269,13 +283,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, { struct nfs4_layout_stateid *ls; struct nfs4_stid *stid; - unsigned char typemask = NFS4_LAYOUT_STID; + unsigned short typemask = SC_TYPE_LAYOUT; __be32 status; if (create) - typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID); + typemask |= (SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG); - status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid, + status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid, net_generic(SVC_NET(rqstp), nfsd_net_id)); if (status) goto out; @@ -286,7 +300,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, goto out_put_stid; } - if (stid->sc_type != NFS4_LAYOUT_STID) { + if (stid->sc_type != SC_TYPE_LAYOUT) { ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type); nfs4_put_stid(stid); @@ -515,11 +529,11 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, if (!list_empty(&ls->ls_layouts)) { if (found) nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid); - lrp->lrs_present = 1; + lrp->lrs_present = true; } else { trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid); - nfs4_unhash_stid(&ls->ls_stid); - lrp->lrs_present = 0; + ls->ls_stid.sc_status |= SC_STATUS_CLOSED; + lrp->lrs_present = false; } spin_unlock(&ls->ls_lock); @@ -539,7 +553,7 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp, struct nfs4_layout *lp, *t; LIST_HEAD(reaplist); - lrp->lrs_present = 0; + lrp->lrs_present = false; spin_lock(&clp->cl_lock); list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) { @@ -605,7 +619,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp) } static void -nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) +nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file) { struct nfs4_client *clp = ls->ls_stid.sc_client; char addr_str[INET6_ADDRSTRLEN]; @@ -627,7 +641,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) argv[0] = (char *)nfsd_recall_failed; argv[1] = addr_str; - argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id; + argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id; argv[3] = NULL; error = call_usermodehelper(nfsd_recall_failed, argv, envp, @@ -657,6 +671,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) struct nfsd_net *nn; ktime_t now, cutoff; const struct nfsd4_layout_ops *ops; + struct nfsd_file *fl; trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task); switch (task->tk_status) { @@ -688,12 +703,17 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) * Unknown error or non-responding client, we'll need to fence. */ trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid); - - ops = nfsd4_layout_ops[ls->ls_layout_type]; - if (ops->fence_client) - ops->fence_client(ls); - else - nfsd4_cb_layout_fail(ls); + rcu_read_lock(); + fl = nfsd_file_get(ls->ls_file); + rcu_read_unlock(); + if (fl) { + ops = nfsd4_layout_ops[ls->ls_layout_type]; + if (ops->fence_client) + ops->fence_client(ls, fl); + else + nfsd4_cb_layout_fail(ls, fl); + nfsd_file_put(fl); + } return 1; case -NFS4ERR_NOMATCHING_LAYOUT: trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid); @@ -720,10 +740,11 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops = { .prepare = nfsd4_cb_layout_prepare, .done = nfsd4_cb_layout_done, .release = nfsd4_cb_layout_release, + .opcode = OP_CB_LAYOUTRECALL, }; static bool -nfsd4_layout_lm_break(struct file_lock *fl) +nfsd4_layout_lm_break(struct file_lease *fl) { /* * We don't want the locks code to timeout the lease for us; @@ -731,19 +752,19 @@ nfsd4_layout_lm_break(struct file_lock *fl) * in time: */ fl->fl_break_time = 0; - nfsd4_recall_file_layout(fl->fl_owner); + nfsd4_recall_file_layout(fl->c.flc_owner); return false; } static int -nfsd4_layout_lm_change(struct file_lock *onlist, int arg, +nfsd4_layout_lm_change(struct file_lease *onlist, int arg, struct list_head *dispose) { BUG_ON(!(arg & F_UNLCK)); return lease_modify(onlist, arg, dispose); } -static const struct lock_manager_operations nfsd4_layouts_lm_ops = { +static const struct lease_manager_operations nfsd4_layouts_lm_ops = { .lm_break = nfsd4_layout_lm_break, .lm_change = nfsd4_layout_lm_change, }; @@ -756,13 +777,11 @@ nfsd4_init_pnfs(void) for (i = 0; i < DEVID_HASH_SIZE; i++) INIT_LIST_HEAD(&nfsd_devid_hash[i]); - nfs4_layout_cache = kmem_cache_create("nfs4_layout", - sizeof(struct nfs4_layout), 0, 0, NULL); + nfs4_layout_cache = KMEM_CACHE(nfs4_layout, 0); if (!nfs4_layout_cache) return -ENOMEM; - nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid", - sizeof(struct nfs4_layout_stateid), 0, 0, NULL); + nfs4_layout_stateid_cache = KMEM_CACHE(nfs4_layout_stateid, 0); if (!nfs4_layout_stateid_cache) { kmem_cache_destroy(nfs4_layout_cache); return -ENOMEM; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b3eca08f15b1..02c9f3b312a0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -158,7 +158,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs return fh_verify(rqstp, current_fh, S_IFREG, accmode); } -static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) +static __be32 nfsd_check_obj_isreg(struct svc_fh *fh, u32 minor_version) { umode_t mode = d_inode(fh->fh_dentry)->i_mode; @@ -166,14 +166,15 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) return nfs_ok; if (S_ISDIR(mode)) return nfserr_isdir; - /* - * Using err_symlink as our catch-all case may look odd; but - * there's no other obvious error for this case in 4.0, and we - * happen to know that it will cause the linux v4 client to do - * the right thing on attempts to open something other than a - * regular file. - */ - return nfserr_symlink; + if (S_ISLNK(mode)) + return nfserr_symlink; + + /* RFC 7530 - 16.16.6 */ + if (minor_version == 0) + return nfserr_symlink; + else + return nfserr_wrong_type; + } static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh) @@ -322,8 +323,8 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, status = nfserr_exist; break; case NFS4_CREATE_EXCLUSIVE: - if (d_inode(child)->i_mtime.tv_sec == v_mtime && - d_inode(child)->i_atime.tv_sec == v_atime && + if (inode_get_mtime_sec(d_inode(child)) == v_mtime && + inode_get_atime_sec(d_inode(child)) == v_atime && d_inode(child)->i_size == 0) { open->op_created = true; break; /* subtle */ @@ -331,8 +332,8 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, status = nfserr_exist; break; case NFS4_CREATE_EXCLUSIVE4_1: - if (d_inode(child)->i_mtime.tv_sec == v_mtime && - d_inode(child)->i_atime.tv_sec == v_atime && + if (inode_get_mtime_sec(d_inode(child)) == v_mtime && + inode_get_atime_sec(d_inode(child)) == v_atime && d_inode(child)->i_size == 0) { open->op_created = true; goto set_attr; /* subtle */ @@ -466,7 +467,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru } if (status) goto out; - status = nfsd_check_obj_isreg(*resfh); + status = nfsd_check_obj_isreg(*resfh, cstate->minorversion); if (status) goto out; @@ -961,8 +962,11 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * To ensure proper ordering, we therefore turn off zero copy if * the client wants us to do more in this compound: */ - if (!nfsd4_last_compound_op(rqstp)) - clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + if (!nfsd4_last_compound_op(rqstp)) { + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + argp->splice_ok = false; + } /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -1131,6 +1135,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, }; struct inode *inode; __be32 status = nfs_ok; + bool save_no_wcc; int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { @@ -1156,8 +1161,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, - 0, (time64_t)0); + save_no_wcc = cstate->current_fh.fh_no_wcc; + cstate->current_fh.fh_no_wcc = true; + status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, NULL); + cstate->current_fh.fh_no_wcc = save_no_wcc; if (!status) status = nfserrno(attrs.na_labelerr); if (!status) @@ -1324,7 +1331,8 @@ extern void nfs_sb_deactive(struct super_block *sb); * setup a work entry in the ssc delayed unmount list. */ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, - struct nfsd4_ssc_umount_item **nsui) + struct nfsd4_ssc_umount_item **nsui, + struct svc_rqst *rqstp) { struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *work = NULL; @@ -1346,7 +1354,7 @@ try_again: spin_unlock(&nn->nfsd_ssc_lock); /* allow 20secs for mount/unmount for now - revisit */ - if (kthread_should_stop() || + if (svc_thread_should_stop(rqstp) || (schedule_timeout(20*HZ) == 0)) { finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); @@ -1462,7 +1470,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, goto out_free_rawdata; snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); - status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui); + status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui, rqstp); if (status) goto out_free_devname; if ((*nsui)->nsui_vfsmount) @@ -1609,7 +1617,8 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = { .release = nfsd4_cb_offload_release, - .done = nfsd4_cb_offload_done + .done = nfsd4_cb_offload_done, + .opcode = OP_CB_OFFLOAD, }; static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) @@ -1636,6 +1645,7 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy, if (bytes_total == 0) bytes_total = ULLONG_MAX; do { + /* Only async copies can be stopped here */ if (kthread_should_stop()) break; bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos, @@ -1723,7 +1733,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) nfs4_put_copy(copy); } -static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) +static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) { struct nfsd4_cb_offload *cbo; @@ -1733,12 +1743,12 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); - cbo->co_nfserr = nfserr; + cbo->co_nfserr = copy->nfserr; nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid, - &cbo->co_fh, copy->cp_count, nfserr); + &cbo->co_fh, copy->cp_count, copy->nfserr); nfsd4_run_cb(&cbo->co_cb); } @@ -1752,8 +1762,8 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) static int nfsd4_do_async_copy(void *data) { struct nfsd4_copy *copy = (struct nfsd4_copy *)data; - __be32 nfserr; + trace_nfsd_copy_async(copy); if (nfsd4_ssc_is_inter(copy)) { struct file *filp; @@ -1762,24 +1772,26 @@ static int nfsd4_do_async_copy(void *data) if (IS_ERR(filp)) { switch (PTR_ERR(filp)) { case -EBADF: - nfserr = nfserr_wrong_type; + copy->nfserr = nfserr_wrong_type; break; default: - nfserr = nfserr_offload_denied; + copy->nfserr = nfserr_offload_denied; } /* ss_mnt will be unmounted by the laundromat */ goto do_callback; } - nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, - false); + copy->nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, + false); nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst); } else { - nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, - copy->nf_dst->nf_file, false); + copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, false); } do_callback: - nfsd4_send_cb_offload(copy, nfserr); + set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); + trace_nfsd_copy_async_done(copy); + nfsd4_send_cb_offload(copy); cleanup_async_copy(copy); return 0; } @@ -1794,22 +1806,35 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd42_write_res *result; __be32 status; + /* + * Currently, async COPY is not reliable. Force all COPY + * requests to be synchronous to avoid client application + * hangs waiting for COPY completion. + */ + nfsd4_copy_set_sync(copy, true); + result = ©->cp_res; nfsd_copy_write_verifier((__be32 *)&result->wr_verifier.data, nn); copy->cp_clp = cstate->clp; if (nfsd4_ssc_is_inter(copy)) { + trace_nfsd_copy_inter(copy); if (!inter_copy_offload_enable || nfsd4_copy_is_sync(copy)) { status = nfserr_notsupp; goto out; } status = nfsd4_setup_inter_ssc(rqstp, cstate, copy); - if (status) + if (status) { + trace_nfsd_copy_done(copy, status); return nfserr_offload_denied; + } } else { + trace_nfsd_copy_intra(copy); status = nfsd4_setup_intra_ssc(rqstp, cstate, copy); - if (status) + if (status) { + trace_nfsd_copy_done(copy, status); return status; + } } memcpy(©->fh, &cstate->current_fh.fh_handle, @@ -1848,6 +1873,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy->nf_dst->nf_file, true); } out: + trace_nfsd_copy_done(copy, status); release_copy_files(copy); return status; out_err: @@ -1920,7 +1946,7 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_copy_notify *cn = &u->copy_notify; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - struct nfs4_stid *stid; + struct nfs4_stid *stid = NULL; struct nfs4_cpntf_state *cps; struct nfs4_client *clp = cstate->clp; @@ -1929,9 +1955,11 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &stid); if (status) return status; + if (!stid) + return nfserr_bad_stateid; - cn->cpn_sec = nn->nfsd4_lease; - cn->cpn_nsec = 0; + cn->cpn_lease_time.tv_sec = nn->nfsd4_lease; + cn->cpn_lease_time.tv_nsec = 0; status = nfserrno(-ENOMEM); cps = nfs4_alloc_init_cpntf_state(nn, stid); @@ -1988,11 +2016,16 @@ nfsd4_offload_status(struct svc_rqst *rqstp, struct nfsd4_copy *copy; struct nfs4_client *clp = cstate->clp; + os->completed = false; spin_lock(&clp->async_lock); copy = find_async_copy_locked(clp, &os->stateid); - if (copy) + if (copy) { os->count = copy->cp_res.wr_bytes_written; - else + if (test_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags)) { + os->completed = true; + os->status = copy->nfserr; + } + } else status = nfserr_bad_stateid; spin_unlock(&clp->async_lock); @@ -2139,6 +2172,29 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status == nfserr_same ? nfs_ok : status; } +static __be32 +nfsd4_get_dir_delegation(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation; + + /* + * RFC 8881, section 18.39.3 says: + * + * "The server may refuse to grant the delegation. In that case, the + * server will return NFS4ERR_DIRDELEG_UNAVAIL." + * + * This is sub-optimal, since it means that the server would need to + * abort compound processing just because the delegation wasn't + * available. RFC8881bis should change this to allow the server to + * return NFS4_OK with a non-fatal status of GDD4_UNAVAIL in this + * situation. + */ + gdd->gddrnf_status = GDD4_UNAVAIL; + return nfs_ok; +} + #ifdef CONFIG_NFSD_PNFS static const struct nfsd4_layout_ops * nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type) @@ -2181,7 +2237,9 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, return nfserr_noent; } - exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid); + exp = rqst_exp_find(&rqstp->rq_chandle, SVC_NET(rqstp), + rqstp->rq_client, rqstp->rq_gssclient, + map->fsid_type, map->fsid); if (IS_ERR(exp)) { dprintk("%s: could not find device id\n", __func__); return nfserr_noent; @@ -2349,10 +2407,10 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, mutex_unlock(&ls->ls_mutex); if (new_size > i_size_read(inode)) { - lcp->lc_size_chg = 1; + lcp->lc_size_chg = true; lcp->lc_newsize = new_size; } else { - lcp->lc_size_chg = 0; + lcp->lc_size_chg = false; } nfserr = ops->proc_layoutcommit(inode, lcp); @@ -3068,6 +3126,18 @@ static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp, * sizeof(__be32); } +static u32 nfsd4_get_dir_delegation_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + 1 /* gddr_status */ + + op_encode_verifier_maxsz + + op_encode_stateid_maxsz + + 2 /* gddr_notification */ + + 2 /* gddr_child_attributes */ + + 2 /* gddr_dir_attributes */); +} + #ifdef CONFIG_NFSD_PNFS static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp, const struct nfsd4_op *op) @@ -3202,6 +3272,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_LOCK] = { .op_func = nfsd4_lock, + .op_release = nfsd4_lock_release, .op_flags = OP_MODIFIES_SOMETHING | OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_LOCK", @@ -3210,6 +3281,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_LOCKT] = { .op_func = nfsd4_lockt, + .op_release = nfsd4_lockt_release, .op_flags = OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_LOCKT", .op_rsize_bop = nfsd4_lock_rsize, @@ -3454,6 +3526,12 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_get_currentstateid = nfsd4_get_freestateid, .op_rsize_bop = nfsd4_only_status_rsize, }, + [OP_GET_DIR_DELEGATION] = { + .op_func = nfsd4_get_dir_delegation, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_GET_DIR_DELEGATION", + .op_rsize_bop = nfsd4_get_dir_delegation_rsize, + }, #ifdef CONFIG_NFSD_PNFS [OP_GETDEVICEINFO] = { .op_func = nfsd4_getdeviceinfo, @@ -3580,7 +3658,8 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow; u32 opiter; - if (!cstate->minorversion) + if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] || + cstate->minorversion == 0) return false; if (cstate->spo_must_allowed) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 892fecce18b8..1c8fcb04b3cd 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -66,6 +66,7 @@ struct nfsd4_client_tracking_ops { static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops; static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2; +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING /* Globals */ static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; @@ -721,6 +722,7 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { .version = 1, .msglen = 0, }; +#endif /* CONFIG_NFSD_LEGACY_CLIENT_TRACKING */ /* Globals */ #define NFSD_PIPE_DIR "nfsd" @@ -732,8 +734,10 @@ struct cld_net { spinlock_t cn_lock; struct list_head cn_list; unsigned int cn_xid; - bool cn_has_legacy; struct crypto_shash *cn_tfm; +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING + bool cn_has_legacy; +#endif }; struct cld_upcall { @@ -794,7 +798,6 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, uint8_t cmd, princhashlen; struct xdr_netobj name, princhash = { .len = 0, .data = NULL }; uint16_t namelen; - struct cld_net *cn = nn->cld_net; if (get_user(cmd, &cmsg->cm_cmd)) { dprintk("%s: error when copying cmd from userspace", __func__); @@ -807,8 +810,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, ci = &cmsg->cm_u.cm_clntinfo; if (get_user(namelen, &ci->cc_name.cn_len)) return -EFAULT; - if (!namelen) { - dprintk("%s: namelen should not be zero", __func__); + if (namelen == 0 || namelen > NFS4_OPAQUE_LIMIT) { + dprintk("%s: invalid namelen (%u)", __func__, namelen); return -EINVAL; } name.data = memdup_user(&ci->cc_name.cn_id, namelen); @@ -833,8 +836,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, cnm = &cmsg->cm_u.cm_name; if (get_user(namelen, &cnm->cn_len)) return -EFAULT; - if (!namelen) { - dprintk("%s: namelen should not be zero", __func__); + if (namelen == 0 || namelen > NFS4_OPAQUE_LIMIT) { + dprintk("%s: invalid namelen (%u)", __func__, namelen); return -EINVAL; } name.data = memdup_user(&cnm->cn_id, namelen); @@ -842,11 +845,15 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, return PTR_ERR(name.data); name.len = namelen; } +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) { + struct cld_net *cn = nn->cld_net; + name.len = name.len - 5; memmove(name.data, name.data + 5, name.len); cn->cn_has_legacy = true; } +#endif if (!nfs4_client_to_reclaim(name, princhash, nn)) { kfree(name.data); kfree(princhash.data); @@ -1019,7 +1026,9 @@ __nfsd4_init_cld_pipe(struct net *net) } cn->cn_pipe->dentry = dentry; +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING cn->cn_has_legacy = false; +#endif nn->cld_net = cn; return 0; @@ -1291,10 +1300,6 @@ nfsd4_cld_check(struct nfs4_client *clp) { struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - struct cld_net *cn = nn->cld_net; - int status; - char dname[HEXDIR_LEN]; - struct xdr_netobj name; /* did we already find that this client is stable? */ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) @@ -1305,7 +1310,12 @@ nfsd4_cld_check(struct nfs4_client *clp) if (crp) goto found; - if (cn->cn_has_legacy) { +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING + if (nn->cld_net->cn_has_legacy) { + int status; + char dname[HEXDIR_LEN]; + struct xdr_netobj name; + status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) return -ENOENT; @@ -1323,6 +1333,7 @@ nfsd4_cld_check(struct nfs4_client *clp) goto found; } +#endif return -ENOENT; found: crp->cr_clp = clp; @@ -1336,8 +1347,6 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; int status; - char dname[HEXDIR_LEN]; - struct xdr_netobj name; struct crypto_shash *tfm = cn->cn_tfm; struct xdr_netobj cksum; char *principal = NULL; @@ -1351,7 +1360,11 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) if (crp) goto found; +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING if (cn->cn_has_legacy) { + struct xdr_netobj name; + char dname[HEXDIR_LEN]; + status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) return -ENOENT; @@ -1369,6 +1382,7 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) goto found; } +#endif return -ENOENT; found: if (crp->cr_princhash.len) { @@ -1672,6 +1686,7 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = { .msglen = sizeof(struct cld_msg_v2), }; +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING /* upcall via usermodehelper */ static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack"; module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog), @@ -1889,10 +1904,7 @@ nfsd4_cltrack_upcall_lock(struct nfs4_client *clp) static void nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp) { - smp_mb__before_atomic(); - clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags); - smp_mb__after_atomic(); - wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK); + clear_and_wake_up_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags); } static void @@ -2016,28 +2028,10 @@ static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = { .msglen = 0, }; -int -nfsd4_client_tracking_init(struct net *net) +static inline int check_for_legacy_methods(int status, struct net *net) { - int status; - struct path path; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - /* just run the init if it the method is already decided */ - if (nn->client_tracking_ops) - goto do_init; - - /* First, try to use nfsdcld */ - nn->client_tracking_ops = &nfsd4_cld_tracking_ops; - status = nn->client_tracking_ops->init(net); - if (!status) - return status; - if (status != -ETIMEDOUT) { - nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0; - status = nn->client_tracking_ops->init(net); - if (!status) - return status; - } + struct path path; /* * Next, try the UMH upcall. @@ -2054,20 +2048,51 @@ nfsd4_client_tracking_init(struct net *net) nn->client_tracking_ops = &nfsd4_legacy_tracking_ops; status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); if (!status) { - status = d_is_dir(path.dentry); + status = !d_is_dir(path.dentry); path_put(&path); - if (!status) { - status = -EINVAL; - goto out; - } + if (status) + return -ENOTDIR; } + return status; +} +#else +static inline int check_for_legacy_methods(int status, struct net *net) +{ + return status; +} +#endif /* CONFIG_LEGACY_NFSD_CLIENT_TRACKING */ +int +nfsd4_client_tracking_init(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int status; + + /* just run the init if it the method is already decided */ + if (nn->client_tracking_ops) + goto do_init; + + /* First, try to use nfsdcld */ + nn->client_tracking_ops = &nfsd4_cld_tracking_ops; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + if (status != -ETIMEDOUT) { + nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + } + + status = check_for_legacy_methods(status, net); + if (status) + goto out; do_init: status = nn->client_tracking_ops->init(net); out: if (status) { - printk(KERN_WARNING "NFSD: Unable to initialize client " - "recovery tracking! (%d)\n", status); + pr_warn("NFSD: Unable to initialize client recovery tracking! (%d)\n", status); + pr_warn("NFSD: Is nfsdcld running? If not, enable CONFIG_NFSD_LEGACY_CLIENT_TRACKING.\n"); nn->client_tracking_ops = NULL; } return status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a25cb2ff1b0b..bcb44400e243 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -59,7 +59,7 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC -#define all_ones {{~0,~0},~0} +#define all_ones {{ ~0, ~0}, ~0} static const stateid_t one_stateid = { .si_generation = ~0, .si_opaque = all_ones, @@ -87,6 +87,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); static void nfsd4_file_hash_remove(struct nfs4_file *fi); +static void deleg_reaper(struct nfsd_net *nn); /* Locking: */ @@ -127,6 +128,7 @@ static void free_session(struct nfsd4_session *); static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops; static struct workqueue_struct *laundry_wq; @@ -297,7 +299,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, nbl = find_blocked_lock(lo, fh, nn); if (!nbl) { - nbl= kmalloc(sizeof(*nbl), GFP_KERNEL); + nbl = kmalloc(sizeof(*nbl), GFP_KERNEL); if (nbl) { INIT_LIST_HEAD(&nbl->nbl_list); INIT_LIST_HEAD(&nbl->nbl_lru); @@ -318,6 +320,7 @@ free_nbl(struct kref *kref) struct nfsd4_blocked_lock *nbl; nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref); + locks_release_private(&nbl->nbl_lock); kfree(nbl); } @@ -325,7 +328,6 @@ static void free_blocked_lock(struct nfsd4_blocked_lock *nbl) { locks_delete_block(&nbl->nbl_lock); - locks_release_private(&nbl->nbl_lock); kref_put(&nbl->nbl_kref, free_nbl); } @@ -398,6 +400,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { .prepare = nfsd4_cb_notify_lock_prepare, .done = nfsd4_cb_notify_lock_done, .release = nfsd4_cb_notify_lock_release, + .opcode = OP_CB_NOTIFY_LOCK, }; /* @@ -539,7 +542,7 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) } static struct nfs4_openowner * -find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, struct nfs4_client *clp) { struct nfs4_stateowner *so; @@ -556,18 +559,6 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, return NULL; } -static struct nfs4_openowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, - struct nfs4_client *clp) -{ - struct nfs4_openowner *oo; - - spin_lock(&clp->cl_lock); - oo = find_openstateowner_str_locked(hashval, open, clp); - spin_unlock(&clp->cl_lock); - return oo; -} - static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -1066,6 +1057,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) return openlockstateid(stid); } +/* + * As the sc_free callback of deleg, this may be called by nfs4_put_stid + * in nfsd_break_one_deleg. + * Considering nfsd_break_one_deleg is called with the flc->flc_lock held, + * this function mustn't ever sleep. + */ static void nfs4_free_deleg(struct nfs4_stid *stid) { struct nfs4_delegation *dp = delegstateid(stid); @@ -1160,6 +1157,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate, u32 dl_type) { struct nfs4_delegation *dp; + struct nfs4_stid *stid; long n; dprintk("NFSD alloc_init_deleg\n"); @@ -1168,9 +1166,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, goto out_dec; if (delegation_blocked(&fp->fi_fhandle)) goto out_dec; - dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); - if (dp == NULL) + stid = nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg); + if (stid == NULL) goto out_dec; + dp = delegstateid(stid); /* * delegation seqid's are never incremented. The 4.1 special @@ -1188,6 +1187,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, dp->dl_recalled = false; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client, + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR); + dp->dl_cb_fattr.ncf_file_modified = false; + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE; get_nfs4_file(fp); dp->dl_stid.sc_file = fp; return dp; @@ -1209,6 +1212,8 @@ nfs4_put_stid(struct nfs4_stid *s) return; } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + if (s->sc_status & SC_STATUS_ADMIN_REVOKED) + atomic_dec(&s->sc_client->cl_admin_revoked); nfs4_free_cpntf_statelist(clp->net, s); spin_unlock(&clp->cl_lock); s->sc_free(s); @@ -1248,7 +1253,7 @@ static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp) WARN_ON_ONCE(!fp->fi_delegees); - vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp); + kernel_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp); put_deleg_file(fp); } @@ -1259,11 +1264,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp) nfs4_put_stid(&dp->dl_stid); } -void nfs4_unhash_stid(struct nfs4_stid *s) -{ - s->sc_type = 0; -} - /** * nfs4_delegation_exists - Discover if this delegation already exists * @clp: a pointer to the nfs4_client we're granting a delegation to @@ -1311,11 +1311,12 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + lockdep_assert_held(&clp->cl_lock); if (nfs4_delegation_exists(clp, fp)) return -EAGAIN; refcount_inc(&dp->dl_stid.sc_count); - dp->dl_stid.sc_type = NFS4_DELEG_STID; + dp->dl_stid.sc_type = SC_TYPE_DELEG; list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &clp->cl_delegations); return 0; @@ -1327,7 +1328,7 @@ static bool delegation_hashed(struct nfs4_delegation *dp) } static bool -unhash_delegation_locked(struct nfs4_delegation *dp) +unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short statusmask) { struct nfs4_file *fp = dp->dl_stid.sc_file; @@ -1336,7 +1337,13 @@ unhash_delegation_locked(struct nfs4_delegation *dp) if (!delegation_hashed(dp)) return false; - dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; + if (statusmask == SC_STATUS_REVOKED && + dp->dl_stid.sc_client->cl_minorversion == 0) + statusmask = SC_STATUS_CLOSED; + dp->dl_stid.sc_status |= statusmask; + if (statusmask & SC_STATUS_ADMIN_REVOKED) + atomic_inc(&dp->dl_stid.sc_client->cl_admin_revoked); + /* Ensure that deleg break won't try to requeue it */ ++dp->dl_time; spin_lock(&fp->fi_lock); @@ -1352,32 +1359,58 @@ static void destroy_delegation(struct nfs4_delegation *dp) bool unhashed; spin_lock(&state_lock); - unhashed = unhash_delegation_locked(dp); + unhashed = unhash_delegation_locked(dp, SC_STATUS_CLOSED); spin_unlock(&state_lock); if (unhashed) destroy_unhashed_deleg(dp); } +/** + * revoke_delegation - perform nfs4 delegation structure cleanup + * @dp: pointer to the delegation + * + * This function assumes that it's called either from the administrative + * interface (nfsd4_revoke_states()) that's revoking a specific delegation + * stateid or it's called from a laundromat thread (nfsd4_landromat()) that + * determined that this specific state has expired and needs to be revoked + * (both mark state with the appropriate stid sc_status mode). It is also + * assumed that a reference was taken on the @dp state. + * + * If this function finds that the @dp state is SC_STATUS_FREED it means + * that a FREE_STATEID operation for this stateid has been processed and + * we can proceed to removing it from recalled list. However, if @dp state + * isn't marked SC_STATUS_FREED, it means we need place it on the cl_revoked + * list and wait for the FREE_STATEID to arrive from the client. At the same + * time, we need to mark it as SC_STATUS_FREEABLE to indicate to the + * nfsd4_free_stateid() function that this stateid has already been added + * to the cl_revoked list and that nfsd4_free_stateid() is now responsible + * for removing it from the list. Inspection of where the delegation state + * in the revocation process is protected by the clp->cl_lock. + */ static void revoke_delegation(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_stid.sc_client; WARN_ON(!list_empty(&dp->dl_recall_lru)); + WARN_ON_ONCE(!(dp->dl_stid.sc_status & + (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED))); trace_nfsd_stid_revoke(&dp->dl_stid); - if (clp->cl_minorversion) { - spin_lock(&clp->cl_lock); - dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; - refcount_inc(&dp->dl_stid.sc_count); - list_add(&dp->dl_recall_lru, &clp->cl_revoked); - spin_unlock(&clp->cl_lock); + spin_lock(&clp->cl_lock); + if (dp->dl_stid.sc_status & SC_STATUS_FREED) { + list_del_init(&dp->dl_recall_lru); + goto out; } + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + dp->dl_stid.sc_status |= SC_STATUS_FREEABLE; +out: + spin_unlock(&clp->cl_lock); destroy_unhashed_deleg(dp); } -/* - * SETCLIENTID state +/* + * SETCLIENTID state */ static unsigned int clientid_hashval(u32 id) @@ -1398,11 +1431,16 @@ static void recalculate_deny_mode(struct nfs4_file *fp) { struct nfs4_ol_stateid *stp; + u32 old_deny; spin_lock(&fp->fi_lock); + old_deny = fp->fi_share_deny; fp->fi_share_deny = 0; - list_for_each_entry(stp, &fp->fi_stateids, st_perfile) + list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap); + if (fp->fi_share_deny == old_deny) + break; + } spin_unlock(&fp->fi_lock); } @@ -1530,6 +1568,8 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + if (s->sc_status & SC_STATUS_ADMIN_REVOKED) + atomic_dec(&s->sc_client->cl_admin_revoked); list_add(&stp->st_locks, reaplist); } @@ -1540,7 +1580,7 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) if (!unhash_ol_stateid(stp)) return false; list_del_init(&stp->st_locks); - nfs4_unhash_stid(&stp->st_stid); + stp->st_stid.sc_status |= SC_STATUS_CLOSED; return true; } @@ -1598,7 +1638,7 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, while (!list_empty(&open_stp->st_locks)) { stp = list_entry(open_stp->st_locks.next, struct nfs4_ol_stateid, st_locks); - WARN_ON(!unhash_lock_stateid(stp)); + unhash_lock_stateid(stp); put_ol_stateid_locked(stp, reaplist); } } @@ -1619,6 +1659,7 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) LIST_HEAD(reaplist); spin_lock(&stp->st_stid.sc_client->cl_lock); + stp->st_stid.sc_status |= SC_STATUS_CLOSED; if (unhash_open_stateid(stp, &reaplist)) put_ol_stateid_locked(stp, &reaplist); spin_unlock(&stp->st_stid.sc_client->cl_lock); @@ -1664,9 +1705,7 @@ static void release_openowner(struct nfs4_openowner *oo) { struct nfs4_ol_stateid *stp; struct nfs4_client *clp = oo->oo_owner.so_client; - struct list_head reaplist; - - INIT_LIST_HEAD(&reaplist); + LIST_HEAD(reaplist); spin_lock(&clp->cl_lock); unhash_openowner_locked(oo); @@ -1682,6 +1721,137 @@ static void release_openowner(struct nfs4_openowner *oo) nfs4_put_stateowner(&oo->oo_owner); } +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp, + struct super_block *sb, + unsigned int sc_types) +{ + unsigned long id, tmp; + struct nfs4_stid *stid; + + spin_lock(&clp->cl_lock); + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id) + if ((stid->sc_type & sc_types) && + stid->sc_status == 0 && + stid->sc_file->fi_inode->i_sb == sb) { + refcount_inc(&stid->sc_count); + break; + } + spin_unlock(&clp->cl_lock); + return stid; +} + +/** + * nfsd4_revoke_states - revoke all nfsv4 states associated with given filesystem + * @net: used to identify instance of nfsd (there is one per net namespace) + * @sb: super_block used to identify target filesystem + * + * All nfs4 states (open, lock, delegation, layout) held by the server instance + * and associated with a file on the given filesystem will be revoked resulting + * in any files being closed and so all references from nfsd to the filesystem + * being released. Thus nfsd will no longer prevent the filesystem from being + * unmounted. + * + * The clients which own the states will subsequently being notified that the + * states have been "admin-revoked". + */ +void nfsd4_revoke_states(struct net *net, struct super_block *sb) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unsigned int idhashval; + unsigned int sc_types; + + sc_types = SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG | SC_TYPE_LAYOUT; + + spin_lock(&nn->client_lock); + for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) { + struct list_head *head = &nn->conf_id_hashtbl[idhashval]; + struct nfs4_client *clp; + retry: + list_for_each_entry(clp, head, cl_idhash) { + struct nfs4_stid *stid = find_one_sb_stid(clp, sb, + sc_types); + if (stid) { + struct nfs4_ol_stateid *stp; + struct nfs4_delegation *dp; + struct nfs4_layout_stateid *ls; + + spin_unlock(&nn->client_lock); + switch (stid->sc_type) { + case SC_TYPE_OPEN: + stp = openlockstateid(stid); + mutex_lock_nested(&stp->st_mutex, + OPEN_STATEID_MUTEX); + + spin_lock(&clp->cl_lock); + if (stid->sc_status == 0) { + stid->sc_status |= + SC_STATUS_ADMIN_REVOKED; + atomic_inc(&clp->cl_admin_revoked); + spin_unlock(&clp->cl_lock); + release_all_access(stp); + } else + spin_unlock(&clp->cl_lock); + mutex_unlock(&stp->st_mutex); + break; + case SC_TYPE_LOCK: + stp = openlockstateid(stid); + mutex_lock_nested(&stp->st_mutex, + LOCK_STATEID_MUTEX); + spin_lock(&clp->cl_lock); + if (stid->sc_status == 0) { + struct nfs4_lockowner *lo = + lockowner(stp->st_stateowner); + struct nfsd_file *nf; + + stid->sc_status |= + SC_STATUS_ADMIN_REVOKED; + atomic_inc(&clp->cl_admin_revoked); + spin_unlock(&clp->cl_lock); + nf = find_any_file(stp->st_stid.sc_file); + if (nf) { + get_file(nf->nf_file); + filp_close(nf->nf_file, + (fl_owner_t)lo); + nfsd_file_put(nf); + } + release_all_access(stp); + } else + spin_unlock(&clp->cl_lock); + mutex_unlock(&stp->st_mutex); + break; + case SC_TYPE_DELEG: + refcount_inc(&stid->sc_count); + dp = delegstateid(stid); + spin_lock(&state_lock); + if (!unhash_delegation_locked( + dp, SC_STATUS_ADMIN_REVOKED)) + dp = NULL; + spin_unlock(&state_lock); + if (dp) + revoke_delegation(dp); + break; + case SC_TYPE_LAYOUT: + ls = layoutstateid(stid); + nfsd4_close_layout(ls); + break; + } + nfs4_put_stid(stid); + spin_lock(&nn->client_lock); + if (clp->cl_minorversion == 0) + /* Allow cleanup after a lease period. + * store_release ensures cleanup will + * see any newly revoked states if it + * sees the time updated. + */ + nn->nfs40_last_revoke = + ktime_get_boottime_seconds(); + goto retry; + } + } + } + spin_unlock(&nn->client_lock); +} + static inline int hash_sessionid(struct nfs4_sessionid *sessionid) { @@ -2109,6 +2279,10 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name, GFP_KERNEL); if (!clp->cl_ownerstr_hashtbl) goto err_no_hashtbl; + clp->cl_callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0); + if (!clp->cl_callback_wq) + goto err_no_callback_wq; + for (i = 0; i < OWNER_HASH_SIZE; i++) INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]); INIT_LIST_HEAD(&clp->cl_sessions); @@ -2131,6 +2305,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name, spin_lock_init(&clp->cl_lock); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; +err_no_callback_wq: + kfree(clp->cl_ownerstr_hashtbl); err_no_hashtbl: kfree(clp->cl_name.data); err_no_name: @@ -2144,6 +2320,7 @@ static void __free_client(struct kref *k) struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs); free_svc_cred(&clp->cl_cred); + destroy_workqueue(clp->cl_callback_wq); kfree(clp->cl_ownerstr_hashtbl); kfree(clp->cl_name.data); kfree(clp->cl_nii_domain.data); @@ -2216,7 +2393,11 @@ unhash_client(struct nfs4_client *clp) static __be32 mark_client_expired_locked(struct nfs4_client *clp) { - if (atomic_read(&clp->cl_rpc_users)) + int users = atomic_read(&clp->cl_rpc_users); + + trace_nfsd_mark_client_expired(clp, users); + + if (users) return nfserr_jukebox; unhash_client_locked(clp); return nfs_ok; @@ -2229,13 +2410,12 @@ __destroy_client(struct nfs4_client *clp) int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; - struct list_head reaplist; + LIST_HEAD(reaplist); - INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); - WARN_ON(!unhash_delegation_locked(dp)); + unhash_delegation_locked(dp, SC_STATUS_CLOSED); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -2467,14 +2647,16 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t) } static struct nfs4_stid * -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, + unsigned short typemask, unsigned short ok_states) { struct nfs4_stid *s; spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, t); if (s != NULL) { - if (typemask & s->sc_type) + if ((s->sc_status & ~ok_states) == 0 && + (typemask & s->sc_type)) refcount_inc(&s->sc_count); else s = NULL; @@ -2494,9 +2676,9 @@ static struct nfs4_client *get_nfsdfs_clp(struct inode *inode) static void seq_quote_mem(struct seq_file *m, char *data, int len) { - seq_printf(m, "\""); + seq_puts(m, "\""); seq_escape_mem(m, data, len, ESCAPE_HEX | ESCAPE_NAP | ESCAPE_APPEND, "\"\\"); - seq_printf(m, "\""); + seq_puts(m, "\""); } static const char *cb_state2str(int state) @@ -2537,20 +2719,22 @@ static int client_info_show(struct seq_file *m, void *v) seq_puts(m, "status: unconfirmed\n"); seq_printf(m, "seconds from last renew: %lld\n", ktime_get_boottime_seconds() - clp->cl_time); - seq_printf(m, "name: "); + seq_puts(m, "name: "); seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); if (clp->cl_nii_domain.data) { - seq_printf(m, "Implementation domain: "); + seq_puts(m, "Implementation domain: "); seq_quote_mem(m, clp->cl_nii_domain.data, clp->cl_nii_domain.len); - seq_printf(m, "\nImplementation name: "); + seq_puts(m, "\nImplementation name: "); seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len); seq_printf(m, "\nImplementation time: [%lld, %ld]\n", clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); } seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state)); - seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr); + seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr); + seq_printf(m, "admin-revoked states: %d\n", + atomic_read(&clp->cl_admin_revoked)); drop_client(clp); return 0; @@ -2609,7 +2793,7 @@ static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f) static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo) { - seq_printf(s, "owner: "); + seq_puts(s, "owner: "); seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len); } @@ -2627,20 +2811,13 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) struct nfs4_stateowner *oo; unsigned int access, deny; - if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID) - return 0; /* XXX: or SEQ_SKIP? */ ols = openlockstateid(st); oo = ols->st_stateowner; nf = st->sc_file; - spin_lock(&nf->fi_lock); - file = find_any_file_locked(nf); - if (!file) - goto out; - - seq_printf(s, "- "); + seq_puts(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); - seq_printf(s, ": { type: open, "); + seq_puts(s, ": { type: open, "); access = bmap_to_share_mode(ols->st_access_bmap); deny = bmap_to_share_mode(ols->st_deny_bmap); @@ -2652,14 +2829,22 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) deny & NFS4_SHARE_ACCESS_READ ? "r" : "-", deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); - nfs4_show_superblock(s, file); - seq_printf(s, ", "); - nfs4_show_fname(s, file); - seq_printf(s, ", "); + if (nf) { + spin_lock(&nf->fi_lock); + file = find_any_file_locked(nf); + if (file) { + nfs4_show_superblock(s, file); + seq_puts(s, ", "); + nfs4_show_fname(s, file); + seq_puts(s, ", "); + } + spin_unlock(&nf->fi_lock); + } else + seq_puts(s, "closed, "); nfs4_show_owner(s, oo); - seq_printf(s, " }\n"); -out: - spin_unlock(&nf->fi_lock); + if (st->sc_status & SC_STATUS_ADMIN_REVOKED) + seq_puts(s, ", admin-revoked"); + seq_puts(s, " }\n"); return 0; } @@ -2673,30 +2858,31 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) ols = openlockstateid(st); oo = ols->st_stateowner; nf = st->sc_file; - spin_lock(&nf->fi_lock); - file = find_any_file_locked(nf); - if (!file) - goto out; - seq_printf(s, "- "); + seq_puts(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); - seq_printf(s, ": { type: lock, "); + seq_puts(s, ": { type: lock, "); - /* - * Note: a lock stateid isn't really the same thing as a lock, - * it's the locking state held by one owner on a file, and there - * may be multiple (or no) lock ranges associated with it. - * (Same for the matter is true of open stateids.) - */ + spin_lock(&nf->fi_lock); + file = find_any_file_locked(nf); + if (file) { + /* + * Note: a lock stateid isn't really the same thing as a lock, + * it's the locking state held by one owner on a file, and there + * may be multiple (or no) lock ranges associated with it. + * (Same for the matter is true of open stateids.) + */ - nfs4_show_superblock(s, file); - /* XXX: open stateid? */ - seq_printf(s, ", "); - nfs4_show_fname(s, file); - seq_printf(s, ", "); + nfs4_show_superblock(s, file); + /* XXX: open stateid? */ + seq_puts(s, ", "); + nfs4_show_fname(s, file); + seq_puts(s, ", "); + } nfs4_show_owner(s, oo); - seq_printf(s, " }\n"); -out: + if (st->sc_status & SC_STATUS_ADMIN_REVOKED) + seq_puts(s, ", admin-revoked"); + seq_puts(s, " }\n"); spin_unlock(&nf->fi_lock); return 0; } @@ -2709,27 +2895,28 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) ds = delegstateid(st); nf = st->sc_file; - spin_lock(&nf->fi_lock); - file = nf->fi_deleg_file; - if (!file) - goto out; - seq_printf(s, "- "); + seq_puts(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); - seq_printf(s, ": { type: deleg, "); + seq_puts(s, ": { type: deleg, "); - /* Kinda dead code as long as we only support read delegs: */ - seq_printf(s, "access: %s, ", - ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w"); + seq_printf(s, "access: %s", + ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w"); /* XXX: lease time, whether it's being recalled. */ - nfs4_show_superblock(s, file); - seq_printf(s, ", "); - nfs4_show_fname(s, file); - seq_printf(s, " }\n"); -out: + spin_lock(&nf->fi_lock); + file = nf->fi_deleg_file; + if (file) { + seq_puts(s, ", "); + nfs4_show_superblock(s, file); + seq_puts(s, ", "); + nfs4_show_fname(s, file); + } spin_unlock(&nf->fi_lock); + if (st->sc_status & SC_STATUS_ADMIN_REVOKED) + seq_puts(s, ", admin-revoked"); + seq_puts(s, " }\n"); return 0; } @@ -2739,18 +2926,25 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st) struct nfsd_file *file; ls = container_of(st, struct nfs4_layout_stateid, ls_stid); - file = ls->ls_file; - seq_printf(s, "- "); + seq_puts(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); - seq_printf(s, ": { type: layout, "); + seq_puts(s, ": { type: layout"); /* XXX: What else would be useful? */ - nfs4_show_superblock(s, file); - seq_printf(s, ", "); - nfs4_show_fname(s, file); - seq_printf(s, " }\n"); + spin_lock(&ls->ls_stid.sc_file->fi_lock); + file = ls->ls_file; + if (file) { + seq_puts(s, ", "); + nfs4_show_superblock(s, file); + seq_puts(s, ", "); + nfs4_show_fname(s, file); + } + spin_unlock(&ls->ls_stid.sc_file->fi_lock); + if (st->sc_status & SC_STATUS_ADMIN_REVOKED) + seq_puts(s, ", admin-revoked"); + seq_puts(s, " }\n"); return 0; } @@ -2760,13 +2954,13 @@ static int states_show(struct seq_file *s, void *v) struct nfs4_stid *st = v; switch (st->sc_type) { - case NFS4_OPEN_STID: + case SC_TYPE_OPEN: return nfs4_show_open(s, st); - case NFS4_LOCK_STID: + case SC_TYPE_LOCK: return nfs4_show_lock(s, st); - case NFS4_DELEG_STID: + case SC_TYPE_DELEG: return nfs4_show_deleg(s, st); - case NFS4_LAYOUT_STID: + case SC_TYPE_LAYOUT: return nfs4_show_layout(s, st); default: return 0; /* XXX: or SEQ_SKIP? */ @@ -2900,11 +3094,63 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) drop_client(clp); } +static int +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task) +{ + struct nfs4_cb_fattr *ncf = + container_of(cb, struct nfs4_cb_fattr, ncf_getattr); + struct nfs4_delegation *dp = + container_of(ncf, struct nfs4_delegation, dl_cb_fattr); + + trace_nfsd_cb_getattr_done(&dp->dl_stid.sc_stateid, task); + ncf->ncf_cb_status = task->tk_status; + switch (task->tk_status) { + case -NFS4ERR_DELAY: + rpc_delay(task, 2 * HZ); + return 0; + default: + return 1; + } +} + +static void +nfsd4_cb_getattr_release(struct nfsd4_callback *cb) +{ + struct nfs4_cb_fattr *ncf = + container_of(cb, struct nfs4_cb_fattr, ncf_getattr); + struct nfs4_delegation *dp = + container_of(ncf, struct nfs4_delegation, dl_cb_fattr); + + clear_and_wake_up_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags); + nfs4_put_stid(&dp->dl_stid); +} + static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { .done = nfsd4_cb_recall_any_done, .release = nfsd4_cb_recall_any_release, + .opcode = OP_CB_RECALL_ANY, +}; + +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = { + .done = nfsd4_cb_getattr_done, + .release = nfsd4_cb_getattr_release, + .opcode = OP_CB_GETATTR, }; +static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf) +{ + struct nfs4_delegation *dp = + container_of(ncf, struct nfs4_delegation, dl_cb_fattr); + + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags)) + return; + /* set to proper status when nfsd4_cb_getattr_done runs */ + ncf->ncf_cb_status = NFS4ERR_IO; + + refcount_inc(&dp->dl_stid.sc_count); + nfsd4_run_cb(&ncf->ncf_getattr); +} + static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -3418,6 +3664,9 @@ out_new: new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0]; new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1]; + /* Contrived initial CREATE_SESSION response */ + new->cl_cs_slot.sl_status = nfserr_seq_misordered; + add_to_unconfirmed(new); swap(new, conf); out_copy: @@ -3443,12 +3692,8 @@ out_nolock: return status; } -static __be32 -check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse) { - dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, - slot_seqid); - /* The slot is in use, and no response has been sent. */ if (slot_inuse) { if (seqid == slot_seqid) @@ -3588,10 +3833,10 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_create_session *cr_ses = &u->create_session; struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; + struct nfsd4_clid_slot *cs_slot; struct nfs4_client *old = NULL; struct nfsd4_session *new; struct nfsd4_conn *conn; - struct nfsd4_clid_slot *cs_slot = NULL; __be32 status = 0; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); @@ -3615,53 +3860,66 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_session; spin_lock(&nn->client_lock); + + /* RFC 8881 Section 18.36.4 Phase 1: Client record look-up. */ unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); conf = find_confirmed_client(&cr_ses->clientid, true, nn); - WARN_ON_ONCE(conf && unconf); + if (!conf && !unconf) { + status = nfserr_stale_clientid; + goto out_free_conn; + } + + /* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */ + if (conf) { + cs_slot = &conf->cl_cs_slot; + trace_nfsd_slot_seqid_conf(conf, cr_ses); + } else { + cs_slot = &unconf->cl_cs_slot; + trace_nfsd_slot_seqid_unconf(unconf, cr_ses); + } + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); + switch (status) { + case nfs_ok: + cs_slot->sl_seqid++; + cr_ses->seqid = cs_slot->sl_seqid; + break; + case nfserr_replay_cache: + status = nfsd4_replay_create_session(cr_ses, cs_slot); + fallthrough; + case nfserr_jukebox: + /* The server MUST NOT cache NFS4ERR_DELAY */ + goto out_free_conn; + default: + goto out_cache_error; + } + /* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */ if (conf) { status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(conf, rqstp)) - goto out_free_conn; - cs_slot = &conf->cl_cs_slot; - status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); - if (status) { - if (status == nfserr_replay_cache) - status = nfsd4_replay_create_session(cr_ses, cs_slot); - goto out_free_conn; - } - } else if (unconf) { + goto out_cache_error; + } else { status = nfserr_clid_inuse; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { trace_nfsd_clid_cred_mismatch(unconf, rqstp); - goto out_free_conn; + goto out_cache_error; } status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(unconf, rqstp)) - goto out_free_conn; - cs_slot = &unconf->cl_cs_slot; - status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); - if (status) { - /* an unconfirmed replay returns misordered */ - status = nfserr_seq_misordered; - goto out_free_conn; - } + goto out_cache_error; old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = mark_client_expired_locked(old); - if (status) { - old = NULL; - goto out_free_conn; - } + if (status) + goto out_expired_error; trace_nfsd_clid_replaced(&old->cl_clientid); } move_to_confirmed(unconf); conf = unconf; - } else { - status = nfserr_stale_clientid; - goto out_free_conn; } + + /* RFC 8881 Section 18.36.4 Phase 4: Session creation. */ status = nfs_ok; /* Persistent sessions are not supported */ cr_ses->flags &= ~SESSION4_PERSIST; @@ -3673,8 +3931,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); - cs_slot->sl_seqid++; - cr_ses->seqid = cs_slot->sl_seqid; /* cache solo and embedded create sessions under the client_lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); @@ -3687,6 +3943,20 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (old) expire_client(old); return status; + +out_expired_error: + old = NULL; + /* + * Revert the slot seq_nr change so the server will process + * the client's resend instead of returning a cached response. + */ + if (status == nfserr_jukebox) { + cs_slot->sl_seqid--; + cr_ses->seqid = cs_slot->sl_seqid; + goto out_free_conn; + } +out_cache_error: + nfsd4_cache_create_session(cr_ses, cs_slot, status); out_free_conn: spin_unlock(&nn->client_lock); free_conn(conn); @@ -4001,6 +4271,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * sr_highest_slotid and the sr_target_slot id to maxslots */ seq->maxslots = session->se_fchannel.maxreqs; + trace_nfsd_slot_seqid_sequence(clp, seq, slot); status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags & NFSD4_SLOT_INUSE); if (status == nfserr_replay_cache) { @@ -4062,6 +4333,9 @@ out: } if (!list_empty(&clp->cl_revoked)) seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; + if (atomic_read(&clp->cl_admin_revoked)) + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED; + trace_nfsd_seq4_status(rqstp, seq); out_no_session: if (conn) free_conn(conn); @@ -4276,10 +4550,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } status = nfs_ok; if (conf) { - old = unconf; - unhash_client_locked(old); - nfsd4_change_callback(conf, &unconf->cl_cb_conn); - } else { + if (get_client_locked(conf) == nfs_ok) { + old = unconf; + unhash_client_locked(old); + nfsd4_change_callback(conf, &unconf->cl_cb_conn); + } else { + conf = NULL; + } + } + + if (!conf) { old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = nfserr_clid_inuse; @@ -4296,10 +4576,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } trace_nfsd_clid_replaced(&old->cl_clientid); } + status = get_client_locked(unconf); + if (status != nfs_ok) { + old = NULL; + goto out; + } move_to_confirmed(unconf); conf = unconf; } - get_client_locked(conf); spin_unlock(&nn->client_lock); if (conf == unconf) fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY); @@ -4356,32 +4640,25 @@ nfsd4_free_slabs(void) int nfsd4_init_slabs(void) { - client_slab = kmem_cache_create("nfsd4_clients", - sizeof(struct nfs4_client), 0, 0, NULL); + client_slab = KMEM_CACHE(nfs4_client, 0); if (client_slab == NULL) goto out; - openowner_slab = kmem_cache_create("nfsd4_openowners", - sizeof(struct nfs4_openowner), 0, 0, NULL); + openowner_slab = KMEM_CACHE(nfs4_openowner, 0); if (openowner_slab == NULL) goto out_free_client_slab; - lockowner_slab = kmem_cache_create("nfsd4_lockowners", - sizeof(struct nfs4_lockowner), 0, 0, NULL); + lockowner_slab = KMEM_CACHE(nfs4_lockowner, 0); if (lockowner_slab == NULL) goto out_free_openowner_slab; - file_slab = kmem_cache_create("nfsd4_files", - sizeof(struct nfs4_file), 0, 0, NULL); + file_slab = KMEM_CACHE(nfs4_file, 0); if (file_slab == NULL) goto out_free_lockowner_slab; - stateid_slab = kmem_cache_create("nfsd4_stateids", - sizeof(struct nfs4_ol_stateid), 0, 0, NULL); + stateid_slab = KMEM_CACHE(nfs4_ol_stateid, 0); if (stateid_slab == NULL) goto out_free_file_slab; - deleg_slab = kmem_cache_create("nfsd4_delegations", - sizeof(struct nfs4_delegation), 0, 0, NULL); + deleg_slab = KMEM_CACHE(nfs4_delegation, 0); if (deleg_slab == NULL) goto out_free_stateid_slab; - odstate_slab = kmem_cache_create("nfsd4_odstate", - sizeof(struct nfs4_clnt_odstate), 0, 0, NULL); + odstate_slab = KMEM_CACHE(nfs4_clnt_odstate, 0); if (odstate_slab == NULL) goto out_free_deleg_slab; return 0; @@ -4406,8 +4683,7 @@ static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { int count; - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_client_shrinker); + struct nfsd_net *nn = shrink->private_data; count = atomic_read(&nn->nfsd_courtesy_clients); if (!count) @@ -4447,21 +4723,32 @@ nfsd4_init_leases_net(struct nfsd_net *nn) atomic_set(&nn->nfsd_courtesy_clients, 0); } +enum rp_lock { + RP_UNLOCKED, + RP_LOCKED, + RP_UNHASHED, +}; + static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; - mutex_init(&rp->rp_mutex); + atomic_set(&rp->rp_locked, RP_UNLOCKED); } -static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, - struct nfs4_stateowner *so) +static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, + struct nfs4_stateowner *so) { if (!nfsd4_has_session(cstate)) { - mutex_lock(&so->so_replay.rp_mutex); + wait_var_event(&so->so_replay.rp_locked, + atomic_cmpxchg(&so->so_replay.rp_locked, + RP_UNLOCKED, RP_LOCKED) != RP_LOCKED); + if (atomic_read(&so->so_replay.rp_locked) == RP_UNHASHED) + return -EAGAIN; cstate->replay_owner = nfs4_get_stateowner(so); } + return 0; } void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) @@ -4470,7 +4757,9 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) if (so != NULL) { cstate->replay_owner = NULL; - mutex_unlock(&so->so_replay.rp_mutex); + atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED); + smp_mb__after_atomic(); + wake_up_var(&so->so_replay.rp_locked); nfs4_put_stateowner(so); } } @@ -4536,7 +4825,8 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) continue; if (local->st_stateowner != &oo->oo_owner) continue; - if (local->st_stid.sc_type == NFS4_OPEN_STID) { + if (local->st_stid.sc_type == SC_TYPE_OPEN && + !local->st_stid.sc_status) { ret = local; refcount_inc(&ret->st_stid.sc_count); break; @@ -4545,22 +4835,75 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) return ret; } -static __be32 -nfsd4_verify_open_stid(struct nfs4_stid *s) +static void nfsd4_drop_revoked_stid(struct nfs4_stid *s) + __releases(&s->sc_client->cl_lock) { - __be32 ret = nfs_ok; + struct nfs4_client *cl = s->sc_client; + LIST_HEAD(reaplist); + struct nfs4_ol_stateid *stp; + struct nfs4_delegation *dp; + bool unhashed; switch (s->sc_type) { - default: + case SC_TYPE_OPEN: + stp = openlockstateid(s); + if (unhash_open_stateid(stp, &reaplist)) + put_ol_stateid_locked(stp, &reaplist); + spin_unlock(&cl->cl_lock); + free_ol_stateid_reaplist(&reaplist); break; - case 0: - case NFS4_CLOSED_STID: - case NFS4_CLOSED_DELEG_STID: - ret = nfserr_bad_stateid; + case SC_TYPE_LOCK: + stp = openlockstateid(s); + unhashed = unhash_lock_stateid(stp); + spin_unlock(&cl->cl_lock); + if (unhashed) + nfs4_put_stid(s); break; - case NFS4_REVOKED_DELEG_STID: - ret = nfserr_deleg_revoked; + case SC_TYPE_DELEG: + dp = delegstateid(s); + list_del_init(&dp->dl_recall_lru); + spin_unlock(&cl->cl_lock); + nfs4_put_stid(s); + break; + default: + spin_unlock(&cl->cl_lock); } +} + +static void nfsd40_drop_revoked_stid(struct nfs4_client *cl, + stateid_t *stid) +{ + /* NFSv4.0 has no way for the client to tell the server + * that it can forget an admin-revoked stateid. + * So we keep it around until the first time that the + * client uses it, and drop it the first time + * nfserr_admin_revoked is returned. + * For v4.1 and later we wait until explicitly told + * to free the stateid. + */ + if (cl->cl_minorversion == 0) { + struct nfs4_stid *st; + + spin_lock(&cl->cl_lock); + st = find_stateid_locked(cl, stid); + if (st) + nfsd4_drop_revoked_stid(st); + else + spin_unlock(&cl->cl_lock); + } +} + +static __be32 +nfsd4_verify_open_stid(struct nfs4_stid *s) +{ + __be32 ret = nfs_ok; + + if (s->sc_status & SC_STATUS_ADMIN_REVOKED) + ret = nfserr_admin_revoked; + else if (s->sc_status & SC_STATUS_REVOKED) + ret = nfserr_deleg_revoked; + else if (s->sc_status & SC_STATUS_CLOSED) + ret = nfserr_bad_stateid; return ret; } @@ -4572,6 +4915,10 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX); ret = nfsd4_verify_open_stid(&stp->st_stid); + if (ret == nfserr_admin_revoked) + nfsd40_drop_revoked_stid(stp->st_stid.sc_client, + &stp->st_stid.sc_stateid); + if (ret != nfs_ok) mutex_unlock(&stp->st_mutex); return ret; @@ -4593,34 +4940,46 @@ nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) } static struct nfs4_openowner * -alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, - struct nfsd4_compound_state *cstate) +find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, + struct nfsd4_compound_state *cstate) { struct nfs4_client *clp = cstate->clp; - struct nfs4_openowner *oo, *ret; + struct nfs4_openowner *oo, *new = NULL; - oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); - if (!oo) - return NULL; - oo->oo_owner.so_ops = &openowner_ops; - oo->oo_owner.so_is_open_owner = 1; - oo->oo_owner.so_seqid = open->op_seqid; - oo->oo_flags = 0; - if (nfsd4_has_session(cstate)) - oo->oo_flags |= NFS4_OO_CONFIRMED; - oo->oo_time = 0; - oo->oo_last_closed_stid = NULL; - INIT_LIST_HEAD(&oo->oo_close_lru); +retry: spin_lock(&clp->cl_lock); - ret = find_openstateowner_str_locked(strhashval, open, clp); - if (ret == NULL) { - hash_openowner(oo, clp, strhashval); - ret = oo; - } else - nfs4_free_stateowner(&oo->oo_owner); - + oo = find_openstateowner_str(strhashval, open, clp); + if (!oo && new) { + hash_openowner(new, clp, strhashval); + spin_unlock(&clp->cl_lock); + return new; + } spin_unlock(&clp->cl_lock); - return ret; + + if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) { + /* Replace unconfirmed owners without checking for replay. */ + release_openowner(oo); + oo = NULL; + } + if (oo) { + if (new) + nfs4_free_stateowner(&new->oo_owner); + return oo; + } + + new = alloc_stateowner(openowner_slab, &open->op_owner, clp); + if (!new) + return NULL; + new->oo_owner.so_ops = &openowner_ops; + new->oo_owner.so_is_open_owner = 1; + new->oo_owner.so_seqid = open->op_seqid; + new->oo_flags = 0; + if (nfsd4_has_session(cstate)) + new->oo_flags |= NFS4_OO_CONFIRMED; + new->oo_time = 0; + new->oo_last_closed_stid = NULL; + INIT_LIST_HEAD(&new->oo_close_lru); + goto retry; } static struct nfs4_ol_stateid * @@ -4652,7 +5011,7 @@ retry: open->op_stp = NULL; refcount_inc(&stp->st_stid.sc_count); - stp->st_stid.sc_type = NFS4_OPEN_STID; + stp->st_stid.sc_type = SC_TYPE_OPEN; INIT_LIST_HEAD(&stp->st_locks); stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner); get_nfs4_file(fp); @@ -4702,7 +5061,12 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) * Wait for the refcount to drop to 2. Since it has been unhashed, * there should be no danger of the refcount going back up again at * this point. + * Some threads with a reference might be waiting for rp_locked, + * so tell them to stop waiting. */ + atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED); + smp_mb__after_atomic(); + wake_up_var(&oo->oo_owner.so_replay.rp_locked); wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2); release_all_access(s); @@ -4879,9 +5243,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task); - if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID || - dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) - return 1; + if (dp->dl_stid.sc_status) + /* CLOSED or REVOKED */ + return 1; switch (task->tk_status) { case 0: @@ -4916,10 +5280,12 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = { .prepare = nfsd4_cb_recall_prepare, .done = nfsd4_cb_recall_done, .release = nfsd4_cb_recall_release, + .opcode = OP_CB_RECALL, }; static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { + bool queued; /* * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease @@ -4928,14 +5294,17 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall)); + queued = nfsd4_run_cb(&dp->dl_recall); + WARN_ON_ONCE(!queued); + if (!queued) + refcount_dec(&dp->dl_stid.sc_count); } /* Called from break_lease() with flc_lock held. */ static bool -nfsd_break_deleg_cb(struct file_lock *fl) +nfsd_break_deleg_cb(struct file_lease *fl) { - struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; + struct nfs4_delegation *dp = (struct nfs4_delegation *) fl->c.flc_owner; struct nfs4_file *fp = dp->dl_stid.sc_file; struct nfs4_client *clp = dp->dl_stid.sc_client; struct nfsd_net *nn; @@ -4969,27 +5338,24 @@ nfsd_break_deleg_cb(struct file_lock *fl) * %true: Lease conflict was resolved * %false: Lease conflict was not resolved. */ -static bool nfsd_breaker_owns_lease(struct file_lock *fl) +static bool nfsd_breaker_owns_lease(struct file_lease *fl) { - struct nfs4_delegation *dl = fl->fl_owner; + struct nfs4_delegation *dl = fl->c.flc_owner; struct svc_rqst *rqst; struct nfs4_client *clp; - if (!i_am_nfsd()) - return false; - rqst = kthread_data(current); - /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */ - if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4) + rqst = nfsd_current_rqst(); + if (!nfsd_v4client(rqst)) return false; clp = *(rqst->rq_lease_breaker); return dl->dl_stid.sc_client == clp; } static int -nfsd_change_deleg_cb(struct file_lock *onlist, int arg, +nfsd_change_deleg_cb(struct file_lease *onlist, int arg, struct list_head *dispose) { - struct nfs4_delegation *dp = (struct nfs4_delegation *)onlist->fl_owner; + struct nfs4_delegation *dp = (struct nfs4_delegation *) onlist->c.flc_owner; struct nfs4_client *clp = dp->dl_stid.sc_client; if (arg & F_UNLCK) { @@ -5000,7 +5366,7 @@ nfsd_change_deleg_cb(struct file_lock *onlist, int arg, return -EAGAIN; } -static const struct lock_manager_operations nfsd_lease_mng_ops = { +static const struct lease_manager_operations nfsd_lease_mng_ops = { .lm_breaker_owns_lease = nfsd_breaker_owns_lease, .lm_break = nfsd_break_deleg_cb, .lm_change = nfsd_change_deleg_cb, @@ -5075,27 +5441,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, clp = cstate->clp; strhashval = ownerstr_hashval(&open->op_owner); - oo = find_openstateowner_str(strhashval, open, clp); +retry: + oo = find_or_alloc_open_stateowner(strhashval, open, cstate); open->op_openowner = oo; - if (!oo) { - goto new_owner; - } - if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { - /* Replace unconfirmed owners without checking for replay. */ - release_openowner(oo); - open->op_openowner = NULL; - goto new_owner; + if (!oo) + return nfserr_jukebox; + if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) { + nfs4_put_stateowner(&oo->oo_owner); + goto retry; } status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); if (status) return status; - goto alloc_stateid; -new_owner: - oo = alloc_init_open_stateowner(strhashval, open, cstate); - if (oo == NULL) - return nfserr_jukebox; - open->op_openowner = oo; -alloc_stateid: + open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox; @@ -5124,12 +5482,12 @@ static int share_access_to_flags(u32 share_access) return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; } -static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s) +static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, + stateid_t *s) { struct nfs4_stid *ret; - ret = find_stateid_by_type(cl, s, - NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID); + ret = find_stateid_by_type(cl, s, SC_TYPE_DELEG, SC_STATUS_REVOKED); if (!ret) return NULL; return delegstateid(ret); @@ -5152,10 +5510,15 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, deleg = find_deleg_stateid(cl, &open->op_delegate_stateid); if (deleg == NULL) goto out; - if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) { + if (deleg->dl_stid.sc_status & SC_STATUS_ADMIN_REVOKED) { nfs4_put_stid(&deleg->dl_stid); - if (cl->cl_minorversion) - status = nfserr_deleg_revoked; + status = nfserr_admin_revoked; + goto out; + } + if (deleg->dl_stid.sc_status & SC_STATUS_REVOKED) { + nfs4_put_stid(&deleg->dl_stid); + nfsd40_drop_revoked_stid(cl, &open->op_delegate_stateid); + status = nfserr_deleg_revoked; goto out; } flags = share_access_to_flags(open->op_share_access); @@ -5200,7 +5563,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; - return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0); + return nfsd_setattr(rqstp, fh, &attrs, NULL); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, @@ -5340,21 +5703,20 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } -static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, +static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag) { - struct file_lock *fl; + struct file_lease *fl; - fl = locks_alloc_lock(); + fl = locks_alloc_lease(); if (!fl) return NULL; fl->fl_lmops = &nfsd_lease_mng_ops; - fl->fl_flags = FL_DELEG; - fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; - fl->fl_end = OFFSET_MAX; - fl->fl_owner = (fl_owner_t)dp; - fl->fl_pid = current->tgid; - fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file; + fl->c.flc_flags = FL_DELEG; + fl->c.flc_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; + fl->c.flc_owner = (fl_owner_t)dp; + fl->c.flc_pid = current->tgid; + fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file; return fl; } @@ -5472,7 +5834,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf = NULL; - struct file_lock *fl; + struct file_lease *fl; u32 dl_type; /* @@ -5542,9 +5904,10 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (!fl) goto out_clnt_odstate; - status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL); + status = kernel_setlease(fp->fi_deleg_file->nf_file, + fl->c.flc_type, &fl, NULL); if (fl) - locks_free_lock(fl); + locks_free_lease(fl); if (status) goto out_clnt_odstate; @@ -5560,7 +5923,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, /* * Now that the deleg is set, check again to ensure that nothing - * raced in and changed the mode while we weren't lookng. + * raced in and changed the mode while we weren't looking. */ status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file); if (status) @@ -5571,9 +5934,11 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, goto out_unlock; spin_lock(&state_lock); + spin_lock(&clp->cl_lock); spin_lock(&fp->fi_lock); status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); + spin_unlock(&clp->cl_lock); spin_unlock(&state_lock); if (status) @@ -5581,7 +5946,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, return dp; out_unlock: - vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp); + kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp); out_clnt_odstate: put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_stid(&dp->dl_stid); @@ -5611,6 +5976,28 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) } } +static bool +nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, + struct kstat *stat) +{ + struct nfsd_file *nf = find_rw_file(dp->dl_stid.sc_file); + struct path path; + int rc; + + if (!nf) + return false; + + path.mnt = currentfh->fh_export->ex_path.mnt; + path.dentry = file_dentry(nf->nf_file); + + rc = vfs_getattr(&path, stat, + (STATX_MODE | STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE), + AT_STATX_SYNC_AS_STAT); + + nfsd_file_put(nf); + return rc == 0; +} + /* * The Linux NFS server does not offer write delegations to NFSv4.0 * clients in order to avoid conflicts between write delegations and @@ -5645,13 +6032,14 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *parent = NULL; int cb_up; int status = 0; + struct kstat stat; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); - open->op_recall = 0; + open->op_recall = false; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: if (!cb_up) - open->op_recall = 1; + open->op_recall = true; break; case NFS4_OPEN_CLAIM_NULL: parent = currentfh; @@ -5680,7 +6068,14 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { + if (!nfs4_delegation_stat(dp, currentfh, &stat)) { + nfs4_put_stid(&dp->dl_stid); + destroy_delegation(dp); + goto out_no_deleg; + } open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE; + dp->dl_cb_fattr.ncf_cur_fsize = stat.size; + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat); trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); } else { open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; @@ -5693,7 +6088,7 @@ out_no_deleg: if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { dprintk("NFSD: WARNING: refusing delegation reclaim\n"); - open->op_recall = 1; + open->op_recall = true; } /* 4.1 client asking for a delegation? */ @@ -5755,6 +6150,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; + if (dp && nfsd4_is_deleg_cur(open) && + (dp->dl_stid.sc_file != fp)) { + /* + * RFC8881 section 8.2.4 mandates the server to return + * NFS4ERR_BAD_STATEID if the selected table entry does + * not match the current filehandle. However returning + * NFS4ERR_BAD_STATEID in the OPEN can cause the client + * to repeatedly retry the operation with the same + * stateid, since the stateid itself is valid. To avoid + * this situation NFSD returns NFS4ERR_INVAL instead. + */ + status = nfserr_inval; + goto out; + } stp = nfsd4_find_and_lock_existing_open(fp, open); } else { open->op_file = NULL; @@ -5790,7 +6199,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } else { status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true); if (status) { - stp->st_stid.sc_type = NFS4_CLOSED_STID; release_open_stateid(stp); mutex_unlock(&stp->st_mutex); goto out; @@ -5851,12 +6259,8 @@ out: void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { - if (open->op_openowner) { - struct nfs4_stateowner *so = &open->op_openowner->oo_owner; - - nfsd4_cstate_assign_replay(cstate, so); - nfs4_put_stateowner(so); - } + if (open->op_openowner) + nfs4_put_stateowner(&open->op_openowner->oo_owner); if (open->op_file) kmem_cache_free(file_slab, open->op_file); if (open->op_stp) @@ -5966,7 +6370,6 @@ void nfsd4_ssc_init_umount_work(struct nfsd_net *nn) INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list); init_waitqueue_head(&nn->nfsd_ssc_waitq); } -EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work); /* * This is called when nfsd is being shutdown, after all inter_ssc @@ -6144,6 +6547,43 @@ nfs4_process_client_reaplist(struct list_head *reaplist) } } +static void nfs40_clean_admin_revoked(struct nfsd_net *nn, + struct laundry_time *lt) +{ + struct nfs4_client *clp; + + spin_lock(&nn->client_lock); + if (nn->nfs40_last_revoke == 0 || + nn->nfs40_last_revoke > lt->cutoff) { + spin_unlock(&nn->client_lock); + return; + } + nn->nfs40_last_revoke = 0; + +retry: + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + unsigned long id, tmp; + struct nfs4_stid *stid; + + if (atomic_read(&clp->cl_admin_revoked) == 0) + continue; + + spin_lock(&clp->cl_lock); + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id) + if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) { + refcount_inc(&stid->sc_count); + spin_unlock(&nn->client_lock); + /* this function drops ->cl_lock */ + nfsd4_drop_revoked_stid(stid); + nfs4_put_stid(stid); + spin_lock(&nn->client_lock); + goto retry; + } + spin_unlock(&clp->cl_lock); + } + spin_unlock(&nn->client_lock); +} + static time64_t nfs4_laundromat(struct nfsd_net *nn) { @@ -6177,12 +6617,15 @@ nfs4_laundromat(struct nfsd_net *nn) nfs4_get_client_reaplist(nn, &reaplist, <); nfs4_process_client_reaplist(&reaplist); + nfs40_clean_admin_revoked(nn, <); + spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if (!state_expired(<, dp->dl_time)) break; - WARN_ON(!unhash_delegation_locked(dp)); + refcount_inc(&dp->dl_stid.sc_count); + unhash_delegation_locked(dp, SC_STATUS_REVOKED); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -6241,6 +6684,8 @@ nfs4_laundromat(struct nfsd_net *nn) /* service the server-to-server copy delayed unmount list */ nfsd4_ssc_expire_umount(nn); #endif + if (atomic_long_read(&num_delegations) >= max_delegations) + deleg_reaper(nn); out: return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); } @@ -6273,20 +6718,24 @@ deleg_reaper(struct nfsd_net *nn) { struct list_head *pos, *next; struct nfs4_client *clp; - struct list_head cblist; + LIST_HEAD(cblist); - INIT_LIST_HEAD(&cblist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_state != NFSD4_ACTIVE || - list_empty(&clp->cl_delegations) || - atomic_read(&clp->cl_delegs_in_recall) || - test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || - (ktime_get_boottime_seconds() - - clp->cl_ra_time < 5)) { + + if (clp->cl_state != NFSD4_ACTIVE) + continue; + if (list_empty(&clp->cl_delegations)) + continue; + if (atomic_read(&clp->cl_delegs_in_recall)) + continue; + if (test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags)) + continue; + if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5) + continue; + if (clp->cl_cb_state != NFSD4_CB_UP) continue; - } list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ @@ -6301,7 +6750,8 @@ deleg_reaper(struct nfsd_net *nn) cl_ra_cblist); list_del_init(&clp->cl_ra_cblist); clp->cl_ra->ra_keep = 0; - clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) | + BIT(RCA4_TYPE_MASK_WDATA_DLG); trace_nfsd_cb_recall_any(clp->cl_ra); nfsd4_run_cb(&clp->cl_ra->ra_cb); } @@ -6395,6 +6845,9 @@ static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_sti if (ret == nfs_ok) ret = check_stateid_generation(in, &s->sc_stateid, has_session); spin_unlock(&s->sc_lock); + if (ret == nfserr_admin_revoked) + nfsd40_drop_revoked_stid(s->sc_client, + &s->sc_stateid); return ret; } @@ -6421,32 +6874,33 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) status = nfsd4_stid_check_stateid_generation(stateid, s, 1); if (status) goto out_unlock; + status = nfsd4_verify_open_stid(s); + if (status) + goto out_unlock; + switch (s->sc_type) { - case NFS4_DELEG_STID: + case SC_TYPE_DELEG: status = nfs_ok; break; - case NFS4_REVOKED_DELEG_STID: - status = nfserr_deleg_revoked; - break; - case NFS4_OPEN_STID: - case NFS4_LOCK_STID: + case SC_TYPE_OPEN: + case SC_TYPE_LOCK: status = nfsd4_check_openowner_confirmed(openlockstateid(s)); break; default: printk("unknown stateid type %x\n", s->sc_type); - fallthrough; - case NFS4_CLOSED_STID: - case NFS4_CLOSED_DELEG_STID: status = nfserr_bad_stateid; } out_unlock: spin_unlock(&cl->cl_lock); + if (status == nfserr_admin_revoked) + nfsd40_drop_revoked_stid(cl, stateid); return status; } __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, - stateid_t *stateid, unsigned char typemask, + stateid_t *stateid, + unsigned short typemask, unsigned short statusmask, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; @@ -6457,10 +6911,15 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, * only return revoked delegations if explicitly asked. * otherwise we report revoked or bad_stateid status. */ - if (typemask & NFS4_REVOKED_DELEG_STID) + if (statusmask & SC_STATUS_REVOKED) return_revoked = true; - else if (typemask & NFS4_DELEG_STID) - typemask |= NFS4_REVOKED_DELEG_STID; + if (typemask & SC_TYPE_DELEG) + /* Always allow REVOKED for DELEG so we can + * retturn the appropriate error. + */ + statusmask |= SC_STATUS_REVOKED; + + statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) @@ -6473,14 +6932,17 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, } if (status) return status; - stid = find_stateid_by_type(cstate->clp, stateid, typemask); + stid = find_stateid_by_type(cstate->clp, stateid, typemask, statusmask); if (!stid) return nfserr_bad_stateid; - if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { + if ((stid->sc_status & SC_STATUS_REVOKED) && !return_revoked) { nfs4_put_stid(stid); - if (cstate->minorversion) - return nfserr_deleg_revoked; - return nfserr_bad_stateid; + return nfserr_deleg_revoked; + } + if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) { + nfsd40_drop_revoked_stid(cstate->clp, stateid); + nfs4_put_stid(stid); + return nfserr_admin_revoked; } *s = stid; return nfs_ok; @@ -6491,17 +6953,17 @@ nfs4_find_file(struct nfs4_stid *s, int flags) { struct nfsd_file *ret = NULL; - if (!s) + if (!s || s->sc_status) return NULL; switch (s->sc_type) { - case NFS4_DELEG_STID: + case SC_TYPE_DELEG: spin_lock(&s->sc_file->fi_lock); ret = nfsd_file_get(s->sc_file->fi_deleg_file); spin_unlock(&s->sc_file->fi_lock); break; - case NFS4_OPEN_STID: - case NFS4_LOCK_STID: + case SC_TYPE_OPEN: + case SC_TYPE_LOCK: if (flags & RD_STATE) ret = find_readable_file(s->sc_file); else @@ -6532,7 +6994,8 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, nf = nfs4_find_file(s, flags); if (nf) { - status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + status = nfsd_permission(&rqstp->rq_cred, + fhp->fh_export, fhp->fh_dentry, acc | NFSD_MAY_OWNER_OVERRIDE); if (status) { nfsd_file_put(nf); @@ -6590,7 +7053,7 @@ unlock: spin_unlock(&nn->s2s_cp_lock); if (!state) return nfserr_bad_stateid; - if (!clp && state) + if (!clp) *cps = state; return 0; } @@ -6614,7 +7077,8 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, goto out; *stid = find_stateid_by_type(found, &cps->cp_p_stateid, - NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID); + SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK, + 0); if (*stid) status = nfs_ok; else @@ -6662,17 +7126,13 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, *nfp = NULL; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { - if (cstid) - status = nfserr_bad_stateid; - else - status = check_special_stateids(net, fhp, stateid, - flags); + status = check_special_stateids(net, fhp, stateid, flags); goto done; } status = nfsd4_lookup_stateid(cstate, stateid, - NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, - &s, nn); + SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK, + 0, &s, nn); if (status == nfserr_bad_stateid) status = find_cpntf_state(nn, stateid, &s); if (status) @@ -6683,16 +7143,13 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, goto out; switch (s->sc_type) { - case NFS4_DELEG_STID: + case SC_TYPE_DELEG: status = nfs4_check_delegmode(delegstateid(s), flags); break; - case NFS4_OPEN_STID: - case NFS4_LOCK_STID: + case SC_TYPE_OPEN: + case SC_TYPE_LOCK: status = nfs4_check_olstateid(openlockstateid(s), flags); break; - default: - status = nfserr_bad_stateid; - break; } if (status) goto out; @@ -6771,34 +7228,42 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); - if (!s) + if (!s || s->sc_status & SC_STATUS_CLOSED) goto out_unlock; + if (s->sc_status & SC_STATUS_ADMIN_REVOKED) { + nfsd4_drop_revoked_stid(s); + ret = nfs_ok; + goto out; + } spin_lock(&s->sc_lock); switch (s->sc_type) { - case NFS4_DELEG_STID: + case SC_TYPE_DELEG: + if (s->sc_status & SC_STATUS_REVOKED) { + s->sc_status |= SC_STATUS_CLOSED; + spin_unlock(&s->sc_lock); + dp = delegstateid(s); + if (s->sc_status & SC_STATUS_FREEABLE) + list_del_init(&dp->dl_recall_lru); + s->sc_status |= SC_STATUS_FREED; + spin_unlock(&cl->cl_lock); + nfs4_put_stid(s); + ret = nfs_ok; + goto out; + } ret = nfserr_locks_held; break; - case NFS4_OPEN_STID: + case SC_TYPE_OPEN: ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) break; ret = nfserr_locks_held; break; - case NFS4_LOCK_STID: + case SC_TYPE_LOCK: spin_unlock(&s->sc_lock); refcount_inc(&s->sc_count); spin_unlock(&cl->cl_lock); ret = nfsd4_free_lock_stateid(stateid, s); goto out; - case NFS4_REVOKED_DELEG_STID: - spin_unlock(&s->sc_lock); - dp = delegstateid(s); - list_del_init(&dp->dl_recall_lru); - spin_unlock(&cl->cl_lock); - nfs4_put_stid(s); - ret = nfs_ok; - goto out; - /* Default falls through and returns nfserr_bad_stateid */ } spin_unlock(&s->sc_lock); out_unlock: @@ -6840,6 +7305,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * @seqid: seqid (provided by client) * @stateid: stateid (provided by client) * @typemask: mask of allowable types for this operation + * @statusmask: mask of allowed states: 0 or STID_CLOSED * @stpp: return pointer for the stateid found * @nn: net namespace for request * @@ -6849,7 +7315,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ */ static __be32 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, - stateid_t *stateid, char typemask, + stateid_t *stateid, + unsigned short typemask, unsigned short statusmask, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn) { @@ -6860,11 +7327,16 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, trace_nfsd_preprocess(seqid, stateid); *stpp = NULL; - status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn); +retry: + status = nfsd4_lookup_stateid(cstate, stateid, + typemask, statusmask, &s, nn); if (status) return status; stp = openlockstateid(s); - nfsd4_cstate_assign_replay(cstate, stp->st_stateowner); + if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) { + nfs4_put_stateowner(stp->st_stateowner); + goto retry; + } status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); if (!status) @@ -6882,7 +7354,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs struct nfs4_ol_stateid *stp; status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, - NFS4_OPEN_STID, &stp, nn); + SC_TYPE_OPEN, 0, &stp, nn); if (status) return status; oo = openowner(stp->st_stateowner); @@ -6913,8 +7385,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; status = nfs4_preprocess_seqid_op(cstate, - oc->oc_seqid, &oc->oc_req_stateid, - NFS4_OPEN_STID, &stp, nn); + oc->oc_seqid, &oc->oc_req_stateid, + SC_TYPE_OPEN, 0, &stp, nn); if (status) goto out; oo = openowner(stp->st_stateowner); @@ -7006,7 +7478,7 @@ out: return status; } -static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) +static bool nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { struct nfs4_client *clp = s->st_stid.sc_client; bool unhashed; @@ -7023,11 +7495,11 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) list_for_each_entry(stp, &reaplist, st_locks) nfs4_free_cpntf_statelist(clp->net, &stp->st_stid); free_ol_stateid_reaplist(&reaplist); + return false; } else { spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); - if (unhashed) - move_to_close_lru(s, clp->net); + return unhashed; } } @@ -7043,19 +7515,22 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *stp; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); + bool need_move_to_close_list; - dprintk("NFSD: nfsd4_close on file %pd\n", + dprintk("NFSD: nfsd4_close on file %pd\n", cstate->current_fh.fh_dentry); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, - &close->cl_stateid, - NFS4_OPEN_STID|NFS4_CLOSED_STID, - &stp, nn); + &close->cl_stateid, + SC_TYPE_OPEN, SC_STATUS_CLOSED, + &stp, nn); nfsd4_bump_seqid(cstate, status); if (status) - goto out; + goto out; - stp->st_stid.sc_type = NFS4_CLOSED_STID; + spin_lock(&stp->st_stid.sc_client->cl_lock); + stp->st_stid.sc_status |= SC_STATUS_CLOSED; + spin_unlock(&stp->st_stid.sc_client->cl_lock); /* * Technically we don't _really_ have to increment or copy it, since @@ -7065,8 +7540,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, */ nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - nfsd4_close_open_stateid(stp); + need_move_to_close_list = nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); + if (need_move_to_close_list) + move_to_close_lru(stp, net); /* v4.1+ suggests that we send a special stateid in here, since the * clients should just ignore this anyway. Since this is not useful @@ -7097,7 +7574,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn); + status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn); if (status) goto out; dp = delegstateid(s); @@ -7106,8 +7583,9 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto put_stateid; trace_nfsd_deleg_return(stateid); - wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); + smp_mb__after_atomic(); + wake_up_var(d_inode(cstate->current_fh.fh_dentry)); put_stateid: nfs4_put_stid(&dp->dl_stid); out: @@ -7164,7 +7642,7 @@ nfsd4_lm_put_owner(fl_owner_t owner) static bool nfsd4_lm_lock_expirable(struct file_lock *cfl) { - struct nfs4_lockowner *lo = (struct nfs4_lockowner *)cfl->fl_owner; + struct nfs4_lockowner *lo = (struct nfs4_lockowner *) cfl->c.flc_owner; struct nfs4_client *clp = lo->lo_owner.so_client; struct nfsd_net *nn; @@ -7186,7 +7664,7 @@ nfsd4_lm_expire_lock(void) static void nfsd4_lm_notify(struct file_lock *fl) { - struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner; + struct nfs4_lockowner *lo = (struct nfs4_lockowner *) fl->c.flc_owner; struct net *net = lo->lo_owner.so_client->net; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct nfsd4_blocked_lock *nbl = container_of(fl, @@ -7223,7 +7701,7 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) struct nfs4_lockowner *lo; if (fl->fl_lmops == &nfsd_posix_mng_ops) { - lo = (struct nfs4_lockowner *) fl->fl_owner; + lo = (struct nfs4_lockowner *) fl->c.flc_owner; xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner, GFP_KERNEL); if (!deny->ld_owner.data) @@ -7242,7 +7720,7 @@ nevermind: if (fl->fl_end != NFS4_MAX_UINT64) deny->ld_length = fl->fl_end - fl->fl_start + 1; deny->ld_type = NFS4_READ_LT; - if (fl->fl_type != F_RDLCK) + if (fl->c.flc_type != F_RDLCK) deny->ld_type = NFS4_WRITE_LT; } @@ -7364,7 +7842,7 @@ retry: if (retstp) goto out_found; refcount_inc(&stp->st_stid.sc_count); - stp->st_stid.sc_type = NFS4_LOCK_STID; + stp->st_stid.sc_type = SC_TYPE_LOCK; stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; @@ -7503,12 +7981,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_blocked_lock *nbl = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; + struct super_block *sb; __be32 status = 0; int lkflg; int err; bool new = false; - unsigned char fl_type; - unsigned int fl_flags = FL_POSIX; + unsigned char type; + unsigned int flags = FL_POSIX; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -7524,6 +8003,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfsd4_lock: permission denied!\n"); return status; } + sb = cstate->current_fh.fh_dentry->d_sb; if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) @@ -7549,9 +8029,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &lock_stp, &new); } else { status = nfs4_preprocess_seqid_op(cstate, - lock->lk_old_lock_seqid, - &lock->lk_old_lock_stateid, - NFS4_LOCK_STID, &lock_stp, nn); + lock->lk_old_lock_seqid, + &lock->lk_old_lock_stateid, + SC_TYPE_LOCK, 0, &lock_stp, + nn); } if (status) goto out; @@ -7570,13 +8051,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; if (lock->lk_reclaim) - fl_flags |= FL_RECLAIM; + flags |= FL_RECLAIM; fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: - if (nfsd4_has_session(cstate)) - fl_flags |= FL_SLEEP; + if (nfsd4_has_session(cstate) || + exportfs_lock_op_is_async(sb->s_export_op)) + flags |= FL_SLEEP; fallthrough; case NFS4_READ_LT: spin_lock(&fp->fi_lock); @@ -7584,11 +8066,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (nf) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); spin_unlock(&fp->fi_lock); - fl_type = F_RDLCK; + type = F_RDLCK; break; case NFS4_WRITEW_LT: - if (nfsd4_has_session(cstate)) - fl_flags |= FL_SLEEP; + if (nfsd4_has_session(cstate) || + exportfs_lock_op_is_async(sb->s_export_op)) + flags |= FL_SLEEP; fallthrough; case NFS4_WRITE_LT: spin_lock(&fp->fi_lock); @@ -7596,7 +8079,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (nf) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); spin_unlock(&fp->fi_lock); - fl_type = F_WRLCK; + type = F_WRLCK; break; default: status = nfserr_inval; @@ -7615,8 +8098,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * for file locks), so don't attempt blocking lock notifications * on those filesystems: */ - if (nf->nf_file->f_op->lock) - fl_flags &= ~FL_SLEEP; + if (!exportfs_lock_op_is_async(sb->s_export_op)) + flags &= ~FL_SLEEP; nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); if (!nbl) { @@ -7626,11 +8109,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } file_lock = &nbl->nbl_lock; - file_lock->fl_type = fl_type; - file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); - file_lock->fl_pid = current->tgid; - file_lock->fl_file = nf->nf_file; - file_lock->fl_flags = fl_flags; + file_lock->c.flc_type = type; + file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); + file_lock->c.flc_pid = current->tgid; + file_lock->c.flc_file = nf->nf_file; + file_lock->c.flc_flags = flags; file_lock->fl_lmops = &nfsd_posix_mng_ops; file_lock->fl_start = lock->lk_offset; file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); @@ -7643,7 +8126,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - if (fl_flags & FL_SLEEP) { + if (flags & FL_SLEEP) { nbl->nbl_time = ktime_get_boottime_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); @@ -7680,7 +8163,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out: if (nbl) { /* dequeue it if we queued it before */ - if (fl_flags & FL_SLEEP) { + if (flags & FL_SLEEP) { spin_lock(&nn->blocked_locks_lock); if (!list_empty(&nbl->nbl_list) && !list_empty(&nbl->nbl_lru)) { @@ -7721,6 +8204,14 @@ out: return status; } +void nfsd4_lock_release(union nfsd4_op_u *u) +{ + struct nfsd4_lock *lock = &u->lock; + struct nfsd4_lock_denied *deny = &lock->lk_denied; + + kfree(deny->ld_owner.data); +} + /* * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN, * so we do a temporary open here just to get an open file to pass to @@ -7740,9 +8231,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (err) goto out; - lock->fl_file = nf->nf_file; + lock->c.flc_file = nf->nf_file; err = nfserrno(vfs_test_lock(nf->nf_file, lock)); - lock->fl_file = NULL; + lock->c.flc_file = NULL; out: inode_unlock(inode); nfsd_file_put(nf); @@ -7787,11 +8278,11 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (lockt->lt_type) { case NFS4_READ_LT: case NFS4_READW_LT: - file_lock->fl_type = F_RDLCK; + file_lock->c.flc_type = F_RDLCK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: - file_lock->fl_type = F_WRLCK; + file_lock->c.flc_type = F_WRLCK; break; default: dprintk("NFSD: nfs4_lockt: bad lock type!\n"); @@ -7801,9 +8292,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, lo = find_lockowner_str(cstate->clp, &lockt->lt_owner); if (lo) - file_lock->fl_owner = (fl_owner_t)lo; - file_lock->fl_pid = current->tgid; - file_lock->fl_flags = FL_POSIX; + file_lock->c.flc_owner = (fl_owner_t)lo; + file_lock->c.flc_pid = current->tgid; + file_lock->c.flc_flags = FL_POSIX; file_lock->fl_start = lockt->lt_offset; file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length); @@ -7814,7 +8305,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (file_lock->fl_type != F_UNLCK) { + if (file_lock->c.flc_type != F_UNLCK) { status = nfserr_denied; nfs4_set_lock_denied(file_lock, &lockt->lt_denied); } @@ -7826,6 +8317,14 @@ out: return status; } +void nfsd4_lockt_release(union nfsd4_op_u *u) +{ + struct nfsd4_lockt *lockt = &u->lockt; + struct nfsd4_lock_denied *deny = &lockt->lt_denied; + + kfree(deny->ld_owner.data); +} + __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -7846,8 +8345,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, - &locku->lu_stateid, NFS4_LOCK_STID, - &stp, nn); + &locku->lu_stateid, SC_TYPE_LOCK, 0, + &stp, nn); if (status) goto out; nf = find_any_file(stp->st_stid.sc_file); @@ -7862,11 +8361,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto put_file; } - file_lock->fl_type = F_UNLCK; - file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner)); - file_lock->fl_pid = current->tgid; - file_lock->fl_file = nf->nf_file; - file_lock->fl_flags = FL_POSIX; + file_lock->c.flc_type = F_UNLCK; + file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner)); + file_lock->c.flc_pid = current->tgid; + file_lock->c.flc_file = nf->nf_file; + file_lock->c.flc_flags = FL_POSIX; file_lock->fl_lmops = &nfsd_posix_mng_ops; file_lock->fl_start = locku->lu_offset; @@ -7923,8 +8422,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); - list_for_each_entry(fl, &flctx->flc_posix, fl_list) { - if (fl->fl_owner == (fl_owner_t)lowner) { + for_each_file_lock(fl, &flctx->flc_posix) { + if (fl->c.flc_owner == (fl_owner_t)lowner) { status = true; break; } @@ -7942,7 +8441,7 @@ out: * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * - * Check if theree are any locks still held and if not - free the lockowner + * Check if there are any locks still held and if not, free the lockowner * and any lock state that is owned. * * Return values: @@ -7992,7 +8491,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - WARN_ON(!unhash_lock_stateid(stp)); + unhash_lock_stateid(stp); put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); @@ -8170,12 +8669,16 @@ static int nfs4_state_create_net(struct net *net) INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - - if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + nn->nfsd_client_shrinker = shrinker_alloc(0, "nfsd-client"); + if (!nn->nfsd_client_shrinker) goto err_shrinker; + + nn->nfsd_client_shrinker->scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker->count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker->private_data = nn; + + shrinker_register(nn->nfsd_client_shrinker); + return 0; err_shrinker: @@ -8256,12 +8759,6 @@ nfs4_state_start(void) if (ret) return ret; - ret = nfsd4_create_callback_queue(); - if (ret) { - rhltable_destroy(&nfs4_file_rhltable); - return ret; - } - set_max_delegations(); return 0; } @@ -8273,7 +8770,7 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - unregister_shrinker(&nn->nfsd_client_shrinker); + shrinker_free(nn->nfsd_client_shrinker); cancel_work_sync(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -8282,7 +8779,7 @@ nfs4_state_shutdown_net(struct net *net) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - WARN_ON(!unhash_delegation_locked(dp)); + unhash_delegation_locked(dp, SC_STATUS_CLOSED); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -8302,7 +8799,6 @@ nfs4_state_shutdown_net(struct net *net) void nfs4_state_shutdown(void) { - nfsd4_destroy_callback_queue(); rhltable_destroy(&nfs4_file_rhltable); } @@ -8423,7 +8919,9 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, /** * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context - * @inode: file to be checked for a conflict + * @dentry: dentry of inode to be checked for a conflict + * @modified: return true if file was modified + * @size: new size of file if modified is true * * This function is called when there is a conflict between a write * delegation and a change/size GETATTR from another client. The server @@ -8432,56 +8930,96 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, * delegation before replying to the GETATTR. See RFC 8881 section * 18.7.4. * - * The current implementation does not support CB_GETATTR yet. However - * this can avoid recalling the delegation could be added in follow up - * work. - * * Returns 0 if there is no conflict; otherwise an nfs_stat * code is returned. */ __be32 -nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode) +nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, + bool *modified, u64 *size) { __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file_lock_context *ctx; - struct file_lock *fl; - struct nfs4_delegation *dp; + struct nfs4_delegation *dp = NULL; + struct file_lease *fl; + struct iattr attrs; + struct nfs4_cb_fattr *ncf; + struct inode *inode = d_inode(dentry); + *modified = false; ctx = locks_inode_context(inode); if (!ctx) return 0; + +#define NON_NFSD_LEASE ((void *)1) + spin_lock(&ctx->flc_lock); - list_for_each_entry(fl, &ctx->flc_lease, fl_list) { - if (fl->fl_flags == FL_LAYOUT) + for_each_file_lock(fl, &ctx->flc_lease) { + if (fl->c.flc_flags == FL_LAYOUT) continue; - if (fl->fl_lmops != &nfsd_lease_mng_ops) { - /* - * non-nfs lease, if it's a lease with F_RDLCK then - * we are done; there isn't any write delegation - * on this inode - */ - if (fl->fl_type == F_RDLCK) - break; - goto break_lease; + if (fl->c.flc_type == F_WRLCK) { + if (fl->fl_lmops == &nfsd_lease_mng_ops) + dp = fl->c.flc_owner; + else + dp = NON_NFSD_LEASE; } - if (fl->fl_type == F_WRLCK) { - dp = fl->fl_owner; - if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) { - spin_unlock(&ctx->flc_lock); - return 0; - } -break_lease: - spin_unlock(&ctx->flc_lock); - nfsd_stats_wdeleg_getattr_inc(nn); - status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + break; + } + if (dp == NULL || dp == NON_NFSD_LEASE || + dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) { + spin_unlock(&ctx->flc_lock); + if (dp == NON_NFSD_LEASE) { + status = nfserrno(nfsd_open_break_lease(inode, + NFSD_MAY_READ)); if (status != nfserr_jukebox || - !nfsd_wait_for_delegreturn(rqstp, inode)) + !nfsd_wait_for_delegreturn(rqstp, inode)) return status; - return 0; } - break; + return 0; } + + nfsd_stats_wdeleg_getattr_inc(nn); + refcount_inc(&dp->dl_stid.sc_count); + ncf = &dp->dl_cb_fattr; + nfs4_cb_getattr(&dp->dl_cb_fattr); spin_unlock(&ctx->flc_lock); - return 0; + + wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, + TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); + if (ncf->ncf_cb_status) { + /* Recall delegation only if client didn't respond */ + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + if (status != nfserr_jukebox || + !nfsd_wait_for_delegreturn(rqstp, inode)) + goto out_status; + } + if (!ncf->ncf_file_modified && + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change || + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) + ncf->ncf_file_modified = true; + if (ncf->ncf_file_modified) { + int err; + + /* + * Per section 10.4.3 of RFC 8881, the server would + * not update the file's metadata with the client's + * modified size + */ + attrs.ia_mtime = attrs.ia_ctime = current_time(inode); + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG; + inode_lock(inode); + err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL); + inode_unlock(inode); + if (err) { + status = nfserrno(err); + goto out_status; + } + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; + *size = ncf->ncf_cur_fsize; + *modified = true; + } + status = 0; +out_status: + nfs4_put_stid(&dp->dl_stid); + return status; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 76dfbb99277f..6edeb3bdf81b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -118,11 +118,11 @@ static int zero_clientid(clientid_t *clid) * operation described in @argp finishes. */ static void * -svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len) +svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, size_t len) { struct svcxdr_tmpbuf *tb; - tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL); + tb = kmalloc(struct_size(tb, buf, len), GFP_KERNEL); if (!tb) return NULL; tb->next = argp->to_free; @@ -138,9 +138,9 @@ svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len) * buffer might end on a page boundary. */ static char * -svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) +svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, size_t len) { - char *p = svcxdr_tmpalloc(argp, len + 1); + char *p = svcxdr_tmpalloc(argp, size_add(len, 1)); if (!p) return NULL; @@ -150,7 +150,7 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) } static void * -svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len) +svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, size_t len) { __be32 *tmp; @@ -1724,6 +1724,35 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } +static __be32 +nfsd4_decode_get_dir_delegation(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) +{ + struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation; + __be32 status; + + memset(gdd, 0, sizeof(*gdd)); + + if (xdr_stream_decode_bool(argp->xdr, &gdd->gdda_signal_deleg_avail) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_bitmap4(argp, gdd->gdda_notification_types, + ARRAY_SIZE(gdd->gdda_notification_types)); + if (status) + return status; + status = nfsd4_decode_nfstime4(argp, &gdd->gdda_child_attr_delay); + if (status) + return status; + status = nfsd4_decode_nfstime4(argp, &gdd->gdda_dir_attr_delay); + if (status) + return status; + status = nfsd4_decode_bitmap4(argp, gdd->gdda_child_attributes, + ARRAY_SIZE(gdd->gdda_child_attributes)); + if (status) + return status; + return nfsd4_decode_bitmap4(argp, gdd->gdda_dir_attributes, + ARRAY_SIZE(gdd->gdda_dir_attributes)); +} + #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, @@ -2109,7 +2138,7 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) */ static __be32 nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr, - char **bufp, u32 buflen) + char **bufp, size_t buflen) { struct page **pages = xdr->pages; struct kvec *head = xdr->head; @@ -2362,7 +2391,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { [OP_CREATE_SESSION] = nfsd4_decode_create_session, [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session, [OP_FREE_STATEID] = nfsd4_decode_free_stateid, - [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp, + [OP_GET_DIR_DELEGATION] = nfsd4_decode_get_dir_delegation, #ifdef CONFIG_NFSD_PNFS [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo, [OP_GETDEVICELIST] = nfsd4_decode_notsupp, @@ -2516,72 +2545,69 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) svc_reserve(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; + argp->splice_ok = nfsd_read_splice_ok(argp->rqstp); if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) - clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); + argp->splice_ok = false; return true; } -static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, - struct svc_export *exp) +static __be32 nfsd4_encode_nfs_fh4(struct xdr_stream *xdr, + struct knfsd_fh *fh_handle) { - if (exp->ex_flags & NFSEXP_V4ROOT) { - *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); - *p++ = 0; - } else - p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); - return p; + return nfsd4_encode_opaque(xdr, fh_handle->fh_raw, fh_handle->fh_size); } +/* This is a frequently-encoded type; open-coded for speed */ static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr, - struct timespec64 *tv) + const struct timespec64 *tv) { __be32 *p; p = xdr_reserve_space(xdr, XDR_UNIT * 3); if (!p) return nfserr_resource; - - p = xdr_encode_hyper(p, (s64)tv->tv_sec); + p = xdr_encode_hyper(p, tv->tv_sec); *p = cpu_to_be32(tv->tv_nsec); return nfs_ok; } -/* - * ctime (in NFSv4, time_metadata) is not writeable, and the client - * doesn't really care what resolution could theoretically be stored by - * the filesystem. - * - * The client cares how close together changes can be while still - * guaranteeing ctime changes. For most filesystems (which have - * timestamps with nanosecond fields) that is limited by the resolution - * of the time returned from current_time() (which I'm assuming to be - * 1/HZ). - */ -static __be32 *encode_time_delta(__be32 *p, struct inode *inode) +static __be32 nfsd4_encode_specdata4(struct xdr_stream *xdr, + unsigned int major, unsigned int minor) { - struct timespec64 ts; - u32 ns; + __be32 status; - ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran); - ts = ns_to_timespec64(ns); + status = nfsd4_encode_uint32_t(xdr, major); + if (status != nfs_ok) + return status; + return nfsd4_encode_uint32_t(xdr, minor); +} - p = xdr_encode_hyper(p, ts.tv_sec); - *p++ = cpu_to_be32(ts.tv_nsec); +static __be32 +nfsd4_encode_change_info4(struct xdr_stream *xdr, const struct nfsd4_change_info *c) +{ + __be32 status; - return p; + status = nfsd4_encode_bool(xdr, c->atomic); + if (status != nfs_ok) + return status; + status = nfsd4_encode_changeid4(xdr, c->before_change); + if (status != nfs_ok) + return status; + return nfsd4_encode_changeid4(xdr, c->after_change); } -static __be32 -nfsd4_encode_change_info4(struct xdr_stream *xdr, struct nfsd4_change_info *c) +static __be32 nfsd4_encode_netaddr4(struct xdr_stream *xdr, + const struct nfs42_netaddr *addr) { - if (xdr_stream_encode_bool(xdr, c->atomic) < 0) - return nfserr_resource; - if (xdr_stream_encode_u64(xdr, c->before_change) < 0) - return nfserr_resource; - if (xdr_stream_encode_u64(xdr, c->after_change) < 0) - return nfserr_resource; - return nfs_ok; + __be32 status; + + /* na_r_netid */ + status = nfsd4_encode_opaque(xdr, addr->netid, addr->netid_len); + if (status != nfs_ok) + return status; + /* na_r_addr */ + return nfsd4_encode_opaque(xdr, addr->addr, addr->addr_len); } /* Encode as an array of strings the string given with components @@ -2653,9 +2679,6 @@ static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep, return nfsd4_encode_components_esc(xdr, sep, components, 0, 0); } -/* - * encode a location element of a fs_locations structure - */ static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr, struct nfsd4_fs_location *location) { @@ -2668,15 +2691,12 @@ static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr, status = nfsd4_encode_components(xdr, '/', location->path); if (status) return status; - return 0; + return nfs_ok; } -/* - * Encode a path in RFC3530 'pathname4' format - */ -static __be32 nfsd4_encode_path(struct xdr_stream *xdr, - const struct path *root, - const struct path *path) +static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr, + const struct path *root, + const struct path *path) { struct path cur = *path; __be32 *p; @@ -2744,89 +2764,59 @@ out_free: return err; } -static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr, - struct svc_rqst *rqstp, const struct path *path) +static __be32 nfsd4_encode_fs_locations4(struct xdr_stream *xdr, + struct svc_rqst *rqstp, + struct svc_export *exp) { + struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; struct svc_export *exp_ps; - __be32 res; + unsigned int i; + __be32 status; + /* fs_root */ exp_ps = rqst_find_fsidzero_export(rqstp); if (IS_ERR(exp_ps)) return nfserrno(PTR_ERR(exp_ps)); - res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path); + status = nfsd4_encode_pathname4(xdr, &exp_ps->ex_path, &exp->ex_path); exp_put(exp_ps); - return res; -} - -/* - * encode a fs_locations structure - */ -static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr, - struct svc_rqst *rqstp, struct svc_export *exp) -{ - __be32 status; - int i; - __be32 *p; - struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; - - status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path); - if (status) + if (status != nfs_ok) return status; - p = xdr_reserve_space(xdr, 4); - if (!p) + + /* locations<> */ + if (xdr_stream_encode_u32(xdr, fslocs->locations_count) != XDR_UNIT) return nfserr_resource; - *p++ = cpu_to_be32(fslocs->locations_count); - for (i=0; i<fslocs->locations_count; i++) { + for (i = 0; i < fslocs->locations_count; i++) { status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]); - if (status) + if (status != nfs_ok) return status; } - return 0; -} -static u32 nfs4_file_type(umode_t mode) -{ - switch (mode & S_IFMT) { - case S_IFIFO: return NF4FIFO; - case S_IFCHR: return NF4CHR; - case S_IFDIR: return NF4DIR; - case S_IFBLK: return NF4BLK; - case S_IFLNK: return NF4LNK; - case S_IFREG: return NF4REG; - case S_IFSOCK: return NF4SOCK; - default: return NF4BAD; - } + return nfs_ok; } -static inline __be32 -nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, - struct nfs4_ace *ace) +static __be32 nfsd4_encode_nfsace4(struct xdr_stream *xdr, struct svc_rqst *rqstp, + struct nfs4_ace *ace) { + __be32 status; + + /* type */ + status = nfsd4_encode_acetype4(xdr, ace->type); + if (status != nfs_ok) + return nfserr_resource; + /* flag */ + status = nfsd4_encode_aceflag4(xdr, ace->flag); + if (status != nfs_ok) + return nfserr_resource; + /* access mask */ + status = nfsd4_encode_acemask4(xdr, ace->access_mask & NFS4_ACE_MASK_ALL); + if (status != nfs_ok) + return nfserr_resource; + /* who */ if (ace->whotype != NFS4_ACL_WHO_NAMED) return nfs4_acl_write_who(xdr, ace->whotype); - else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) + if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) return nfsd4_encode_group(xdr, rqstp, ace->who_gid); - else - return nfsd4_encode_user(xdr, rqstp, ace->who_uid); -} - -static inline __be32 -nfsd4_encode_layout_types(struct xdr_stream *xdr, u32 layout_types) -{ - __be32 *p; - unsigned long i = hweight_long(layout_types); - - p = xdr_reserve_space(xdr, 4 + 4 * i); - if (!p) - return nfserr_resource; - - *p++ = cpu_to_be32(i); - - for (i = LAYOUT_NFSV4_1_FILES; i < LAYOUT_TYPE_MAX; ++i) - if (layout_types & (1 << i)) - *p++ = cpu_to_be32(i); - - return 0; + return nfsd4_encode_user(xdr, rqstp, ace->who_uid); } #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ @@ -2898,12 +2888,12 @@ static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino) } static __be32 -nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) +nfsd4_encode_bitmap4(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) { __be32 *p; if (bmval2) { - p = xdr_reserve_space(xdr, 16); + p = xdr_reserve_space(xdr, XDR_UNIT * 4); if (!p) goto out_resource; *p++ = cpu_to_be32(3); @@ -2911,94 +2901,708 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) *p++ = cpu_to_be32(bmval1); *p++ = cpu_to_be32(bmval2); } else if (bmval1) { - p = xdr_reserve_space(xdr, 12); + p = xdr_reserve_space(xdr, XDR_UNIT * 3); if (!p) goto out_resource; *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(bmval0); *p++ = cpu_to_be32(bmval1); } else { - p = xdr_reserve_space(xdr, 8); + p = xdr_reserve_space(xdr, XDR_UNIT * 2); if (!p) goto out_resource; *p++ = cpu_to_be32(1); *p++ = cpu_to_be32(bmval0); } - return 0; + return nfs_ok; out_resource: return nfserr_resource; } +struct nfsd4_fattr_args { + struct svc_rqst *rqstp; + struct svc_fh *fhp; + struct svc_export *exp; + struct dentry *dentry; + struct kstat stat; + struct kstatfs statfs; + struct nfs4_acl *acl; + u64 size; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + void *context; + int contextlen; +#endif + u32 rdattr_err; + bool contextsupport; + bool ignore_crossmnt; +}; + +typedef __be32(*nfsd4_enc_attr)(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args); + +static __be32 nfsd4_encode_fattr4__noop(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfs_ok; +} + +static __be32 nfsd4_encode_fattr4__true(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_bool(xdr, true); +} + +static __be32 nfsd4_encode_fattr4__false(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_bool(xdr, false); +} + +static __be32 nfsd4_encode_fattr4_supported_attrs(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct nfsd4_compoundres *resp = args->rqstp->rq_resp; + u32 minorversion = resp->cstate.minorversion; + u32 supp[3]; + + memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp)); + if (!IS_POSIXACL(d_inode(args->dentry))) + supp[0] &= ~FATTR4_WORD0_ACL; + if (!args->contextsupport) + supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]); +} + +static __be32 nfsd4_encode_fattr4_type(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT); + if (!p) + return nfserr_resource; + + switch (args->stat.mode & S_IFMT) { + case S_IFIFO: + *p = cpu_to_be32(NF4FIFO); + break; + case S_IFCHR: + *p = cpu_to_be32(NF4CHR); + break; + case S_IFDIR: + *p = cpu_to_be32(NF4DIR); + break; + case S_IFBLK: + *p = cpu_to_be32(NF4BLK); + break; + case S_IFLNK: + *p = cpu_to_be32(NF4LNK); + break; + case S_IFREG: + *p = cpu_to_be32(NF4REG); + break; + case S_IFSOCK: + *p = cpu_to_be32(NF4SOCK); + break; + default: + return nfserr_serverfault; + } + + return nfs_ok; +} + +static __be32 nfsd4_encode_fattr4_fh_expire_type(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + u32 mask; + + mask = NFS4_FH_PERSISTENT; + if (!(args->exp->ex_flags & NFSEXP_NOSUBTREECHECK)) + mask |= NFS4_FH_VOL_RENAME; + return nfsd4_encode_uint32_t(xdr, mask); +} + +static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + const struct svc_export *exp = args->exp; + u64 c; + + if (unlikely(exp->ex_flags & NFSEXP_V4ROOT)) { + u32 flush_time = convert_to_wallclock(exp->cd->flush_time); + + if (xdr_stream_encode_u32(xdr, flush_time) != XDR_UNIT) + return nfserr_resource; + if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) + return nfserr_resource; + return nfs_ok; + } + + c = nfsd4_change_attribute(&args->stat); + return nfsd4_encode_changeid4(xdr, c); +} + +static __be32 nfsd4_encode_fattr4_size(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint64_t(xdr, args->size); +} + +static __be32 nfsd4_encode_fattr4_fsid(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 2 + XDR_UNIT * 2); + if (!p) + return nfserr_resource; + + if (unlikely(args->exp->ex_fslocs.migrated)) { + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR); + xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR); + return nfs_ok; + } + switch (fsid_source(args->fhp)) { + case FSIDSOURCE_FSID: + p = xdr_encode_hyper(p, (u64)args->exp->ex_fsid); + xdr_encode_hyper(p, (u64)0); + break; + case FSIDSOURCE_DEV: + *p++ = xdr_zero; + *p++ = cpu_to_be32(MAJOR(args->stat.dev)); + *p++ = xdr_zero; + *p = cpu_to_be32(MINOR(args->stat.dev)); + break; + case FSIDSOURCE_UUID: + xdr_encode_opaque_fixed(p, args->exp->ex_uuid, EX_UUID_LEN); + break; + } + + return nfs_ok; +} + +static __be32 nfsd4_encode_fattr4_lease_time(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct nfsd_net *nn = net_generic(SVC_NET(args->rqstp), nfsd_net_id); + + return nfsd4_encode_nfs_lease4(xdr, nn->nfsd4_lease); +} + +static __be32 nfsd4_encode_fattr4_rdattr_error(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint32_t(xdr, args->rdattr_err); +} + +static __be32 nfsd4_encode_fattr4_aclsupport(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + u32 mask; + + mask = 0; + if (IS_POSIXACL(d_inode(args->dentry))) + mask = ACL4_SUPPORT_ALLOW_ACL | ACL4_SUPPORT_DENY_ACL; + return nfsd4_encode_uint32_t(xdr, mask); +} + +static __be32 nfsd4_encode_fattr4_acl(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct nfs4_acl *acl = args->acl; + struct nfs4_ace *ace; + __be32 status; + + /* nfsace4<> */ + if (!acl) { + if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) + return nfserr_resource; + } else { + if (xdr_stream_encode_u32(xdr, acl->naces) != XDR_UNIT) + return nfserr_resource; + for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { + status = nfsd4_encode_nfsace4(xdr, args->rqstp, ace); + if (status != nfs_ok) + return status; + } + } + return nfs_ok; +} + +static __be32 nfsd4_encode_fattr4_filehandle(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_nfs_fh4(xdr, &args->fhp->fh_handle); +} + +static __be32 nfsd4_encode_fattr4_fileid(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint64_t(xdr, args->stat.ino); +} + +static __be32 nfsd4_encode_fattr4_files_avail(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint64_t(xdr, args->statfs.f_ffree); +} + +static __be32 nfsd4_encode_fattr4_files_free(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint64_t(xdr, args->statfs.f_ffree); +} + +static __be32 nfsd4_encode_fattr4_files_total(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint64_t(xdr, args->statfs.f_files); +} + +static __be32 nfsd4_encode_fattr4_fs_locations(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_fs_locations4(xdr, args->rqstp, args->exp); +} + +static __be32 nfsd4_encode_fattr4_maxfilesize(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct super_block *sb = args->exp->ex_path.mnt->mnt_sb; + + return nfsd4_encode_uint64_t(xdr, sb->s_maxbytes); +} + +static __be32 nfsd4_encode_fattr4_maxlink(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint32_t(xdr, 255); +} + +static __be32 nfsd4_encode_fattr4_maxname(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint32_t(xdr, args->statfs.f_namelen); +} + +static __be32 nfsd4_encode_fattr4_maxread(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint64_t(xdr, svc_max_payload(args->rqstp)); +} + +static __be32 nfsd4_encode_fattr4_maxwrite(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint64_t(xdr, svc_max_payload(args->rqstp)); +} + +static __be32 nfsd4_encode_fattr4_mode(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_mode4(xdr, args->stat.mode & S_IALLUGO); +} + +static __be32 nfsd4_encode_fattr4_numlinks(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint32_t(xdr, args->stat.nlink); +} + +static __be32 nfsd4_encode_fattr4_owner(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_user(xdr, args->rqstp, args->stat.uid); +} + +static __be32 nfsd4_encode_fattr4_owner_group(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_group(xdr, args->rqstp, args->stat.gid); +} + +static __be32 nfsd4_encode_fattr4_rawdev(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_specdata4(xdr, MAJOR(args->stat.rdev), + MINOR(args->stat.rdev)); +} + +static __be32 nfsd4_encode_fattr4_space_avail(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + u64 avail = (u64)args->statfs.f_bavail * (u64)args->statfs.f_bsize; + + return nfsd4_encode_uint64_t(xdr, avail); +} + +static __be32 nfsd4_encode_fattr4_space_free(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + u64 free = (u64)args->statfs.f_bfree * (u64)args->statfs.f_bsize; + + return nfsd4_encode_uint64_t(xdr, free); +} + +static __be32 nfsd4_encode_fattr4_space_total(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + u64 total = (u64)args->statfs.f_blocks * (u64)args->statfs.f_bsize; + + return nfsd4_encode_uint64_t(xdr, total); +} + +static __be32 nfsd4_encode_fattr4_space_used(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint64_t(xdr, (u64)args->stat.blocks << 9); +} + +static __be32 nfsd4_encode_fattr4_time_access(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_nfstime4(xdr, &args->stat.atime); +} + +static __be32 nfsd4_encode_fattr4_time_create(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_nfstime4(xdr, &args->stat.btime); +} + +/* + * ctime (in NFSv4, time_metadata) is not writeable, and the client + * doesn't really care what resolution could theoretically be stored by + * the filesystem. + * + * The client cares how close together changes can be while still + * guaranteeing ctime changes. For most filesystems (which have + * timestamps with nanosecond fields) that is limited by the resolution + * of the time returned from current_time() (which I'm assuming to be + * 1/HZ). + */ +static __be32 nfsd4_encode_fattr4_time_delta(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + const struct inode *inode = d_inode(args->dentry); + u32 ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran); + struct timespec64 ts = ns_to_timespec64(ns); + + return nfsd4_encode_nfstime4(xdr, &ts); +} + +static __be32 nfsd4_encode_fattr4_time_metadata(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_nfstime4(xdr, &args->stat.ctime); +} + +static __be32 nfsd4_encode_fattr4_time_modify(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_nfstime4(xdr, &args->stat.mtime); +} + +static __be32 nfsd4_encode_fattr4_mounted_on_fileid(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + u64 ino; + int err; + + if (!args->ignore_crossmnt && + args->dentry == args->exp->ex_path.mnt->mnt_root) { + err = nfsd4_get_mounted_on_ino(args->exp, &ino); + if (err) + return nfserrno(err); + } else + ino = args->stat.ino; + + return nfsd4_encode_uint64_t(xdr, ino); +} + +#ifdef CONFIG_NFSD_PNFS + +static __be32 nfsd4_encode_fattr4_fs_layout_types(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + unsigned long mask = args->exp->ex_layout_types; + int i; + + /* Hamming weight of @mask is the number of layout types to return */ + if (xdr_stream_encode_u32(xdr, hweight_long(mask)) != XDR_UNIT) + return nfserr_resource; + for (i = LAYOUT_NFSV4_1_FILES; i < LAYOUT_TYPE_MAX; ++i) + if (mask & BIT(i)) { + /* layouttype4 */ + if (xdr_stream_encode_u32(xdr, i) != XDR_UNIT) + return nfserr_resource; + } + return nfs_ok; +} + +static __be32 nfsd4_encode_fattr4_layout_types(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + unsigned long mask = args->exp->ex_layout_types; + int i; + + /* Hamming weight of @mask is the number of layout types to return */ + if (xdr_stream_encode_u32(xdr, hweight_long(mask)) != XDR_UNIT) + return nfserr_resource; + for (i = LAYOUT_NFSV4_1_FILES; i < LAYOUT_TYPE_MAX; ++i) + if (mask & BIT(i)) { + /* layouttype4 */ + if (xdr_stream_encode_u32(xdr, i) != XDR_UNIT) + return nfserr_resource; + } + return nfs_ok; +} + +static __be32 nfsd4_encode_fattr4_layout_blksize(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_uint32_t(xdr, args->stat.blksize); +} + +#endif + +static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct nfsd4_compoundres *resp = args->rqstp->rq_resp; + u32 supp[3]; + + memcpy(supp, nfsd_suppattrs[resp->cstate.minorversion], sizeof(supp)); + supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0; + supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1; + supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2; + + return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]); +} + +/* + * Copied from generic_remap_checks/generic_remap_file_range_prep. + * + * These generic functions use the file system's s_blocksize, but + * individual file systems aren't required to use + * generic_remap_file_range_prep. Until there is a mechanism for + * determining a particular file system's (or file's) clone block + * size, this is the best NFSD can do. + */ +static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct inode *inode = d_inode(args->dentry); + + return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize); +} + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_security_label(xdr, args->rqstp, + args->context, args->contextlen); +} +#endif + +static __be32 nfsd4_encode_fattr4_xattr_support(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + int err = xattr_supports_user_prefix(d_inode(args->dentry)); + + return nfsd4_encode_bool(xdr, err == 0); +} + +static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { + [FATTR4_SUPPORTED_ATTRS] = nfsd4_encode_fattr4_supported_attrs, + [FATTR4_TYPE] = nfsd4_encode_fattr4_type, + [FATTR4_FH_EXPIRE_TYPE] = nfsd4_encode_fattr4_fh_expire_type, + [FATTR4_CHANGE] = nfsd4_encode_fattr4_change, + [FATTR4_SIZE] = nfsd4_encode_fattr4_size, + [FATTR4_LINK_SUPPORT] = nfsd4_encode_fattr4__true, + [FATTR4_SYMLINK_SUPPORT] = nfsd4_encode_fattr4__true, + [FATTR4_NAMED_ATTR] = nfsd4_encode_fattr4__false, + [FATTR4_FSID] = nfsd4_encode_fattr4_fsid, + [FATTR4_UNIQUE_HANDLES] = nfsd4_encode_fattr4__true, + [FATTR4_LEASE_TIME] = nfsd4_encode_fattr4_lease_time, + [FATTR4_RDATTR_ERROR] = nfsd4_encode_fattr4_rdattr_error, + [FATTR4_ACL] = nfsd4_encode_fattr4_acl, + [FATTR4_ACLSUPPORT] = nfsd4_encode_fattr4_aclsupport, + [FATTR4_ARCHIVE] = nfsd4_encode_fattr4__noop, + [FATTR4_CANSETTIME] = nfsd4_encode_fattr4__true, + [FATTR4_CASE_INSENSITIVE] = nfsd4_encode_fattr4__false, + [FATTR4_CASE_PRESERVING] = nfsd4_encode_fattr4__true, + [FATTR4_CHOWN_RESTRICTED] = nfsd4_encode_fattr4__true, + [FATTR4_FILEHANDLE] = nfsd4_encode_fattr4_filehandle, + [FATTR4_FILEID] = nfsd4_encode_fattr4_fileid, + [FATTR4_FILES_AVAIL] = nfsd4_encode_fattr4_files_avail, + [FATTR4_FILES_FREE] = nfsd4_encode_fattr4_files_free, + [FATTR4_FILES_TOTAL] = nfsd4_encode_fattr4_files_total, + [FATTR4_FS_LOCATIONS] = nfsd4_encode_fattr4_fs_locations, + [FATTR4_HIDDEN] = nfsd4_encode_fattr4__noop, + [FATTR4_HOMOGENEOUS] = nfsd4_encode_fattr4__true, + [FATTR4_MAXFILESIZE] = nfsd4_encode_fattr4_maxfilesize, + [FATTR4_MAXLINK] = nfsd4_encode_fattr4_maxlink, + [FATTR4_MAXNAME] = nfsd4_encode_fattr4_maxname, + [FATTR4_MAXREAD] = nfsd4_encode_fattr4_maxread, + [FATTR4_MAXWRITE] = nfsd4_encode_fattr4_maxwrite, + [FATTR4_MIMETYPE] = nfsd4_encode_fattr4__noop, + [FATTR4_MODE] = nfsd4_encode_fattr4_mode, + [FATTR4_NO_TRUNC] = nfsd4_encode_fattr4__true, + [FATTR4_NUMLINKS] = nfsd4_encode_fattr4_numlinks, + [FATTR4_OWNER] = nfsd4_encode_fattr4_owner, + [FATTR4_OWNER_GROUP] = nfsd4_encode_fattr4_owner_group, + [FATTR4_QUOTA_AVAIL_HARD] = nfsd4_encode_fattr4__noop, + [FATTR4_QUOTA_AVAIL_SOFT] = nfsd4_encode_fattr4__noop, + [FATTR4_QUOTA_USED] = nfsd4_encode_fattr4__noop, + [FATTR4_RAWDEV] = nfsd4_encode_fattr4_rawdev, + [FATTR4_SPACE_AVAIL] = nfsd4_encode_fattr4_space_avail, + [FATTR4_SPACE_FREE] = nfsd4_encode_fattr4_space_free, + [FATTR4_SPACE_TOTAL] = nfsd4_encode_fattr4_space_total, + [FATTR4_SPACE_USED] = nfsd4_encode_fattr4_space_used, + [FATTR4_SYSTEM] = nfsd4_encode_fattr4__noop, + [FATTR4_TIME_ACCESS] = nfsd4_encode_fattr4_time_access, + [FATTR4_TIME_ACCESS_SET] = nfsd4_encode_fattr4__noop, + [FATTR4_TIME_BACKUP] = nfsd4_encode_fattr4__noop, + [FATTR4_TIME_CREATE] = nfsd4_encode_fattr4_time_create, + [FATTR4_TIME_DELTA] = nfsd4_encode_fattr4_time_delta, + [FATTR4_TIME_METADATA] = nfsd4_encode_fattr4_time_metadata, + [FATTR4_TIME_MODIFY] = nfsd4_encode_fattr4_time_modify, + [FATTR4_TIME_MODIFY_SET] = nfsd4_encode_fattr4__noop, + [FATTR4_MOUNTED_ON_FILEID] = nfsd4_encode_fattr4_mounted_on_fileid, + [FATTR4_DIR_NOTIF_DELAY] = nfsd4_encode_fattr4__noop, + [FATTR4_DIRENT_NOTIF_DELAY] = nfsd4_encode_fattr4__noop, + [FATTR4_DACL] = nfsd4_encode_fattr4__noop, + [FATTR4_SACL] = nfsd4_encode_fattr4__noop, + [FATTR4_CHANGE_POLICY] = nfsd4_encode_fattr4__noop, + [FATTR4_FS_STATUS] = nfsd4_encode_fattr4__noop, + +#ifdef CONFIG_NFSD_PNFS + [FATTR4_FS_LAYOUT_TYPES] = nfsd4_encode_fattr4_fs_layout_types, + [FATTR4_LAYOUT_HINT] = nfsd4_encode_fattr4__noop, + [FATTR4_LAYOUT_TYPES] = nfsd4_encode_fattr4_layout_types, + [FATTR4_LAYOUT_BLKSIZE] = nfsd4_encode_fattr4_layout_blksize, + [FATTR4_LAYOUT_ALIGNMENT] = nfsd4_encode_fattr4__noop, +#else + [FATTR4_FS_LAYOUT_TYPES] = nfsd4_encode_fattr4__noop, + [FATTR4_LAYOUT_HINT] = nfsd4_encode_fattr4__noop, + [FATTR4_LAYOUT_TYPES] = nfsd4_encode_fattr4__noop, + [FATTR4_LAYOUT_BLKSIZE] = nfsd4_encode_fattr4__noop, + [FATTR4_LAYOUT_ALIGNMENT] = nfsd4_encode_fattr4__noop, +#endif + + [FATTR4_FS_LOCATIONS_INFO] = nfsd4_encode_fattr4__noop, + [FATTR4_MDSTHRESHOLD] = nfsd4_encode_fattr4__noop, + [FATTR4_RETENTION_GET] = nfsd4_encode_fattr4__noop, + [FATTR4_RETENTION_SET] = nfsd4_encode_fattr4__noop, + [FATTR4_RETENTEVT_GET] = nfsd4_encode_fattr4__noop, + [FATTR4_RETENTEVT_SET] = nfsd4_encode_fattr4__noop, + [FATTR4_RETENTION_HOLD] = nfsd4_encode_fattr4__noop, + [FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop, + [FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat, + [FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop, + [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize, + [FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop, + [FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop, + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + [FATTR4_SEC_LABEL] = nfsd4_encode_fattr4_sec_label, +#else + [FATTR4_SEC_LABEL] = nfsd4_encode_fattr4__noop, +#endif + + [FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop, + [FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support, +}; + /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. */ static __be32 -nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, - struct svc_export *exp, - struct dentry *dentry, u32 *bmval, - struct svc_rqst *rqstp, int ignore_crossmnt) +nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, const u32 *bmval, + int ignore_crossmnt) { - u32 bmval0 = bmval[0]; - u32 bmval1 = bmval[1]; - u32 bmval2 = bmval[2]; - struct kstat stat; + DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); + struct nfsd4_fattr_args args; struct svc_fh *tempfh = NULL; - struct kstatfs statfs; - __be32 *p, *attrlen_p; int starting_len = xdr->buf->len; + __be32 *attrlen_p, status; int attrlen_offset; - u32 dummy; - u64 dummy64; - u32 rdattr_err = 0; - __be32 status; + u32 attrmask[3]; int err; - struct nfs4_acl *acl = NULL; -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL - void *context = NULL; - int contextlen; -#endif - bool contextsupport = false; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; struct path path = { .mnt = exp->ex_path.mnt, .dentry = dentry, }; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + unsigned long bit; + bool file_modified = false; + u64 size = 0; + + WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1); + WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval)); + + args.rqstp = rqstp; + args.exp = exp; + args.dentry = dentry; + args.ignore_crossmnt = (ignore_crossmnt != 0); + args.acl = NULL; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + args.context = NULL; +#endif - BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); - BUG_ON(!nfsd_attrs_supported(minorversion, bmval)); + /* + * Make a local copy of the attribute bitmap that can be modified. + */ + attrmask[0] = bmval[0]; + attrmask[1] = bmval[1]; + attrmask[2] = bmval[2]; + args.rdattr_err = 0; if (exp->ex_fslocs.migrated) { - status = fattr_handle_absent_fs(&bmval0, &bmval1, &bmval2, &rdattr_err); + status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1], + &attrmask[2], &args.rdattr_err); if (status) goto out; } - if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { - status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry)); + args.size = 0; + if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { + status = nfsd4_deleg_getattr_conflict(rqstp, dentry, + &file_modified, &size); if (status) goto out; } - err = vfs_getattr(&path, &stat, + err = vfs_getattr(&path, &args.stat, STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE, AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; - if (!(stat.result_mask & STATX_BTIME)) + if (file_modified) + args.size = size; + else + args.size = args.stat.size; + + if (!(args.stat.result_mask & STATX_BTIME)) /* underlying FS does not offer btime so we can't share it */ - bmval1 &= ~FATTR4_WORD1_TIME_CREATE; - if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE; + if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || - (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | + (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { - err = vfs_statfs(&path, &statfs); + err = vfs_statfs(&path, &args.statfs); if (err) goto out_nfserr; } - if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { + if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && + !fhp) { tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); status = nfserr_jukebox; if (!tempfh) @@ -3007,12 +3611,14 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, status = fh_compose(tempfh, exp, dentry, NULL); if (status) goto out; - fhp = tempfh; - } - if (bmval0 & FATTR4_WORD0_ACL) { - err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); + args.fhp = tempfh; + } else + args.fhp = fhp; + + if (attrmask[0] & FATTR4_WORD0_ACL) { + err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl); if (err == -EOPNOTSUPP) - bmval0 &= ~FATTR4_WORD0_ACL; + attrmask[0] &= ~FATTR4_WORD0_ACL; else if (err == -EINVAL) { status = nfserr_attrnotsupp; goto out; @@ -3020,452 +3626,54 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out_nfserr; } + args.contextsupport = false; + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) || - bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { + if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) || + attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { if (exp->ex_flags & NFSEXP_SECURITY_LABEL) err = security_inode_getsecctx(d_inode(dentry), - &context, &contextlen); + &args.context, &args.contextlen); else err = -EOPNOTSUPP; - contextsupport = (err == 0); - if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + args.contextsupport = (err == 0); + if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) { if (err == -EOPNOTSUPP) - bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL; + attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; else if (err) goto out_nfserr; } } #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ - status = nfsd4_encode_bitmap(xdr, bmval0, bmval1, bmval2); + /* attrmask */ + status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1], + attrmask[2]); if (status) goto out; + /* attr_vals */ attrlen_offset = xdr->buf->len; attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); if (!attrlen_p) goto out_resource; - - if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { - u32 supp[3]; - - memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp)); - - if (!IS_POSIXACL(dentry->d_inode)) - supp[0] &= ~FATTR4_WORD0_ACL; - if (!contextsupport) - supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - if (!supp[2]) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(2); - *p++ = cpu_to_be32(supp[0]); - *p++ = cpu_to_be32(supp[1]); - } else { - p = xdr_reserve_space(xdr, 16); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(3); - *p++ = cpu_to_be32(supp[0]); - *p++ = cpu_to_be32(supp[1]); - *p++ = cpu_to_be32(supp[2]); - } - } - if (bmval0 & FATTR4_WORD0_TYPE) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - dummy = nfs4_file_type(stat.mode); - if (dummy == NF4BAD) { - status = nfserr_serverfault; - goto out; - } - *p++ = cpu_to_be32(dummy); - } - if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) - *p++ = cpu_to_be32(NFS4_FH_PERSISTENT); - else - *p++ = cpu_to_be32(NFS4_FH_PERSISTENT| - NFS4_FH_VOL_RENAME); - } - if (bmval0 & FATTR4_WORD0_CHANGE) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - p = encode_change(p, &stat, d_inode(dentry), exp); - } - if (bmval0 & FATTR4_WORD0_SIZE) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, stat.size); - } - if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - } - if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - } - if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(0); - } - if (bmval0 & FATTR4_WORD0_FSID) { - p = xdr_reserve_space(xdr, 16); - if (!p) - goto out_resource; - if (exp->ex_fslocs.migrated) { - p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR); - p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR); - } else switch(fsid_source(fhp)) { - case FSIDSOURCE_FSID: - p = xdr_encode_hyper(p, (u64)exp->ex_fsid); - p = xdr_encode_hyper(p, (u64)0); - break; - case FSIDSOURCE_DEV: - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(MAJOR(stat.dev)); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(MINOR(stat.dev)); - break; - case FSIDSOURCE_UUID: - p = xdr_encode_opaque_fixed(p, exp->ex_uuid, - EX_UUID_LEN); - break; - } - } - if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(0); - } - if (bmval0 & FATTR4_WORD0_LEASE_TIME) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(nn->nfsd4_lease); - } - if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(rdattr_err); - } - if (bmval0 & FATTR4_WORD0_ACL) { - struct nfs4_ace *ace; - - if (acl == NULL) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - - *p++ = cpu_to_be32(0); - goto out_acl; - } - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(acl->naces); - - for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { - p = xdr_reserve_space(xdr, 4*3); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(ace->type); - *p++ = cpu_to_be32(ace->flag); - *p++ = cpu_to_be32(ace->access_mask & - NFS4_ACE_MASK_ALL); - status = nfsd4_encode_aclname(xdr, rqstp, ace); - if (status) - goto out; - } - } -out_acl: - if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(IS_POSIXACL(dentry->d_inode) ? - ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); - } - if (bmval0 & FATTR4_WORD0_CANSETTIME) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - } - if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(0); - } - if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - } - if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - } - if (bmval0 & FATTR4_WORD0_FILEHANDLE) { - p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); - if (!p) - goto out_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, - fhp->fh_handle.fh_size); - } - if (bmval0 & FATTR4_WORD0_FILEID) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, stat.ino); - } - if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (u64) statfs.f_ffree); - } - if (bmval0 & FATTR4_WORD0_FILES_FREE) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (u64) statfs.f_ffree); - } - if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (u64) statfs.f_files); - } - if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { - status = nfsd4_encode_fs_locations(xdr, rqstp, exp); - if (status) - goto out; - } - if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - } - if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes); - } - if (bmval0 & FATTR4_WORD0_MAXLINK) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(255); - } - if (bmval0 & FATTR4_WORD0_MAXNAME) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(statfs.f_namelen); - } - if (bmval0 & FATTR4_WORD0_MAXREAD) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); - } - if (bmval0 & FATTR4_WORD0_MAXWRITE) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); - } - if (bmval1 & FATTR4_WORD1_MODE) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(stat.mode & S_IALLUGO); - } - if (bmval1 & FATTR4_WORD1_NO_TRUNC) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - } - if (bmval1 & FATTR4_WORD1_NUMLINKS) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(stat.nlink); - } - if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(xdr, rqstp, stat.uid); - if (status) - goto out; - } - if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(xdr, rqstp, stat.gid); - if (status) + bitmap_from_arr32(attr_bitmap, attrmask, + ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); + for_each_set_bit(bit, attr_bitmap, + ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) { + status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args); + if (status != nfs_ok) goto out; } - if (bmval1 & FATTR4_WORD1_RAWDEV) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - *p++ = cpu_to_be32((u32) MAJOR(stat.rdev)); - *p++ = cpu_to_be32((u32) MINOR(stat.rdev)); - } - if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; - p = xdr_encode_hyper(p, dummy64); - } - if (bmval1 & FATTR4_WORD1_SPACE_FREE) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; - p = xdr_encode_hyper(p, dummy64); - } - if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; - p = xdr_encode_hyper(p, dummy64); - } - if (bmval1 & FATTR4_WORD1_SPACE_USED) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - dummy64 = (u64)stat.blocks << 9; - p = xdr_encode_hyper(p, dummy64); - } - if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - status = nfsd4_encode_nfstime4(xdr, &stat.atime); - if (status) - goto out; - } - if (bmval1 & FATTR4_WORD1_TIME_CREATE) { - status = nfsd4_encode_nfstime4(xdr, &stat.btime); - if (status) - goto out; - } - if (bmval1 & FATTR4_WORD1_TIME_DELTA) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = encode_time_delta(p, d_inode(dentry)); - } - if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - status = nfsd4_encode_nfstime4(xdr, &stat.ctime); - if (status) - goto out; - } - if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - status = nfsd4_encode_nfstime4(xdr, &stat.mtime); - if (status) - goto out; - } - if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - u64 ino = stat.ino; - - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - /* - * Get ino of mountpoint in parent filesystem, if not ignoring - * crossmount and this is the root of a cross-mounted - * filesystem. - */ - if (ignore_crossmnt == 0 && - dentry == exp->ex_path.mnt->mnt_root) { - err = nfsd4_get_mounted_on_ino(exp, &ino); - if (err) - goto out_nfserr; - } - p = xdr_encode_hyper(p, ino); - } -#ifdef CONFIG_NFSD_PNFS - if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) { - status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types); - if (status) - goto out; - } - - if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) { - status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types); - if (status) - goto out; - } - - if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(stat.blksize); - } -#endif /* CONFIG_NFSD_PNFS */ - if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { - u32 supp[3]; - - memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp)); - supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0; - supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1; - supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2; - - status = nfsd4_encode_bitmap(xdr, supp[0], supp[1], supp[2]); - if (status) - goto out; - } - -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { - status = nfsd4_encode_security_label(xdr, rqstp, context, - contextlen); - if (status) - goto out; - } -#endif - - if (bmval2 & FATTR4_WORD2_XATTR_SUPPORT) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - err = xattr_supports_user_prefix(d_inode(dentry)); - *p++ = cpu_to_be32(err == 0); - } - *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); status = nfs_ok; out: #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if (context) - security_release_secctx(context, contextlen); + if (args.context) + security_release_secctx(args.context, args.contextlen); #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ - kfree(acl); + kfree(args.acl); if (tempfh) { fh_put(tempfh); kfree(tempfh); @@ -3506,12 +3714,28 @@ __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, __be32 ret; svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2); - ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, - ignore_crossmnt); + ret = nfsd4_encode_fattr4(rqstp, &xdr, fhp, exp, dentry, bmval, + ignore_crossmnt); *p = xdr.p; return ret; } +/* + * The buffer space for this field was reserved during a previous + * call to nfsd4_encode_entry4(). + */ +static void nfsd4_encode_entry4_nfs_cookie4(const struct nfsd4_readdir *readdir, + u64 offset) +{ + __be64 cookie = cpu_to_be64(offset); + struct xdr_stream *xdr = readdir->xdr; + + if (!readdir->cookie_offset) + return; + write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, &cookie, + sizeof(cookie)); +} + static inline int attributes_need_mount(u32 *bmval) { if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME)) @@ -3522,8 +3746,8 @@ static inline int attributes_need_mount(u32 *bmval) } static __be32 -nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, - const char *name, int namlen) +nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name, + int namlen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -3566,33 +3790,34 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, } out_encode: - nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval, - cd->rd_rqstp, ignore_crossmnt); + nfserr = nfsd4_encode_fattr4(cd->rd_rqstp, cd->xdr, NULL, exp, dentry, + cd->rd_bmval, ignore_crossmnt); out_put: dput(dentry); exp_put(exp); return nfserr; } -static __be32 * -nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr) +static __be32 +nfsd4_encode_entry4_rdattr_error(struct xdr_stream *xdr, __be32 nfserr) { - __be32 *p; - - p = xdr_reserve_space(xdr, 20); - if (!p) - return NULL; - *p++ = htonl(2); - *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ - *p++ = htonl(0); /* bmval1 */ + __be32 status; - *p++ = htonl(4); /* attribute length */ - *p++ = nfserr; /* no htonl */ - return p; + /* attrmask */ + status = nfsd4_encode_bitmap4(xdr, FATTR4_WORD0_RDATTR_ERROR, 0, 0); + if (status != nfs_ok) + return status; + /* attr_vals */ + if (xdr_stream_encode_u32(xdr, XDR_UNIT) != XDR_UNIT) + return nfserr_resource; + /* rdattr_error */ + if (xdr_stream_encode_be32(xdr, nfserr) != XDR_UNIT) + return nfserr_resource; + return nfs_ok; } static int -nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, +nfsd4_encode_entry4(void *ccdv, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct readdir_cd *ccd = ccdv; @@ -3603,8 +3828,6 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, u32 name_and_cookie; int entry_bytes; __be32 nfserr = nfserr_toosmall; - __be64 wire_offset; - __be32 *p; /* In nfsv4, "." and ".." never make it onto the wire.. */ if (name && isdotent(name, namlen)) { @@ -3612,24 +3835,19 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, return 0; } - if (cd->cookie_offset) { - wire_offset = cpu_to_be64(offset); - write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, - &wire_offset, 8); - } + /* Encode the previous entry's cookie value */ + nfsd4_encode_entry4_nfs_cookie4(cd, offset); - p = xdr_reserve_space(xdr, 4); - if (!p) + if (xdr_stream_encode_item_present(xdr) != XDR_UNIT) goto fail; - *p++ = xdr_one; /* mark entry present */ + + /* Reserve send buffer space for this entry's cookie value. */ cookie_offset = xdr->buf->len; - p = xdr_reserve_space(xdr, 3*4 + namlen); - if (!p) + if (nfsd4_encode_nfs_cookie4(xdr, OFFSET_MAX) != nfs_ok) goto fail; - p = xdr_encode_hyper(p, OFFSET_MAX); /* offset of next entry */ - p = xdr_encode_array(p, name, namlen); /* name length & name */ - - nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); + if (nfsd4_encode_component4(xdr, name, namlen) != nfs_ok) + goto fail; + nfserr = nfsd4_encode_entry4_fattr(cd, name, namlen); switch (nfserr) { case nfs_ok: break; @@ -3660,8 +3878,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, */ if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) goto fail; - p = nfsd4_encode_rdattr_error(xdr, nfserr); - if (p == NULL) { + if (nfsd4_encode_entry4_rdattr_error(xdr, nfserr)) { nfserr = nfserr_toosmall; goto fail; } @@ -3719,18 +3936,26 @@ nfsd4_encode_clientid4(struct xdr_stream *xdr, const clientid_t *clientid) return nfs_ok; } +/* This is a frequently-encoded item; open-coded for speed */ static __be32 -nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) +nfsd4_encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid) { __be32 *p; - p = xdr_reserve_space(xdr, sizeof(stateid_t)); + p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE); if (!p) return nfserr_resource; *p++ = cpu_to_be32(sid->si_generation); - p = xdr_encode_opaque_fixed(p, &sid->si_opaque, - sizeof(stateid_opaque_t)); - return 0; + memcpy(p, &sid->si_opaque, sizeof(sid->si_opaque)); + return nfs_ok; +} + +static __be32 +nfsd4_encode_sessionid4(struct xdr_stream *xdr, + const struct nfs4_sessionid *sessionid) +{ + return nfsd4_encode_opaque_fixed(xdr, sessionid->data, + NFS4_MAX_SESSIONID_LEN); } static __be32 @@ -3739,14 +3964,14 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_access *access = &u->access; struct xdr_stream *xdr = resp->xdr; - __be32 *p; + __be32 status; - p = xdr_reserve_space(xdr, 8); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(access->ac_supported); - *p++ = cpu_to_be32(access->ac_resp_access); - return 0; + /* supported */ + status = nfsd4_encode_uint32_t(xdr, access->ac_supported); + if (status != nfs_ok) + return status; + /* access */ + return nfsd4_encode_uint32_t(xdr, access->ac_resp_access); } static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, @@ -3754,17 +3979,16 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, { struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); - if (!p) + /* bctsr_sessid */ + nfserr = nfsd4_encode_sessionid4(xdr, &bcts->sessionid); + if (nfserr != nfs_ok) + return nfserr; + /* bctsr_dir */ + if (xdr_stream_encode_u32(xdr, bcts->dir) != XDR_UNIT) return nfserr_resource; - p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, - NFS4_MAX_SESSIONID_LEN); - *p++ = cpu_to_be32(bcts->dir); - /* Upshifting from TCP to RDMA is not supported */ - *p++ = cpu_to_be32(0); - return 0; + /* bctsr_use_conn_in_rdma_mode */ + return nfsd4_encode_bool(xdr, false); } static __be32 @@ -3774,7 +3998,8 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close = &u->close; struct xdr_stream *xdr = resp->xdr; - return nfsd4_encode_stateid(xdr, &close->cl_stateid); + /* open_stateid */ + return nfsd4_encode_stateid4(xdr, &close->cl_stateid); } @@ -3794,11 +4019,13 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create = &u->create; struct xdr_stream *xdr = resp->xdr; + /* cinfo */ nfserr = nfsd4_encode_change_info4(xdr, &create->cr_cinfo); if (nfserr) return nfserr; - return nfsd4_encode_bitmap(xdr, create->cr_bmval[0], - create->cr_bmval[1], create->cr_bmval[2]); + /* attrset */ + return nfsd4_encode_bitmap4(xdr, create->cr_bmval[0], + create->cr_bmval[1], create->cr_bmval[2]); } static __be32 @@ -3809,65 +4036,56 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = resp->xdr; - return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, - getattr->ga_bmval, resp->rqstp, 0); + /* obj_attributes */ + return nfsd4_encode_fattr4(resp->rqstp, xdr, fhp, fhp->fh_export, + fhp->fh_dentry, getattr->ga_bmval, 0); } static __be32 nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { - struct svc_fh **fhpp = &u->getfh; struct xdr_stream *xdr = resp->xdr; - struct svc_fh *fhp = *fhpp; - unsigned int len; - __be32 *p; + struct svc_fh *fhp = u->getfh; - len = fhp->fh_handle.fh_size; - p = xdr_reserve_space(xdr, len + 4); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, len); - return 0; + /* object */ + return nfsd4_encode_nfs_fh4(xdr, &fhp->fh_handle); } -/* -* Including all fields other than the name, a LOCK4denied structure requires -* 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes. -*/ static __be32 -nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) +nfsd4_encode_lock_owner4(struct xdr_stream *xdr, const clientid_t *clientid, + const struct xdr_netobj *owner) { - struct xdr_netobj *conf = &ld->ld_owner; - __be32 *p; + __be32 status; -again: - p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len)); - if (!p) { - /* - * Don't fail to return the result just because we can't - * return the conflicting open: - */ - if (conf->len) { - kfree(conf->data); - conf->len = 0; - conf->data = NULL; - goto again; - } + /* clientid */ + status = nfsd4_encode_clientid4(xdr, clientid); + if (status != nfs_ok) + return status; + /* owner */ + return nfsd4_encode_opaque(xdr, owner->data, owner->len); +} + +static __be32 +nfsd4_encode_lock4denied(struct xdr_stream *xdr, + const struct nfsd4_lock_denied *ld) +{ + __be32 status; + + /* offset */ + status = nfsd4_encode_offset4(xdr, ld->ld_start); + if (status != nfs_ok) + return status; + /* length */ + status = nfsd4_encode_length4(xdr, ld->ld_length); + if (status != nfs_ok) + return status; + /* locktype */ + if (xdr_stream_encode_u32(xdr, ld->ld_type) != XDR_UNIT) return nfserr_resource; - } - p = xdr_encode_hyper(p, ld->ld_start); - p = xdr_encode_hyper(p, ld->ld_length); - *p++ = cpu_to_be32(ld->ld_type); - if (conf->len) { - p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); - p = xdr_encode_opaque(p, conf->data, conf->len); - kfree(conf->data); - } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ - p = xdr_encode_hyper(p, (u64)0); /* clientid */ - *p++ = cpu_to_be32(0); /* length of owner name */ - } - return nfserr_denied; + /* owner */ + return nfsd4_encode_lock_owner4(xdr, &ld->ld_clientid, + &ld->ld_owner); } static __be32 @@ -3876,13 +4094,21 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_lock *lock = &u->lock; struct xdr_stream *xdr = resp->xdr; + __be32 status; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); - else if (nfserr == nfserr_denied) - nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); - - return nfserr; + switch (nfserr) { + case nfs_ok: + /* resok4 */ + status = nfsd4_encode_stateid4(xdr, &lock->lk_resp_stateid); + break; + case nfserr_denied: + /* denied */ + status = nfsd4_encode_lock4denied(xdr, &lock->lk_denied); + break; + default: + return nfserr; + } + return status != nfs_ok ? status : nfserr; } static __be32 @@ -3891,9 +4117,14 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_lockt *lockt = &u->lockt; struct xdr_stream *xdr = resp->xdr; + __be32 status; - if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); + if (nfserr == nfserr_denied) { + /* denied */ + status = nfsd4_encode_lock4denied(xdr, &lockt->lt_denied); + if (status != nfs_ok) + return status; + } return nfserr; } @@ -3904,7 +4135,8 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku = &u->locku; struct xdr_stream *xdr = resp->xdr; - return nfsd4_encode_stateid(xdr, &locku->lu_stateid); + /* lock_stateid */ + return nfsd4_encode_stateid4(xdr, &locku->lu_stateid); } @@ -3918,104 +4150,159 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, return nfsd4_encode_change_info4(xdr, &link->li_cinfo); } +/* + * This implementation does not yet support returning an ACE in an + * OPEN that offers a delegation. + */ +static __be32 +nfsd4_encode_open_nfsace4(struct xdr_stream *xdr) +{ + __be32 status; + + /* type */ + status = nfsd4_encode_acetype4(xdr, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + if (status != nfs_ok) + return nfserr_resource; + /* flag */ + status = nfsd4_encode_aceflag4(xdr, 0); + if (status != nfs_ok) + return nfserr_resource; + /* access mask */ + status = nfsd4_encode_acemask4(xdr, 0); + if (status != nfs_ok) + return nfserr_resource; + /* who - empty for now */ + if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) + return nfserr_resource; + return nfs_ok; +} static __be32 -nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_open_read_delegation4(struct xdr_stream *xdr, struct nfsd4_open *open) { - struct nfsd4_open *open = &u->open; - struct xdr_stream *xdr = resp->xdr; - __be32 *p; + __be32 status; - nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); - if (nfserr) - return nfserr; - nfserr = nfsd4_encode_change_info4(xdr, &open->op_cinfo); - if (nfserr) - return nfserr; - if (xdr_stream_encode_u32(xdr, open->op_rflags) < 0) + /* stateid */ + status = nfsd4_encode_stateid4(xdr, &open->op_delegate_stateid); + if (status != nfs_ok) + return status; + /* recall */ + status = nfsd4_encode_bool(xdr, open->op_recall); + if (status != nfs_ok) + return status; + /* permissions */ + return nfsd4_encode_open_nfsace4(xdr); +} + +static __be32 +nfsd4_encode_nfs_space_limit4(struct xdr_stream *xdr, u64 filesize) +{ + /* limitby */ + if (xdr_stream_encode_u32(xdr, NFS4_LIMIT_SIZE) != XDR_UNIT) return nfserr_resource; + /* filesize */ + return nfsd4_encode_uint64_t(xdr, filesize); +} - nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1], - open->op_bmval[2]); - if (nfserr) - return nfserr; +static __be32 +nfsd4_encode_open_write_delegation4(struct xdr_stream *xdr, + struct nfsd4_open *open) +{ + __be32 status; - p = xdr_reserve_space(xdr, 4); - if (!p) + /* stateid */ + status = nfsd4_encode_stateid4(xdr, &open->op_delegate_stateid); + if (status != nfs_ok) + return status; + /* recall */ + status = nfsd4_encode_bool(xdr, open->op_recall); + if (status != nfs_ok) + return status; + /* space_limit */ + status = nfsd4_encode_nfs_space_limit4(xdr, 0); + if (status != nfs_ok) + return status; + return nfsd4_encode_open_nfsace4(xdr); +} + +static __be32 +nfsd4_encode_open_none_delegation4(struct xdr_stream *xdr, + struct nfsd4_open *open) +{ + __be32 status = nfs_ok; + + /* ond_why */ + if (xdr_stream_encode_u32(xdr, open->op_why_no_deleg) != XDR_UNIT) return nfserr_resource; + switch (open->op_why_no_deleg) { + case WND4_CONTENTION: + /* ond_server_will_push_deleg */ + status = nfsd4_encode_bool(xdr, false); + break; + case WND4_RESOURCE: + /* ond_server_will_signal_avail */ + status = nfsd4_encode_bool(xdr, false); + } + return status; +} + +static __be32 +nfsd4_encode_open_delegation4(struct xdr_stream *xdr, struct nfsd4_open *open) +{ + __be32 status; - *p++ = cpu_to_be32(open->op_delegate_type); + /* delegation_type */ + if (xdr_stream_encode_u32(xdr, open->op_delegate_type) != XDR_UNIT) + return nfserr_resource; switch (open->op_delegate_type) { case NFS4_OPEN_DELEGATE_NONE: + status = nfs_ok; break; case NFS4_OPEN_DELEGATE_READ: - nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(open->op_recall); - - /* - * TODO: ACE's in delegations - */ - *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ + /* read */ + status = nfsd4_encode_open_read_delegation4(xdr, open); break; case NFS4_OPEN_DELEGATE_WRITE: - nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); - if (nfserr) - return nfserr; - - p = xdr_reserve_space(xdr, XDR_UNIT * 8); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(open->op_recall); - - /* - * Always flush on close - * - * TODO: space_limit's in delegations - */ - *p++ = cpu_to_be32(NFS4_LIMIT_SIZE); - *p++ = xdr_zero; - *p++ = xdr_zero; - - /* - * TODO: ACE's in delegations - */ - *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ + /* write */ + status = nfsd4_encode_open_write_delegation4(xdr, open); break; - case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */ - switch (open->op_why_no_deleg) { - case WND4_CONTENTION: - case WND4_RESOURCE: - p = xdr_reserve_space(xdr, 8); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(open->op_why_no_deleg); - /* deleg signaling not supported yet: */ - *p++ = cpu_to_be32(0); - break; - default: - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(open->op_why_no_deleg); - } + case NFS4_OPEN_DELEGATE_NONE_EXT: + /* od_whynone */ + status = nfsd4_encode_open_none_delegation4(xdr, open); break; default: - BUG(); + status = nfserr_serverfault; } - /* XXX save filehandle here */ - return 0; + + return status; +} + +static __be32 +nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) +{ + struct nfsd4_open *open = &u->open; + struct xdr_stream *xdr = resp->xdr; + + /* stateid */ + nfserr = nfsd4_encode_stateid4(xdr, &open->op_stateid); + if (nfserr != nfs_ok) + return nfserr; + /* cinfo */ + nfserr = nfsd4_encode_change_info4(xdr, &open->op_cinfo); + if (nfserr != nfs_ok) + return nfserr; + /* rflags */ + nfserr = nfsd4_encode_uint32_t(xdr, open->op_rflags); + if (nfserr != nfs_ok) + return nfserr; + /* attrset */ + nfserr = nfsd4_encode_bitmap4(xdr, open->op_bmval[0], + open->op_bmval[1], open->op_bmval[2]); + if (nfserr != nfs_ok) + return nfserr; + /* delegation */ + return nfsd4_encode_open_delegation4(xdr, open); } static __be32 @@ -4025,7 +4312,8 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc = &u->open_confirm; struct xdr_stream *xdr = resp->xdr; - return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); + /* open_stateid */ + return nfsd4_encode_stateid4(xdr, &oc->oc_resp_stateid); } static __be32 @@ -4035,7 +4323,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od = &u->open_downgrade; struct xdr_stream *xdr = resp->xdr; - return nfsd4_encode_stateid(xdr, &od->od_stateid); + /* open_stateid */ + return nfsd4_encode_stateid4(xdr, &od->od_stateid); } /* @@ -4132,12 +4421,13 @@ static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { + struct nfsd4_compoundargs *argp = resp->rqstp->rq_argp; struct nfsd4_read *read = &u->read; - bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); - unsigned long maxcount; struct xdr_stream *xdr = resp->xdr; - struct file *file; int starting_len = xdr->buf->len; + bool splice_ok = argp->splice_ok; + unsigned long maxcount; + struct file *file; __be32 *p; if (nfserr) @@ -4219,90 +4509,83 @@ out_err: return nfserr; } -static __be32 -nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +static __be32 nfsd4_encode_dirlist4(struct xdr_stream *xdr, + struct nfsd4_readdir *readdir, + u32 max_payload) { - struct nfsd4_readdir *readdir = &u->readdir; - int maxcount; - int bytes_left; + int bytes_left, maxcount, starting_len = xdr->buf->len; loff_t offset; - __be64 wire_offset; - struct xdr_stream *xdr = resp->xdr; - int starting_len = xdr->buf->len; - __be32 *p; - - nfserr = nfsd4_encode_verifier4(xdr, &readdir->rd_verf); - if (nfserr != nfs_ok) - return nfserr; + __be32 status; /* * Number of bytes left for directory entries allowing for the - * final 8 bytes of the readdir and a following failed op: + * final 8 bytes of the readdir and a following failed op. */ - bytes_left = xdr->buf->buflen - xdr->buf->len - - COMPOUND_ERR_SLACK_SPACE - 8; - if (bytes_left < 0) { - nfserr = nfserr_resource; - goto err_no_verf; - } - maxcount = svc_max_payload(resp->rqstp); - maxcount = min_t(u32, readdir->rd_maxcount, maxcount); + bytes_left = xdr->buf->buflen - xdr->buf->len - + COMPOUND_ERR_SLACK_SPACE - XDR_UNIT * 2; + if (bytes_left < 0) + return nfserr_resource; + maxcount = min_t(u32, readdir->rd_maxcount, max_payload); + /* - * Note the rfc defines rd_maxcount as the size of the - * READDIR4resok structure, which includes the verifier above - * and the 8 bytes encoded at the end of this function: + * The RFC defines rd_maxcount as the size of the + * READDIR4resok structure, which includes the verifier + * and the 8 bytes encoded at the end of this function. */ - if (maxcount < 16) { - nfserr = nfserr_toosmall; - goto err_no_verf; - } - maxcount = min_t(int, maxcount-16, bytes_left); + if (maxcount < XDR_UNIT * 4) + return nfserr_toosmall; + maxcount = min_t(int, maxcount - XDR_UNIT * 4, bytes_left); - /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */ + /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0 */ if (!readdir->rd_dircount) - readdir->rd_dircount = svc_max_payload(resp->rqstp); + readdir->rd_dircount = max_payload; + /* *entries */ readdir->xdr = xdr; readdir->rd_maxcount = maxcount; readdir->common.err = 0; readdir->cookie_offset = 0; - offset = readdir->rd_cookie; - nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, - &offset, - &readdir->common, nfsd4_encode_dirent); - if (nfserr == nfs_ok && - readdir->common.err == nfserr_toosmall && - xdr->buf->len == starting_len + 8) { - /* nothing encoded; which limit did we hit?: */ - if (maxcount - 16 < bytes_left) - /* It was the fault of rd_maxcount: */ - nfserr = nfserr_toosmall; - else - /* We ran out of buffer space: */ - nfserr = nfserr_resource; + status = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, &offset, + &readdir->common, nfsd4_encode_entry4); + if (status) + return status; + if (readdir->common.err == nfserr_toosmall && + xdr->buf->len == starting_len) { + /* No entries were encoded. Which limit did we hit? */ + if (maxcount - XDR_UNIT * 4 < bytes_left) + /* It was the fault of rd_maxcount */ + return nfserr_toosmall; + /* We ran out of buffer space */ + return nfserr_resource; } - if (nfserr) - goto err_no_verf; + /* Encode the final entry's cookie value */ + nfsd4_encode_entry4_nfs_cookie4(readdir, offset); + /* No entries follow */ + if (xdr_stream_encode_item_absent(xdr) != XDR_UNIT) + return nfserr_resource; - if (readdir->cookie_offset) { - wire_offset = cpu_to_be64(offset); - write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, - &wire_offset, 8); - } + /* eof */ + return nfsd4_encode_bool(xdr, readdir->common.err == nfserr_eof); +} - p = xdr_reserve_space(xdr, 8); - if (!p) { - WARN_ON_ONCE(1); - goto err_no_verf; - } - *p++ = 0; /* no more entries */ - *p++ = htonl(readdir->common.err == nfserr_eof); +static __be32 +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) +{ + struct nfsd4_readdir *readdir = &u->readdir; + struct xdr_stream *xdr = resp->xdr; + int starting_len = xdr->buf->len; - return 0; -err_no_verf: - xdr_truncate_encode(xdr, starting_len); + /* cookieverf */ + nfserr = nfsd4_encode_verifier4(xdr, &readdir->rd_verf); + if (nfserr != nfs_ok) + return nfserr; + + /* reply */ + nfserr = nfsd4_encode_dirlist4(xdr, readdir, svc_max_payload(resp->rqstp)); + if (nfserr != nfs_ok) + xdr_truncate_encode(xdr, starting_len); return nfserr; } @@ -4330,13 +4613,34 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 +nfsd4_encode_rpcsec_gss_info(struct xdr_stream *xdr, + struct rpcsec_gss_info *info) +{ + __be32 status; + + /* oid */ + if (xdr_stream_encode_opaque(xdr, info->oid.data, info->oid.len) < 0) + return nfserr_resource; + /* qop */ + status = nfsd4_encode_qop4(xdr, info->qop); + if (status != nfs_ok) + return status; + /* service */ + if (xdr_stream_encode_u32(xdr, info->service) != XDR_UNIT) + return nfserr_resource; + + return nfs_ok; +} + +static __be32 nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) { u32 i, nflavs, supported; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; - __be32 *p, *flavorsp; static bool report = true; + __be32 *flavorsp; + __be32 status; if (exp->ex_nflavors) { flavs = exp->ex_flavors; @@ -4359,10 +4663,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) } supported = 0; - p = xdr_reserve_space(xdr, 4); - if (!p) + flavorsp = xdr_reserve_space(xdr, XDR_UNIT); + if (!flavorsp) return nfserr_resource; - flavorsp = p++; /* to be backfilled later */ for (i = 0; i < nflavs; i++) { rpc_authflavor_t pf = flavs[i].pseudoflavor; @@ -4370,20 +4673,22 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) if (rpcauth_get_gssinfo(pf, &info) == 0) { supported++; - p = xdr_reserve_space(xdr, 4 + 4 + - XDR_LEN(info.oid.len) + 4 + 4); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(RPC_AUTH_GSS); - p = xdr_encode_opaque(p, info.oid.data, info.oid.len); - *p++ = cpu_to_be32(info.qop); - *p++ = cpu_to_be32(info.service); + + /* flavor */ + status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS); + if (status != nfs_ok) + return status; + /* flavor_info */ + status = nfsd4_encode_rpcsec_gss_info(xdr, &info); + if (status != nfs_ok) + return status; } else if (pf < RPC_AUTH_MAXFLAVOR) { supported++; - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(pf); + + /* flavor */ + status = nfsd4_encode_uint32_t(xdr, pf); + if (status != nfs_ok) + return status; } else { if (report) pr_warn("NFS: SECINFO: security flavor %u " @@ -4393,7 +4698,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) if (nflavs != supported) report = false; - *flavorsp = htonl(supported); + *flavorsp = cpu_to_be32(supported); return 0; } @@ -4417,34 +4722,25 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); } -/* - * The SETATTR encode routine is special -- it always encodes a bitmap, - * regardless of the error status. - */ static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_setattr *setattr = &u->setattr; - struct xdr_stream *xdr = resp->xdr; - __be32 *p; + __be32 status; - p = xdr_reserve_space(xdr, 16); - if (!p) - return nfserr_resource; - if (nfserr) { - *p++ = cpu_to_be32(3); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); - } - else { - *p++ = cpu_to_be32(3); - *p++ = cpu_to_be32(setattr->sa_bmval[0]); - *p++ = cpu_to_be32(setattr->sa_bmval[1]); - *p++ = cpu_to_be32(setattr->sa_bmval[2]); + switch (nfserr) { + case nfs_ok: + /* attrsset */ + status = nfsd4_encode_bitmap4(resp->xdr, setattr->sa_bmval[0], + setattr->sa_bmval[1], + setattr->sa_bmval[2]); + break; + default: + /* attrsset */ + status = nfsd4_encode_bitmap4(resp->xdr, 0, 0, 0); } - return nfserr; + return status != nfs_ok ? status : nfserr; } static __be32 @@ -4480,86 +4776,148 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_write *write = &u->write; + struct xdr_stream *xdr = resp->xdr; - if (xdr_stream_encode_u32(resp->xdr, write->wr_bytes_written) < 0) - return nfserr_resource; - if (xdr_stream_encode_u32(resp->xdr, write->wr_how_written) < 0) + /* count */ + nfserr = nfsd4_encode_count4(xdr, write->wr_bytes_written); + if (nfserr) + return nfserr; + /* committed */ + if (xdr_stream_encode_u32(xdr, write->wr_how_written) != XDR_UNIT) return nfserr_resource; - return nfsd4_encode_verifier4(resp->xdr, &write->wr_verifier); + /* writeverf */ + return nfsd4_encode_verifier4(xdr, &write->wr_verifier); } static __be32 -nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_state_protect_ops4(struct xdr_stream *xdr, + struct nfsd4_exchange_id *exid) { - struct nfsd4_exchange_id *exid = &u->exchange_id; - struct xdr_stream *xdr = resp->xdr; - __be32 *p; - char *major_id; - char *server_scope; - int major_id_sz; - int server_scope_sz; - uint64_t minor_id = 0; - struct nfsd_net *nn = net_generic(SVC_NET(resp->rqstp), nfsd_net_id); + __be32 status; - major_id = nn->nfsd_name; - major_id_sz = strlen(nn->nfsd_name); - server_scope = nn->nfsd_name; - server_scope_sz = strlen(nn->nfsd_name); + /* spo_must_enforce */ + status = nfsd4_encode_bitmap4(xdr, exid->spo_must_enforce[0], + exid->spo_must_enforce[1], + exid->spo_must_enforce[2]); + if (status != nfs_ok) + return status; + /* spo_must_allow */ + return nfsd4_encode_bitmap4(xdr, exid->spo_must_allow[0], + exid->spo_must_allow[1], + exid->spo_must_allow[2]); +} - if (nfsd4_encode_clientid4(xdr, &exid->clientid) != nfs_ok) - return nfserr_resource; - if (xdr_stream_encode_u32(xdr, exid->seqid) < 0) - return nfserr_resource; - if (xdr_stream_encode_u32(xdr, exid->flags) < 0) - return nfserr_resource; +static __be32 +nfsd4_encode_state_protect4_r(struct xdr_stream *xdr, struct nfsd4_exchange_id *exid) +{ + __be32 status; - if (xdr_stream_encode_u32(xdr, exid->spa_how) < 0) + if (xdr_stream_encode_u32(xdr, exid->spa_how) != XDR_UNIT) return nfserr_resource; switch (exid->spa_how) { case SP4_NONE: + status = nfs_ok; break; case SP4_MACH_CRED: - /* spo_must_enforce bitmap: */ - nfserr = nfsd4_encode_bitmap(xdr, - exid->spo_must_enforce[0], - exid->spo_must_enforce[1], - exid->spo_must_enforce[2]); - if (nfserr) - return nfserr; - /* spo_must_allow bitmap: */ - nfserr = nfsd4_encode_bitmap(xdr, - exid->spo_must_allow[0], - exid->spo_must_allow[1], - exid->spo_must_allow[2]); - if (nfserr) - return nfserr; + /* spr_mach_ops */ + status = nfsd4_encode_state_protect_ops4(xdr, exid); break; default: - WARN_ON_ONCE(1); + status = nfserr_serverfault; } + return status; +} - p = xdr_reserve_space(xdr, - 8 /* so_minor_id */ + - 4 /* so_major_id.len */ + - (XDR_QUADLEN(major_id_sz) * 4) + - 4 /* eir_server_scope.len */ + - (XDR_QUADLEN(server_scope_sz) * 4) + - 4 /* eir_server_impl_id.count (0) */); - if (!p) +static __be32 +nfsd4_encode_server_owner4(struct xdr_stream *xdr, struct svc_rqst *rqstp) +{ + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + __be32 status; + + /* so_minor_id */ + status = nfsd4_encode_uint64_t(xdr, 0); + if (status != nfs_ok) + return status; + /* so_major_id */ + return nfsd4_encode_opaque(xdr, nn->nfsd_name, strlen(nn->nfsd_name)); +} + +static __be32 +nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) +{ + struct nfsd_net *nn = net_generic(SVC_NET(resp->rqstp), nfsd_net_id); + struct nfsd4_exchange_id *exid = &u->exchange_id; + struct xdr_stream *xdr = resp->xdr; + + /* eir_clientid */ + nfserr = nfsd4_encode_clientid4(xdr, &exid->clientid); + if (nfserr != nfs_ok) + return nfserr; + /* eir_sequenceid */ + nfserr = nfsd4_encode_sequenceid4(xdr, exid->seqid); + if (nfserr != nfs_ok) + return nfserr; + /* eir_flags */ + nfserr = nfsd4_encode_uint32_t(xdr, exid->flags); + if (nfserr != nfs_ok) + return nfserr; + /* eir_state_protect */ + nfserr = nfsd4_encode_state_protect4_r(xdr, exid); + if (nfserr != nfs_ok) + return nfserr; + /* eir_server_owner */ + nfserr = nfsd4_encode_server_owner4(xdr, resp->rqstp); + if (nfserr != nfs_ok) + return nfserr; + /* eir_server_scope */ + nfserr = nfsd4_encode_opaque(xdr, nn->nfsd_name, + strlen(nn->nfsd_name)); + if (nfserr != nfs_ok) + return nfserr; + /* eir_server_impl_id<1> */ + if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) return nfserr_resource; - /* The server_owner struct */ - p = xdr_encode_hyper(p, minor_id); /* Minor id */ - /* major id */ - p = xdr_encode_opaque(p, major_id, major_id_sz); + return nfs_ok; +} - /* Server scope */ - p = xdr_encode_opaque(p, server_scope, server_scope_sz); +static __be32 +nfsd4_encode_channel_attrs4(struct xdr_stream *xdr, + const struct nfsd4_channel_attrs *attrs) +{ + __be32 status; - /* Implementation id */ - *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ - return 0; + /* ca_headerpadsize */ + status = nfsd4_encode_count4(xdr, 0); + if (status != nfs_ok) + return status; + /* ca_maxrequestsize */ + status = nfsd4_encode_count4(xdr, attrs->maxreq_sz); + if (status != nfs_ok) + return status; + /* ca_maxresponsesize */ + status = nfsd4_encode_count4(xdr, attrs->maxresp_sz); + if (status != nfs_ok) + return status; + /* ca_maxresponsesize_cached */ + status = nfsd4_encode_count4(xdr, attrs->maxresp_cached); + if (status != nfs_ok) + return status; + /* ca_maxoperations */ + status = nfsd4_encode_count4(xdr, attrs->maxops); + if (status != nfs_ok) + return status; + /* ca_maxrequests */ + status = nfsd4_encode_count4(xdr, attrs->maxreqs); + if (status != nfs_ok) + return status; + /* ca_rdma_ird<1> */ + if (xdr_stream_encode_u32(xdr, attrs->nr_rdma_attrs) != XDR_UNIT) + return nfserr_resource; + if (attrs->nr_rdma_attrs) + return nfsd4_encode_uint32_t(xdr, attrs->rdma_attrs); + return nfs_ok; } static __be32 @@ -4568,52 +4926,25 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_create_session *sess = &u->create_session; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - - p = xdr_reserve_space(xdr, 24); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, sess->sessionid.data, - NFS4_MAX_SESSIONID_LEN); - *p++ = cpu_to_be32(sess->seqid); - *p++ = cpu_to_be32(sess->flags); - - p = xdr_reserve_space(xdr, 28); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(0); /* headerpadsz */ - *p++ = cpu_to_be32(sess->fore_channel.maxreq_sz); - *p++ = cpu_to_be32(sess->fore_channel.maxresp_sz); - *p++ = cpu_to_be32(sess->fore_channel.maxresp_cached); - *p++ = cpu_to_be32(sess->fore_channel.maxops); - *p++ = cpu_to_be32(sess->fore_channel.maxreqs); - *p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs); - - if (sess->fore_channel.nr_rdma_attrs) { - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(sess->fore_channel.rdma_attrs); - } - p = xdr_reserve_space(xdr, 28); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(0); /* headerpadsz */ - *p++ = cpu_to_be32(sess->back_channel.maxreq_sz); - *p++ = cpu_to_be32(sess->back_channel.maxresp_sz); - *p++ = cpu_to_be32(sess->back_channel.maxresp_cached); - *p++ = cpu_to_be32(sess->back_channel.maxops); - *p++ = cpu_to_be32(sess->back_channel.maxreqs); - *p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs); - - if (sess->back_channel.nr_rdma_attrs) { - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(sess->back_channel.rdma_attrs); - } - return 0; + /* csr_sessionid */ + nfserr = nfsd4_encode_sessionid4(xdr, &sess->sessionid); + if (nfserr != nfs_ok) + return nfserr; + /* csr_sequence */ + nfserr = nfsd4_encode_sequenceid4(xdr, sess->seqid); + if (nfserr != nfs_ok) + return nfserr; + /* csr_flags */ + nfserr = nfsd4_encode_uint32_t(xdr, sess->flags); + if (nfserr != nfs_ok) + return nfserr; + /* csr_fore_chan_attrs */ + nfserr = nfsd4_encode_channel_attrs4(xdr, &sess->fore_channel); + if (nfserr != nfs_ok) + return nfserr; + /* csr_back_chan_attrs */ + return nfsd4_encode_channel_attrs4(xdr, &sess->back_channel); } static __be32 @@ -4622,22 +4953,35 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_sequence *seq = &u->sequence; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, seq->sessionid.data, - NFS4_MAX_SESSIONID_LEN); - *p++ = cpu_to_be32(seq->seqid); - *p++ = cpu_to_be32(seq->slotid); + /* sr_sessionid */ + nfserr = nfsd4_encode_sessionid4(xdr, &seq->sessionid); + if (nfserr != nfs_ok) + return nfserr; + /* sr_sequenceid */ + nfserr = nfsd4_encode_sequenceid4(xdr, seq->seqid); + if (nfserr != nfs_ok) + return nfserr; + /* sr_slotid */ + nfserr = nfsd4_encode_slotid4(xdr, seq->slotid); + if (nfserr != nfs_ok) + return nfserr; /* Note slotid's are numbered from zero: */ - *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */ - *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */ - *p++ = cpu_to_be32(seq->status_flags); + /* sr_highest_slotid */ + nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); + if (nfserr != nfs_ok) + return nfserr; + /* sr_target_highest_slotid */ + nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); + if (nfserr != nfs_ok) + return nfserr; + /* sr_status_flags */ + nfserr = nfsd4_encode_uint32_t(xdr, seq->status_flags); + if (nfserr != nfs_ok) + return nfserr; resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */ - return 0; + return nfs_ok; } static __be32 @@ -4645,125 +4989,175 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_test_stateid *test_stateid = &u->test_stateid; - struct xdr_stream *xdr = resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; - __be32 *p; + struct xdr_stream *xdr = resp->xdr; - p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); - if (!p) + /* tsr_status_codes<> */ + if (xdr_stream_encode_u32(xdr, test_stateid->ts_num_ids) != XDR_UNIT) return nfserr_resource; - *p++ = htonl(test_stateid->ts_num_ids); - - list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) { - *p++ = stateid->ts_id_status; + list_for_each_entry_safe(stateid, next, + &test_stateid->ts_stateid_list, ts_id_list) { + if (xdr_stream_encode_be32(xdr, stateid->ts_id_status) != XDR_UNIT) + return nfserr_resource; } + return nfs_ok; +} - return 0; +static __be32 +nfsd4_encode_get_dir_delegation(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) +{ + struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation; + struct xdr_stream *xdr = resp->xdr; + __be32 status = nfserr_resource; + + switch(gdd->gddrnf_status) { + case GDD4_OK: + if (xdr_stream_encode_u32(xdr, GDD4_OK) != XDR_UNIT) + break; + status = nfsd4_encode_verifier4(xdr, &gdd->gddr_cookieverf); + if (status) + break; + status = nfsd4_encode_stateid4(xdr, &gdd->gddr_stateid); + if (status) + break; + status = nfsd4_encode_bitmap4(xdr, gdd->gddr_notification[0], 0, 0); + if (status) + break; + status = nfsd4_encode_bitmap4(xdr, gdd->gddr_child_attributes[0], + gdd->gddr_child_attributes[1], + gdd->gddr_child_attributes[2]); + if (status) + break; + status = nfsd4_encode_bitmap4(xdr, gdd->gddr_dir_attributes[0], + gdd->gddr_dir_attributes[1], + gdd->gddr_dir_attributes[2]); + break; + default: + pr_warn("nfsd: bad gddrnf_status (%u)\n", gdd->gddrnf_status); + gdd->gddrnf_will_signal_deleg_avail = 0; + fallthrough; + case GDD4_UNAVAIL: + if (xdr_stream_encode_u32(xdr, GDD4_UNAVAIL) != XDR_UNIT) + break; + status = nfsd4_encode_bool(xdr, gdd->gddrnf_will_signal_deleg_avail); + break; + } + return status; } #ifdef CONFIG_NFSD_PNFS static __be32 -nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_device_addr4(struct xdr_stream *xdr, + const struct nfsd4_getdeviceinfo *gdev) { - struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; - struct xdr_stream *xdr = resp->xdr; + u32 needed_len, starting_len = xdr->buf->len; const struct nfsd4_layout_ops *ops; - u32 starting_len = xdr->buf->len, needed_len; - __be32 *p; + __be32 status; - p = xdr_reserve_space(xdr, 4); - if (!p) + /* da_layout_type */ + if (xdr_stream_encode_u32(xdr, gdev->gd_layout_type) != XDR_UNIT) return nfserr_resource; - - *p++ = cpu_to_be32(gdev->gd_layout_type); - + /* da_addr_body */ ops = nfsd4_layout_ops[gdev->gd_layout_type]; - nfserr = ops->encode_getdeviceinfo(xdr, gdev); - if (nfserr) { + status = ops->encode_getdeviceinfo(xdr, gdev); + if (status != nfs_ok) { /* - * We don't bother to burden the layout drivers with - * enforcing gd_maxcount, just tell the client to - * come back with a bigger buffer if it's not enough. + * Don't burden the layout drivers with enforcing + * gd_maxcount. Just tell the client to come back + * with a bigger buffer if it's not enough. */ - if (xdr->buf->len + 4 > gdev->gd_maxcount) + if (xdr->buf->len + XDR_UNIT > gdev->gd_maxcount) goto toosmall; - return nfserr; + return status; } - if (gdev->gd_notify_types) { - p = xdr_reserve_space(xdr, 4 + 4); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(1); /* bitmap length */ - *p++ = cpu_to_be32(gdev->gd_notify_types); - } else { - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - *p++ = 0; - } + return nfs_ok; - return 0; toosmall: - dprintk("%s: maxcount too small\n", __func__); - needed_len = xdr->buf->len + 4 /* notifications */; + needed_len = xdr->buf->len + XDR_UNIT; /* notifications */ xdr_truncate_encode(xdr, starting_len); - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(needed_len); + + status = nfsd4_encode_count4(xdr, needed_len); + if (status != nfs_ok) + return status; return nfserr_toosmall; } static __be32 -nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, +nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { - struct nfsd4_layoutget *lgp = &u->layoutget; + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; struct xdr_stream *xdr = resp->xdr; - const struct nfsd4_layout_ops *ops; - __be32 *p; - p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t)); - if (!p) - return nfserr_resource; - - *p++ = cpu_to_be32(1); /* we always set return-on-close */ - *p++ = cpu_to_be32(lgp->lg_sid.si_generation); - p = xdr_encode_opaque_fixed(p, &lgp->lg_sid.si_opaque, - sizeof(stateid_opaque_t)); + /* gdir_device_addr */ + nfserr = nfsd4_encode_device_addr4(xdr, gdev); + if (nfserr) + return nfserr; + /* gdir_notification */ + return nfsd4_encode_bitmap4(xdr, gdev->gd_notify_types, 0, 0); +} - *p++ = cpu_to_be32(1); /* we always return a single layout */ - p = xdr_encode_hyper(p, lgp->lg_seg.offset); - p = xdr_encode_hyper(p, lgp->lg_seg.length); - *p++ = cpu_to_be32(lgp->lg_seg.iomode); - *p++ = cpu_to_be32(lgp->lg_layout_type); +static __be32 +nfsd4_encode_layout4(struct xdr_stream *xdr, const struct nfsd4_layoutget *lgp) +{ + const struct nfsd4_layout_ops *ops = nfsd4_layout_ops[lgp->lg_layout_type]; + __be32 status; - ops = nfsd4_layout_ops[lgp->lg_layout_type]; + /* lo_offset */ + status = nfsd4_encode_offset4(xdr, lgp->lg_seg.offset); + if (status != nfs_ok) + return status; + /* lo_length */ + status = nfsd4_encode_length4(xdr, lgp->lg_seg.length); + if (status != nfs_ok) + return status; + /* lo_iomode */ + if (xdr_stream_encode_u32(xdr, lgp->lg_seg.iomode) != XDR_UNIT) + return nfserr_resource; + /* lo_content */ + if (xdr_stream_encode_u32(xdr, lgp->lg_layout_type) != XDR_UNIT) + return nfserr_resource; return ops->encode_layoutget(xdr, lgp); } static __be32 +nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) +{ + struct nfsd4_layoutget *lgp = &u->layoutget; + struct xdr_stream *xdr = resp->xdr; + + /* logr_return_on_close */ + nfserr = nfsd4_encode_bool(xdr, true); + if (nfserr != nfs_ok) + return nfserr; + /* logr_stateid */ + nfserr = nfsd4_encode_stateid4(xdr, &lgp->lg_sid); + if (nfserr != nfs_ok) + return nfserr; + /* logr_layout<> */ + if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT) + return nfserr_resource; + return nfsd4_encode_layout4(xdr, lgp); +} + +static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_layoutcommit *lcp = &u->layoutcommit; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(lcp->lc_size_chg); - if (lcp->lc_size_chg) { - p = xdr_reserve_space(xdr, 8); - if (!p) - return nfserr_resource; - p = xdr_encode_hyper(p, lcp->lc_newsize); - } - - return 0; + /* ns_sizechanged */ + nfserr = nfsd4_encode_bool(xdr, lcp->lc_size_chg); + if (nfserr != nfs_ok) + return nfserr; + if (lcp->lc_size_chg) + /* ns_size */ + return nfsd4_encode_length4(xdr, lcp->lc_newsize); + return nfs_ok; } static __be32 @@ -4772,103 +5166,108 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_layoutreturn *lrp = &u->layoutreturn; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(lrp->lrs_present); + /* lrs_present */ + nfserr = nfsd4_encode_bool(xdr, lrp->lrs_present); + if (nfserr != nfs_ok) + return nfserr; if (lrp->lrs_present) - return nfsd4_encode_stateid(xdr, &lrp->lr_sid); - return 0; + /* lrs_stateid */ + return nfsd4_encode_stateid4(xdr, &lrp->lr_sid); + return nfs_ok; } #endif /* CONFIG_NFSD_PNFS */ static __be32 -nfsd42_encode_write_res(struct nfsd4_compoundres *resp, - struct nfsd42_write_res *write, bool sync) +nfsd4_encode_write_response4(struct xdr_stream *xdr, + const struct nfsd4_copy *copy) { - __be32 *p; - p = xdr_reserve_space(resp->xdr, 4); - if (!p) - return nfserr_resource; + const struct nfsd42_write_res *write = ©->cp_res; + u32 count = nfsd4_copy_is_sync(copy) ? 0 : 1; + __be32 status; - if (sync) - *p++ = cpu_to_be32(0); - else { - __be32 nfserr; - *p++ = cpu_to_be32(1); - nfserr = nfsd4_encode_stateid(resp->xdr, &write->cb_stateid); - if (nfserr) - return nfserr; + /* wr_callback_id<1> */ + if (xdr_stream_encode_u32(xdr, count) != XDR_UNIT) + return nfserr_resource; + if (count) { + status = nfsd4_encode_stateid4(xdr, &write->cb_stateid); + if (status != nfs_ok) + return status; } - p = xdr_reserve_space(resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); - if (!p) + + /* wr_count */ + status = nfsd4_encode_length4(xdr, write->wr_bytes_written); + if (status != nfs_ok) + return status; + /* wr_committed */ + if (xdr_stream_encode_u32(xdr, write->wr_stable_how) != XDR_UNIT) return nfserr_resource; + /* wr_writeverf */ + return nfsd4_encode_verifier4(xdr, &write->wr_verifier); +} - p = xdr_encode_hyper(p, write->wr_bytes_written); - *p++ = cpu_to_be32(write->wr_stable_how); - p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, - NFS4_VERIFIER_SIZE); - return nfs_ok; +static __be32 nfsd4_encode_copy_requirements4(struct xdr_stream *xdr, + const struct nfsd4_copy *copy) +{ + __be32 status; + + /* cr_consecutive */ + status = nfsd4_encode_bool(xdr, true); + if (status != nfs_ok) + return status; + /* cr_synchronous */ + return nfsd4_encode_bool(xdr, nfsd4_copy_is_sync(copy)); } static __be32 -nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) +nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { - struct xdr_stream *xdr = resp->xdr; - struct nfs42_netaddr *addr; - __be32 *p; + struct nfsd4_copy *copy = &u->copy; - p = xdr_reserve_space(xdr, 4); - *p++ = cpu_to_be32(ns->nl4_type); + nfserr = nfsd4_encode_write_response4(resp->xdr, copy); + if (nfserr != nfs_ok) + return nfserr; + return nfsd4_encode_copy_requirements4(resp->xdr, copy); +} +static __be32 +nfsd4_encode_netloc4(struct xdr_stream *xdr, const struct nl4_server *ns) +{ + __be32 status; + + if (xdr_stream_encode_u32(xdr, ns->nl4_type) != XDR_UNIT) + return nfserr_resource; switch (ns->nl4_type) { case NL4_NETADDR: - addr = &ns->u.nl4_addr; - - /* netid_len, netid, uaddr_len, uaddr (port included - * in RPCBIND_MAXUADDRLEN) - */ - p = xdr_reserve_space(xdr, - 4 /* netid len */ + - (XDR_QUADLEN(addr->netid_len) * 4) + - 4 /* uaddr len */ + - (XDR_QUADLEN(addr->addr_len) * 4)); - if (!p) - return nfserr_resource; - - *p++ = cpu_to_be32(addr->netid_len); - p = xdr_encode_opaque_fixed(p, addr->netid, - addr->netid_len); - *p++ = cpu_to_be32(addr->addr_len); - p = xdr_encode_opaque_fixed(p, addr->addr, - addr->addr_len); + /* nl_addr */ + status = nfsd4_encode_netaddr4(xdr, &ns->u.nl4_addr); break; default: - WARN_ON_ONCE(ns->nl4_type != NL4_NETADDR); - return nfserr_inval; + status = nfserr_serverfault; } - - return 0; + return status; } static __be32 -nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { - struct nfsd4_copy *copy = &u->copy; - __be32 *p; + struct nfsd4_copy_notify *cn = &u->copy_notify; + struct xdr_stream *xdr = resp->xdr; - nfserr = nfsd42_encode_write_res(resp, ©->cp_res, - nfsd4_copy_is_sync(copy)); + /* cnr_lease_time */ + nfserr = nfsd4_encode_nfstime4(xdr, &cn->cpn_lease_time); if (nfserr) return nfserr; - - p = xdr_reserve_space(resp->xdr, 4 + 4); - *p++ = xdr_one; /* cr_consecutive */ - *p = nfsd4_copy_is_sync(copy) ? xdr_one : xdr_zero; - return 0; + /* cnr_stateid */ + nfserr = nfsd4_encode_stateid4(xdr, &cn->cpn_cnr_stateid); + if (nfserr) + return nfserr; + /* cnr_source_server<> */ + if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT) + return nfserr_resource; + return nfsd4_encode_netloc4(xdr, cn->cpn_src); } static __be32 @@ -4877,23 +5276,30 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_offload_status *os = &u->offload_status; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 8 + 4); - if (!p) + /* osr_count */ + nfserr = nfsd4_encode_length4(xdr, os->count); + if (nfserr != nfs_ok) + return nfserr; + /* osr_complete<1> */ + if (os->completed) { + if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT) + return nfserr_resource; + if (xdr_stream_encode_be32(xdr, os->status) != XDR_UNIT) + return nfserr_resource; + } else if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) return nfserr_resource; - p = xdr_encode_hyper(p, os->count); - *p++ = cpu_to_be32(0); - return nfserr; + return nfs_ok; } static __be32 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *read) { - bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); + struct nfsd4_compoundargs *argp = resp->rqstp->rq_argp; struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; + bool splice_ok = argp->splice_ok; unsigned long maxcount; __be32 nfserr, *p; @@ -4962,53 +5368,18 @@ out: } static __be32 -nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) -{ - struct nfsd4_copy_notify *cn = &u->copy_notify; - struct xdr_stream *xdr = resp->xdr; - __be32 *p; - - if (nfserr) - return nfserr; - - /* 8 sec, 4 nsec */ - p = xdr_reserve_space(xdr, 12); - if (!p) - return nfserr_resource; - - /* cnr_lease_time */ - p = xdr_encode_hyper(p, cn->cpn_sec); - *p++ = cpu_to_be32(cn->cpn_nsec); - - /* cnr_stateid */ - nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid); - if (nfserr) - return nfserr; - - /* cnr_src.nl_nsvr */ - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - - *p++ = cpu_to_be32(1); - - nfserr = nfsd42_encode_nl4_server(resp, cn->cpn_src); - return nfserr; -} - -static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_seek *seek = &u->seek; - __be32 *p; - - p = xdr_reserve_space(resp->xdr, 4 + 8); - *p++ = cpu_to_be32(seek->seek_eof); - p = xdr_encode_hyper(p, seek->seek_pos); + struct xdr_stream *xdr = resp->xdr; - return 0; + /* sr_eof */ + nfserr = nfsd4_encode_bool(xdr, seek->seek_eof); + if (nfserr != nfs_ok) + return nfserr; + /* sr_offset */ + return nfsd4_encode_offset4(xdr, seek->seek_pos); } static __be32 @@ -5108,16 +5479,11 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, /* * If the cookie is larger than the maximum number we can fit - * in either the buffer we just got back from vfs_listxattr, or, - * XDR-encoded, in the return buffer, it's invalid. + * in the buffer we just got back from vfs_listxattr, it's invalid. */ if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2)) return nfserr_badcookie; - if (cookie > (listxattrs->lsxa_maxcount / - (XDR_QUADLEN(XATTR_USER_PREFIX_LEN + 2) + 4))) - return nfserr_badcookie; - *offsetp = (u32)cookie; return 0; } @@ -5134,6 +5500,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, u64 cookie; char *sp; __be32 status, tmp; + __be64 wire_cookie; __be32 *p; u32 nuser; @@ -5149,7 +5516,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, */ cookie_offset = xdr->buf->len; count_offset = cookie_offset + 8; - p = xdr_reserve_space(xdr, 12); + p = xdr_reserve_space(xdr, XDR_UNIT * 3); if (!p) { status = nfserr_resource; goto out; @@ -5160,7 +5527,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, sp = listxattrs->lsxa_buf; nuser = 0; - xdrleft = listxattrs->lsxa_maxcount; + /* Bytes left is maxcount - 8 (cookie) - 4 (array count) */ + xdrleft = listxattrs->lsxa_maxcount - XDR_UNIT * 3; while (left > 0 && xdrleft > 0) { slen = strlen(sp); @@ -5173,7 +5541,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, slen -= XATTR_USER_PREFIX_LEN; xdrlen = 4 + ((slen + 3) & ~3); - if (xdrlen > xdrleft) { + /* Check if both entry and eof can fit in the XDR buffer */ + if (xdrlen + XDR_UNIT > xdrleft) { if (count == 0) { /* * Can't even fit the first attribute name. @@ -5225,7 +5594,8 @@ wreof: cookie = offset + count; - write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &cookie, 8); + wire_cookie = cpu_to_be64(cookie); + write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &wire_cookie, 8); tmp = cpu_to_be32(count); write_bytes_to_xdr_buf(xdr->buf, count_offset, &tmp, 4); out: @@ -5297,7 +5667,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { [OP_CREATE_SESSION] = nfsd4_encode_create_session, [OP_DESTROY_SESSION] = nfsd4_encode_noop, [OP_FREE_STATEID] = nfsd4_encode_noop, - [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = nfsd4_encode_get_dir_delegation, #ifdef CONFIG_NFSD_PNFS [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo, [OP_GETDEVICELIST] = nfsd4_encode_noop, @@ -5370,6 +5740,23 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) return nfserr_rep_too_big; } +static __be32 nfsd4_map_status(__be32 status, u32 minor) +{ + switch (status) { + case nfs_ok: + break; + case nfserr_wrong_type: + /* RFC 8881 - 15.1.2.9 */ + if (minor == 0) + status = nfserr_inval; + break; + case nfserr_symlink_not_dir: + status = nfserr_symlink; + break; + } + return status; +} + void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { @@ -5377,15 +5764,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; const struct nfsd4_operation *opdesc = op->opdesc; - int post_err_offset; + unsigned int op_status_offset; nfsd4_enc encoder; - __be32 *p; - p = xdr_reserve_space(xdr, 8); - if (!p) + if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT) + goto release; + op_status_offset = xdr->buf->len; + if (!xdr_reserve_space(xdr, XDR_UNIT)) goto release; - *p++ = cpu_to_be32(op->opnum); - post_err_offset = xdr->buf->len; if (op->opnum == OP_ILLEGAL) goto status; @@ -5426,18 +5812,21 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) * bug if we had to do this on a non-idempotent op: */ warn_on_nonidempotent_op(op); - xdr_truncate_encode(xdr, post_err_offset); + xdr_truncate_encode(xdr, op_status_offset + XDR_UNIT); } if (so) { - int len = xdr->buf->len - post_err_offset; + int len = xdr->buf->len - (op_status_offset + XDR_UNIT); so->so_replay.rp_status = op->status; so->so_replay.rp_buflen = len; - read_bytes_from_xdr_buf(xdr->buf, post_err_offset, + read_bytes_from_xdr_buf(xdr->buf, op_status_offset + XDR_UNIT, so->so_replay.rp_buf, len); } status: - *p = op->status; + op->status = nfsd4_map_status(op->status, + resp->cstate.minorversion); + write_bytes_to_xdr_buf(xdr->buf, op_status_offset, + &op->status, XDR_UNIT); release: if (opdesc && opdesc->op_release) opdesc->op_release(&op->u); @@ -5449,27 +5838,24 @@ release: rqstp->rq_next_page = xdr->page_ptr + 1; } -/* - * Encode the reply stored in the stateowner reply cache - * - * XDR note: do not encode rp->rp_buflen: the buffer contains the - * previously sent already encoded operation. +/** + * nfsd4_encode_replay - encode a result stored in the stateowner reply cache + * @xdr: send buffer's XDR stream + * @op: operation being replayed + * + * @op->replay->rp_buf contains the previously-sent already-encoded result. */ -void -nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) +void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) { - __be32 *p; struct nfs4_replay *rp = op->replay; - p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); - if (!p) { - WARN_ON_ONCE(1); - return; - } - *p++ = cpu_to_be32(op->opnum); - *p++ = rp->rp_status; /* already xdr'ed */ + trace_nfsd_stateowner_replay(op->opnum, rp); - p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); + if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT) + return; + if (xdr_stream_encode_be32(xdr, rp->rp_status) != XDR_UNIT) + return; + xdr_stream_encode_opaque_fixed(xdr, rp->rp_buf, rp->rp_buflen); } void nfsd4_release_compoundargs(struct svc_rqst *rqstp) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index c52132ecb339..ba9d326b3de6 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -166,8 +166,7 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp, int nfsd_drc_slab_create(void) { - drc_slab = kmem_cache_create("nfsd_drc", - sizeof(struct nfsd_cacherep), 0, 0, NULL); + drc_slab = KMEM_CACHE(nfsd_cacherep, 0); return drc_slab ? 0: -ENOMEM; } @@ -180,26 +179,29 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; unsigned int i; - int status = 0; nn->max_drc_entries = nfsd_cache_size_limit(); atomic_set(&nn->num_drc_entries, 0); hashsize = nfsd_hashsize(nn->max_drc_entries); nn->maskbits = ilog2(hashsize); - nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; - nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; - nn->nfsd_reply_cache_shrinker.seeks = 1; - status = register_shrinker(&nn->nfsd_reply_cache_shrinker, - "nfsd-reply:%s", nn->nfsd_name); - if (status) - return status; - nn->drc_hashtbl = kvzalloc(array_size(hashsize, sizeof(*nn->drc_hashtbl)), GFP_KERNEL); if (!nn->drc_hashtbl) + return -ENOMEM; + + nn->nfsd_reply_cache_shrinker = shrinker_alloc(0, "nfsd-reply:%s", + nn->nfsd_name); + if (!nn->nfsd_reply_cache_shrinker) goto out_shrinker; + nn->nfsd_reply_cache_shrinker->scan_objects = nfsd_reply_cache_scan; + nn->nfsd_reply_cache_shrinker->count_objects = nfsd_reply_cache_count; + nn->nfsd_reply_cache_shrinker->seeks = 1; + nn->nfsd_reply_cache_shrinker->private_data = nn; + + shrinker_register(nn->nfsd_reply_cache_shrinker); + for (i = 0; i < hashsize; i++) { INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head); spin_lock_init(&nn->drc_hashtbl[i].cache_lock); @@ -208,7 +210,7 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) return 0; out_shrinker: - unregister_shrinker(&nn->nfsd_reply_cache_shrinker); + kvfree(nn->drc_hashtbl); printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); return -ENOMEM; } @@ -218,7 +220,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) struct nfsd_cacherep *rp; unsigned int i; - unregister_shrinker(&nn->nfsd_reply_cache_shrinker); + shrinker_free(nn->nfsd_reply_cache_shrinker); for (i = 0; i < nn->drc_hashsize; i++) { struct list_head *head = &nn->drc_hashtbl[i].lru_head; @@ -302,8 +304,7 @@ nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b, static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_reply_cache_shrinker); + struct nfsd_net *nn = shrink->private_data; return atomic_read(&nn->num_drc_entries); } @@ -322,8 +323,7 @@ nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_reply_cache_shrinker); + struct nfsd_net *nn = shrink->private_data; unsigned long freed = 0; LIST_HEAD(dispose); unsigned int i; @@ -342,8 +342,6 @@ nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) if (freed > sc->nr_to_scan) break; } - - trace_nfsd_drc_gc(nn, freed); return freed; } @@ -486,7 +484,6 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, __wsum csum; struct nfsd_drc_bucket *b; int type = rqstp->rq_cachetype; - unsigned long freed; LIST_HEAD(dispose); int rtn = RC_DOIT; @@ -515,8 +512,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, nfsd_prune_bucket_locked(nn, b, 3, &dispose); spin_unlock(&b->cache_lock); - freed = nfsd_cacherep_dispose(&dispose); - trace_nfsd_drc_gc(nn, freed); + nfsd_cacherep_dispose(&dispose); nfsd_stats_rc_misses_inc(nn); atomic_inc(&nn->num_drc_entries); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 887035b74467..dcaa31706394 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -15,8 +15,10 @@ #include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/sunrpc/svc.h> #include <linux/module.h> #include <linux/fsnotify.h> +#include <linux/nfslocalio.h> #include "idmap.h" #include "nfsd.h" @@ -26,6 +28,7 @@ #include "pnfs.h" #include "filecache.h" #include "trace.h" +#include "netlink.h" /* * We have a single directory with several nodes in it. @@ -47,16 +50,10 @@ enum { NFSD_MaxBlkSize, NFSD_MaxConnections, NFSD_Filecache, - /* - * The below MUST come last. Otherwise we leave a hole in nfsd_files[] - * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops - */ -#ifdef CONFIG_NFSD_V4 NFSD_Leasetime, NFSD_Gracetime, NFSD_RecoveryDir, NFSD_V4EndGrace, -#endif NFSD_MaxReserved }; @@ -75,7 +72,9 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size); +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); +#endif static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size); #endif @@ -92,7 +91,9 @@ static ssize_t (*const write_op[])(struct file *, char *, size_t) = { #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, [NFSD_Gracetime] = write_gracetime, +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING [NFSD_RecoveryDir] = write_recoverydir, +#endif [NFSD_V4EndGrace] = write_v4_end_grace, #endif }; @@ -174,11 +175,18 @@ static int export_features_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(export_features); +static int nfsd_pool_stats_open(struct inode *inode, struct file *file) +{ + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); + + return svc_pool_stats_open(&nn->nfsd_info, file); +} + static const struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, .llseek = seq_lseek, - .release = nfsd_pool_stats_release, + .release = seq_release, }; DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats); @@ -280,6 +288,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) * 3. Is that directory the root of an exported file system? */ error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb); + nfsd4_revoke_states(netns(file), path.dentry->d_sb); path_put(&path); return error; @@ -404,7 +413,9 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) if (newthreads < 0) return -EINVAL; trace_nfsd_ctl_threads(net, newthreads); - rv = nfsd_svc(newthreads, net, file->f_cred); + mutex_lock(&nfsd_mutex); + rv = nfsd_svc(1, &newthreads, net, file->f_cred, NULL); + mutex_unlock(&nfsd_mutex); if (rv < 0) return rv; } else @@ -478,6 +489,14 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) goto out_free; trace_nfsd_ctl_pool_threads(net, i, nthreads[i]); } + + /* + * There must always be a thread in pool 0; the admin + * can't shut down NFS completely using pool_threads. + */ + if (nthreads[0] == 0) + nthreads[0] = 1; + rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) goto out_free; @@ -706,12 +725,9 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred serv = nn->nfsd_serv; err = svc_addsock(serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); - if (err < 0 && !serv->sv_nrthreads && !nn->keep_active) - nfsd_last_thread(net); - else if (err >= 0 && !serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) - svc_get(serv); + if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) + nfsd_destroy_serv(net); - svc_put(serv); return err; } @@ -749,10 +765,6 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr if (err < 0 && err != -EAFNOSUPPORT) goto out_close; - if (!serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) - svc_get(serv); - - svc_put(serv); return 0; out_close: xprt = svc_find_xprt(serv, transport, net, PF_INET, port); @@ -761,10 +773,9 @@ out_close: svc_xprt_put(xprt); } out_err: - if (!serv->sv_nrthreads && !nn->keep_active) - nfsd_last_thread(net); + if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) + nfsd_destroy_serv(net); - svc_put(serv); return err; } @@ -1020,6 +1031,7 @@ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, struct nfsd_net *nn) { @@ -1080,6 +1092,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) mutex_unlock(&nfsd_mutex); return rv; } +#endif /* * write_v4_end_grace - release grace period for nfsd's v4.x lock manager @@ -1140,7 +1153,7 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) /* Following advice from simple_fill_super documentation: */ inode->i_ino = iunique(sb, NFSD_MaxReserved); inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; @@ -1243,63 +1256,34 @@ static inline void _nfsd_symlink(struct dentry *parent, const char *name, #endif -static void clear_ncl(struct inode *inode) +static void clear_ncl(struct dentry *dentry) { + struct inode *inode = d_inode(dentry); struct nfsdfs_client *ncl = inode->i_private; + spin_lock(&inode->i_lock); inode->i_private = NULL; + spin_unlock(&inode->i_lock); kref_put(&ncl->cl_ref, ncl->cl_release); } -static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode) -{ - struct nfsdfs_client *nc = inode->i_private; - - if (nc) - kref_get(&nc->cl_ref); - return nc; -} - struct nfsdfs_client *get_nfsdfs_client(struct inode *inode) { struct nfsdfs_client *nc; - inode_lock_shared(inode); - nc = __get_nfsdfs_client(inode); - inode_unlock_shared(inode); + spin_lock(&inode->i_lock); + nc = inode->i_private; + if (nc) + kref_get(&nc->cl_ref); + spin_unlock(&inode->i_lock); return nc; } -/* from __rpc_unlink */ -static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) -{ - int ret; - - clear_ncl(d_inode(dentry)); - dget(dentry); - ret = simple_unlink(dir, dentry); - d_drop(dentry); - fsnotify_unlink(dir, dentry); - dput(dentry); - WARN_ON_ONCE(ret); -} - -static void nfsdfs_remove_files(struct dentry *root) -{ - struct dentry *dentry, *tmp; - - list_for_each_entry_safe(dentry, tmp, &root->d_subdirs, d_child) { - if (!simple_positive(dentry)) { - WARN_ON_ONCE(1); /* I think this can't happen? */ - continue; - } - nfsdfs_remove_file(d_inode(root), dentry); - } -} /* XXX: cut'n'paste from simple_fill_super; figure out if we could share * code instead. */ static int nfsdfs_create_files(struct dentry *root, const struct tree_descr *files, + struct nfsdfs_client *ncl, struct dentry **fdentries) { struct inode *dir = d_inode(root); @@ -1318,8 +1302,9 @@ static int nfsdfs_create_files(struct dentry *root, dput(dentry); goto out; } + kref_get(&ncl->cl_ref); inode->i_fop = files->ops; - inode->i_private = __get_nfsdfs_client(dir); + inode->i_private = ncl; d_add(dentry, inode); fsnotify_create(dir, dentry); if (fdentries) @@ -1328,7 +1313,6 @@ static int nfsdfs_create_files(struct dentry *root, inode_unlock(dir); return 0; out: - nfsdfs_remove_files(root); inode_unlock(dir); return -ENOMEM; } @@ -1348,7 +1332,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); if (IS_ERR(dentry)) /* XXX: tossing errors? */ return NULL; - ret = nfsdfs_create_files(dentry, files, fdentries); + ret = nfsdfs_create_files(dentry, files, ncl, fdentries); if (ret) { nfsd_client_rmdir(dentry); return NULL; @@ -1359,20 +1343,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, /* Taken from __rpc_rmdir: */ void nfsd_client_rmdir(struct dentry *dentry) { - struct inode *dir = d_inode(dentry->d_parent); - struct inode *inode = d_inode(dentry); - int ret; - - inode_lock(dir); - nfsdfs_remove_files(dentry); - clear_ncl(inode); - dget(dentry); - ret = simple_rmdir(dir, dentry); - WARN_ON_ONCE(ret); - d_drop(dentry); - fsnotify_rmdir(dir, dentry); - dput(dentry); - inode_unlock(dir); + simple_recursive_removal(dentry, clear_ncl); } static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) @@ -1406,7 +1377,9 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, +#endif [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO}, #endif /* last one */ {""} @@ -1503,6 +1476,747 @@ static int create_proc_exports_entry(void) unsigned int nfsd_net_id; +static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, + struct netlink_callback *cb, + struct nfsd_genl_rqstp *rqstp) +{ + void *hdr; + u32 i; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &nfsd_nl_family, 0, NFSD_CMD_RPC_STATUS_GET); + if (!hdr) + return -ENOBUFS; + + if (nla_put_be32(skb, NFSD_A_RPC_STATUS_XID, rqstp->rq_xid) || + nla_put_u32(skb, NFSD_A_RPC_STATUS_FLAGS, rqstp->rq_flags) || + nla_put_u32(skb, NFSD_A_RPC_STATUS_PROG, rqstp->rq_prog) || + nla_put_u32(skb, NFSD_A_RPC_STATUS_PROC, rqstp->rq_proc) || + nla_put_u8(skb, NFSD_A_RPC_STATUS_VERSION, rqstp->rq_vers) || + nla_put_s64(skb, NFSD_A_RPC_STATUS_SERVICE_TIME, + ktime_to_us(rqstp->rq_stime), + NFSD_A_RPC_STATUS_PAD)) + return -ENOBUFS; + + switch (rqstp->rq_saddr.sa_family) { + case AF_INET: { + const struct sockaddr_in *s_in, *d_in; + + s_in = (const struct sockaddr_in *)&rqstp->rq_saddr; + d_in = (const struct sockaddr_in *)&rqstp->rq_daddr; + if (nla_put_in_addr(skb, NFSD_A_RPC_STATUS_SADDR4, + s_in->sin_addr.s_addr) || + nla_put_in_addr(skb, NFSD_A_RPC_STATUS_DADDR4, + d_in->sin_addr.s_addr) || + nla_put_be16(skb, NFSD_A_RPC_STATUS_SPORT, + s_in->sin_port) || + nla_put_be16(skb, NFSD_A_RPC_STATUS_DPORT, + d_in->sin_port)) + return -ENOBUFS; + break; + } + case AF_INET6: { + const struct sockaddr_in6 *s_in, *d_in; + + s_in = (const struct sockaddr_in6 *)&rqstp->rq_saddr; + d_in = (const struct sockaddr_in6 *)&rqstp->rq_daddr; + if (nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_SADDR6, + &s_in->sin6_addr) || + nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_DADDR6, + &d_in->sin6_addr) || + nla_put_be16(skb, NFSD_A_RPC_STATUS_SPORT, + s_in->sin6_port) || + nla_put_be16(skb, NFSD_A_RPC_STATUS_DPORT, + d_in->sin6_port)) + return -ENOBUFS; + break; + } + } + + for (i = 0; i < rqstp->rq_opcnt; i++) + if (nla_put_u32(skb, NFSD_A_RPC_STATUS_COMPOUND_OPS, + rqstp->rq_opnum[i])) + return -ENOBUFS; + + genlmsg_end(skb, hdr); + return 0; +} + +/** + * nfsd_nl_rpc_status_get_dumpit - Handle rpc_status_get dumpit + * @skb: reply buffer + * @cb: netlink metadata and command arguments + * + * Returns the size of the reply or a negative errno. + */ +int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int i, ret, rqstp_index = 0; + struct nfsd_net *nn; + + mutex_lock(&nfsd_mutex); + + nn = net_generic(sock_net(skb->sk), nfsd_net_id); + if (!nn->nfsd_serv) { + ret = -ENODEV; + goto out_unlock; + } + + rcu_read_lock(); + + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) { + struct svc_rqst *rqstp; + + if (i < cb->args[0]) /* already consumed */ + continue; + + rqstp_index = 0; + list_for_each_entry_rcu(rqstp, + &nn->nfsd_serv->sv_pools[i].sp_all_threads, + rq_all) { + struct nfsd_genl_rqstp genl_rqstp; + unsigned int status_counter; + + if (rqstp_index++ < cb->args[1]) /* already consumed */ + continue; + /* + * Acquire rq_status_counter before parsing the rqst + * fields. rq_status_counter is set to an odd value in + * order to notify the consumers the rqstp fields are + * meaningful. + */ + status_counter = + smp_load_acquire(&rqstp->rq_status_counter); + if (!(status_counter & 1)) + continue; + + genl_rqstp.rq_xid = rqstp->rq_xid; + genl_rqstp.rq_flags = rqstp->rq_flags; + genl_rqstp.rq_vers = rqstp->rq_vers; + genl_rqstp.rq_prog = rqstp->rq_prog; + genl_rqstp.rq_proc = rqstp->rq_proc; + genl_rqstp.rq_stime = rqstp->rq_stime; + genl_rqstp.rq_opcnt = 0; + memcpy(&genl_rqstp.rq_daddr, svc_daddr(rqstp), + sizeof(struct sockaddr)); + memcpy(&genl_rqstp.rq_saddr, svc_addr(rqstp), + sizeof(struct sockaddr)); + +#ifdef CONFIG_NFSD_V4 + if (rqstp->rq_vers == NFS4_VERSION && + rqstp->rq_proc == NFSPROC4_COMPOUND) { + /* NFSv4 compound */ + struct nfsd4_compoundargs *args; + int j; + + args = rqstp->rq_argp; + genl_rqstp.rq_opcnt = args->opcnt; + for (j = 0; j < genl_rqstp.rq_opcnt; j++) + genl_rqstp.rq_opnum[j] = + args->ops[j].opnum; + } +#endif /* CONFIG_NFSD_V4 */ + + /* + * Acquire rq_status_counter before reporting the rqst + * fields to the user. + */ + if (smp_load_acquire(&rqstp->rq_status_counter) != + status_counter) + continue; + + ret = nfsd_genl_rpc_status_compose_msg(skb, cb, + &genl_rqstp); + if (ret) + goto out; + } + } + + cb->args[0] = i; + cb->args[1] = rqstp_index; + ret = skb->len; +out: + rcu_read_unlock(); +out_unlock: + mutex_unlock(&nfsd_mutex); + + return ret; +} + +/** + * nfsd_nl_threads_set_doit - set the number of running threads + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem; + struct net *net = genl_info_net(info); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + const struct nlattr *attr; + const char *scope = NULL; + + if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_THREADS)) + return -EINVAL; + + /* count number of SERVER_THREADS values */ + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + if (nla_type(attr) == NFSD_A_SERVER_THREADS) + nrpools++; + } + + mutex_lock(&nfsd_mutex); + + nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL); + if (!nthreads) { + ret = -ENOMEM; + goto out_unlock; + } + + i = 0; + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + if (nla_type(attr) == NFSD_A_SERVER_THREADS) { + nthreads[i++] = nla_get_u32(attr); + if (i >= nrpools) + break; + } + } + + if (info->attrs[NFSD_A_SERVER_GRACETIME] || + info->attrs[NFSD_A_SERVER_LEASETIME] || + info->attrs[NFSD_A_SERVER_SCOPE]) { + ret = -EBUSY; + if (nn->nfsd_serv && nn->nfsd_serv->sv_nrthreads) + goto out_unlock; + + ret = -EINVAL; + attr = info->attrs[NFSD_A_SERVER_GRACETIME]; + if (attr) { + u32 gracetime = nla_get_u32(attr); + + if (gracetime < 10 || gracetime > 3600) + goto out_unlock; + + nn->nfsd4_grace = gracetime; + } + + attr = info->attrs[NFSD_A_SERVER_LEASETIME]; + if (attr) { + u32 leasetime = nla_get_u32(attr); + + if (leasetime < 10 || leasetime > 3600) + goto out_unlock; + + nn->nfsd4_lease = leasetime; + } + + attr = info->attrs[NFSD_A_SERVER_SCOPE]; + if (attr) + scope = nla_data(attr); + } + + ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope); + if (ret > 0) + ret = 0; +out_unlock: + mutex_unlock(&nfsd_mutex); + kfree(nthreads); + return ret; +} + +/** + * nfsd_nl_threads_get_doit - get the number of running threads + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + void *hdr; + int err; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_iput(skb, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_free_msg; + } + + mutex_lock(&nfsd_mutex); + + err = nla_put_u32(skb, NFSD_A_SERVER_GRACETIME, + nn->nfsd4_grace) || + nla_put_u32(skb, NFSD_A_SERVER_LEASETIME, + nn->nfsd4_lease) || + nla_put_string(skb, NFSD_A_SERVER_SCOPE, + nn->nfsd_name); + if (err) + goto err_unlock; + + if (nn->nfsd_serv) { + int i; + + for (i = 0; i < nfsd_nrpools(net); ++i) { + struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; + + err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, + sp->sp_nrthreads); + if (err) + goto err_unlock; + } + } else { + err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, 0); + if (err) + goto err_unlock; + } + + mutex_unlock(&nfsd_mutex); + + genlmsg_end(skb, hdr); + + return genlmsg_reply(skb, info); + +err_unlock: + mutex_unlock(&nfsd_mutex); +err_free_msg: + nlmsg_free(skb); + + return err; +} + +/** + * nfsd_nl_version_set_doit - set the nfs enabled versions + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + const struct nlattr *attr; + struct nfsd_net *nn; + int i, rem; + + if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_PROTO_VERSION)) + return -EINVAL; + + mutex_lock(&nfsd_mutex); + + nn = net_generic(genl_info_net(info), nfsd_net_id); + if (nn->nfsd_serv) { + mutex_unlock(&nfsd_mutex); + return -EBUSY; + } + + /* clear current supported versions. */ + nfsd_vers(nn, 2, NFSD_CLEAR); + nfsd_vers(nn, 3, NFSD_CLEAR); + for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) + nfsd_minorversion(nn, i, NFSD_CLEAR); + + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + struct nlattr *tb[NFSD_A_VERSION_MAX + 1]; + u32 major, minor = 0; + bool enabled; + + if (nla_type(attr) != NFSD_A_SERVER_PROTO_VERSION) + continue; + + if (nla_parse_nested(tb, NFSD_A_VERSION_MAX, attr, + nfsd_version_nl_policy, info->extack) < 0) + continue; + + if (!tb[NFSD_A_VERSION_MAJOR]) + continue; + + major = nla_get_u32(tb[NFSD_A_VERSION_MAJOR]); + if (tb[NFSD_A_VERSION_MINOR]) + minor = nla_get_u32(tb[NFSD_A_VERSION_MINOR]); + + enabled = nla_get_flag(tb[NFSD_A_VERSION_ENABLED]); + + switch (major) { + case 4: + nfsd_minorversion(nn, minor, enabled ? NFSD_SET : NFSD_CLEAR); + break; + case 3: + case 2: + if (!minor) + nfsd_vers(nn, major, enabled ? NFSD_SET : NFSD_CLEAR); + break; + default: + break; + } + } + + mutex_unlock(&nfsd_mutex); + + return 0; +} + +/** + * nfsd_nl_version_get_doit - get the enabled status for all supported nfs versions + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct nfsd_net *nn; + int i, err; + void *hdr; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_iput(skb, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_free_msg; + } + + mutex_lock(&nfsd_mutex); + nn = net_generic(genl_info_net(info), nfsd_net_id); + + for (i = 2; i <= 4; i++) { + int j; + + for (j = 0; j <= NFSD_SUPPORTED_MINOR_VERSION; j++) { + struct nlattr *attr; + + /* Don't record any versions the kernel doesn't have + * compiled in + */ + if (!nfsd_support_version(i)) + continue; + + /* NFSv{2,3} does not support minor numbers */ + if (i < 4 && j) + continue; + + attr = nla_nest_start(skb, + NFSD_A_SERVER_PROTO_VERSION); + if (!attr) { + err = -EINVAL; + goto err_nfsd_unlock; + } + + if (nla_put_u32(skb, NFSD_A_VERSION_MAJOR, i) || + nla_put_u32(skb, NFSD_A_VERSION_MINOR, j)) { + err = -EINVAL; + goto err_nfsd_unlock; + } + + /* Set the enabled flag if the version is enabled */ + if (nfsd_vers(nn, i, NFSD_TEST) && + (i < 4 || nfsd_minorversion(nn, j, NFSD_TEST)) && + nla_put_flag(skb, NFSD_A_VERSION_ENABLED)) { + err = -EINVAL; + goto err_nfsd_unlock; + } + + nla_nest_end(skb, attr); + } + } + + mutex_unlock(&nfsd_mutex); + genlmsg_end(skb, hdr); + + return genlmsg_reply(skb, info); + +err_nfsd_unlock: + mutex_unlock(&nfsd_mutex); +err_free_msg: + nlmsg_free(skb); + + return err; +} + +/** + * nfsd_nl_listener_set_doit - set the nfs running sockets + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct svc_xprt *xprt, *tmp; + const struct nlattr *attr; + struct svc_serv *serv; + LIST_HEAD(permsocks); + struct nfsd_net *nn; + bool delete = false; + int err, rem; + + mutex_lock(&nfsd_mutex); + + err = nfsd_create_serv(net); + if (err) { + mutex_unlock(&nfsd_mutex); + return err; + } + + nn = net_generic(net, nfsd_net_id); + serv = nn->nfsd_serv; + + spin_lock_bh(&serv->sv_lock); + + /* Move all of the old listener sockets to a temp list */ + list_splice_init(&serv->sv_permsocks, &permsocks); + + /* + * Walk the list of server_socks from userland and move any that match + * back to sv_permsocks + */ + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + struct nlattr *tb[NFSD_A_SOCK_MAX + 1]; + const char *xcl_name; + struct sockaddr *sa; + + if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR) + continue; + + if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr, + nfsd_sock_nl_policy, info->extack) < 0) + continue; + + if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME]) + continue; + + if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa)) + continue; + + xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]); + sa = nla_data(tb[NFSD_A_SOCK_ADDR]); + + /* Put back any matching sockets */ + list_for_each_entry_safe(xprt, tmp, &permsocks, xpt_list) { + /* This shouldn't be possible */ + if (WARN_ON_ONCE(xprt->xpt_net != net)) { + list_move(&xprt->xpt_list, &serv->sv_permsocks); + continue; + } + + /* If everything matches, put it back */ + if (!strcmp(xprt->xpt_class->xcl_name, xcl_name) && + rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local)) { + list_move(&xprt->xpt_list, &serv->sv_permsocks); + break; + } + } + } + + /* + * If there are listener transports remaining on the permsocks list, + * it means we were asked to remove a listener. + */ + if (!list_empty(&permsocks)) { + list_splice_init(&permsocks, &serv->sv_permsocks); + delete = true; + } + spin_unlock_bh(&serv->sv_lock); + + /* Do not remove listeners while there are active threads. */ + if (serv->sv_nrthreads) { + err = -EBUSY; + goto out_unlock_mtx; + } + + /* + * Since we can't delete an arbitrary llist entry, destroy the + * remaining listeners and recreate the list. + */ + if (delete) + svc_xprt_destroy_all(serv, net); + + /* walk list of addrs again, open any that still don't exist */ + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + struct nlattr *tb[NFSD_A_SOCK_MAX + 1]; + const char *xcl_name; + struct sockaddr *sa; + int ret; + + if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR) + continue; + + if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr, + nfsd_sock_nl_policy, info->extack) < 0) + continue; + + if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME]) + continue; + + if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa)) + continue; + + xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]); + sa = nla_data(tb[NFSD_A_SOCK_ADDR]); + + xprt = svc_find_listener(serv, xcl_name, net, sa); + if (xprt) { + if (delete) + WARN_ONCE(1, "Transport type=%s already exists\n", + xcl_name); + svc_xprt_put(xprt); + continue; + } + + ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, + get_current_cred()); + /* always save the latest error */ + if (ret < 0) + err = ret; + } + + if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) + nfsd_destroy_serv(net); + +out_unlock_mtx: + mutex_unlock(&nfsd_mutex); + + return err; +} + +/** + * nfsd_nl_listener_get_doit - get the nfs running listeners + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct svc_xprt *xprt; + struct svc_serv *serv; + struct nfsd_net *nn; + void *hdr; + int err; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_iput(skb, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_free_msg; + } + + mutex_lock(&nfsd_mutex); + nn = net_generic(genl_info_net(info), nfsd_net_id); + + /* no nfs server? Just send empty socket list */ + if (!nn->nfsd_serv) + goto out_unlock_mtx; + + serv = nn->nfsd_serv; + spin_lock_bh(&serv->sv_lock); + list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { + struct nlattr *attr; + + attr = nla_nest_start(skb, NFSD_A_SERVER_SOCK_ADDR); + if (!attr) { + err = -EINVAL; + goto err_serv_unlock; + } + + if (nla_put_string(skb, NFSD_A_SOCK_TRANSPORT_NAME, + xprt->xpt_class->xcl_name) || + nla_put(skb, NFSD_A_SOCK_ADDR, + sizeof(struct sockaddr_storage), + &xprt->xpt_local)) { + err = -EINVAL; + goto err_serv_unlock; + } + + nla_nest_end(skb, attr); + } + spin_unlock_bh(&serv->sv_lock); +out_unlock_mtx: + mutex_unlock(&nfsd_mutex); + genlmsg_end(skb, hdr); + + return genlmsg_reply(skb, info); + +err_serv_unlock: + spin_unlock_bh(&serv->sv_lock); + mutex_unlock(&nfsd_mutex); +err_free_msg: + nlmsg_free(skb); + + return err; +} + +/** + * nfsd_nl_pool_mode_set_doit - set the number of running threads + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + const struct nlattr *attr; + + if (GENL_REQ_ATTR_CHECK(info, NFSD_A_POOL_MODE_MODE)) + return -EINVAL; + + attr = info->attrs[NFSD_A_POOL_MODE_MODE]; + return sunrpc_set_pool_mode(nla_data(attr)); +} + +/** + * nfsd_nl_pool_mode_get_doit - get info about pool_mode + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + char buf[16]; + void *hdr; + int err; + + if (sunrpc_get_pool_mode(buf, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) + return -ERANGE; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + err = -EMSGSIZE; + hdr = genlmsg_iput(skb, info); + if (!hdr) + goto err_free_msg; + + err = nla_put_string(skb, NFSD_A_POOL_MODE_MODE, buf) | + nla_put_u32(skb, NFSD_A_POOL_MODE_NPOOLS, nfsd_nrpools(net)); + if (err) + goto err_free_msg; + + genlmsg_end(skb, hdr); + return genlmsg_reply(skb, info); + +err_free_msg: + nlmsg_free(skb); + return err; +} + /** * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace * @net: a freshly-created network namespace @@ -1515,8 +2229,9 @@ unsigned int nfsd_net_id; */ static __net_init int nfsd_net_init(struct net *net) { - int retval; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int retval; + int i; retval = nfsd_export_init(net); if (retval) @@ -1524,20 +2239,34 @@ static __net_init int nfsd_net_init(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; - retval = nfsd_stat_counters_init(nn); + retval = percpu_counter_init_many(nn->counter, 0, GFP_KERNEL, + NFSD_STATS_COUNTERS_NUM); if (retval) goto out_repcache_error; + memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats)); - nn->nfsd_svcstats.program = &nfsd_program; - nn->nfsd_versions = NULL; - nn->nfsd4_minorversions = NULL; + nn->nfsd_svcstats.program = &nfsd_programs[0]; + if (!nfsd_proc_stat_init(net)) { + retval = -ENOMEM; + goto out_proc_error; + } + + for (i = 0; i < sizeof(nn->nfsd_versions); i++) + nn->nfsd_versions[i] = nfsd_support_version(i); + for (i = 0; i < sizeof(nn->nfsd4_minorversions); i++) + nn->nfsd4_minorversions[i] = nfsd_support_version(4); + nn->nfsd_info.mutex = &nfsd_mutex; + nn->nfsd_serv = NULL; nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); - nfsd_proc_stat_init(net); - +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + INIT_LIST_HEAD(&nn->local_clients); +#endif return 0; +out_proc_error: + percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); out_repcache_error: nfsd_idmap_shutdown(net); out_idmap_error: @@ -1546,6 +2275,22 @@ out_export_error: return retval; } +#if IS_ENABLED(CONFIG_NFS_LOCALIO) +/** + * nfsd_net_pre_exit - Disconnect localio clients from net namespace + * @net: a network namespace that is about to be destroyed + * + * This invalidated ->net pointers held by localio clients + * while they can still safely access nn->counter. + */ +static __net_exit void nfsd_net_pre_exit(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs_uuid_invalidate_clients(&nn->local_clients); +} +#endif + /** * nfsd_net_exit - Release the nfsd_net portion of a net namespace * @net: a network namespace that is about to be destroyed @@ -1556,14 +2301,16 @@ static __net_exit void nfsd_net_exit(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfsd_proc_stat_shutdown(net); - nfsd_stat_counters_destroy(nn); + percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); - nfsd_netns_free_versions(nn); } static struct pernet_operations nfsd_net_ops = { .init = nfsd_net_init, +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + .pre_exit = nfsd_net_pre_exit, +#endif .exit = nfsd_net_exit, .id = &nfsd_net_id, .size = sizeof(struct nfsd_net), @@ -1583,12 +2330,9 @@ static int __init init_nfsd(void) if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ - retval = create_proc_exports_entry(); - if (retval) - goto out_free_lockd; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_exports; + goto out_free_lockd; retval = register_cld_notifier(); if (retval) goto out_free_subsys; @@ -1597,17 +2341,26 @@ static int __init init_nfsd(void) goto out_free_cld; retval = register_filesystem(&nfsd_fs_type); if (retval) + goto out_free_nfsd4; + retval = genl_register_family(&nfsd_nl_family); + if (retval) + goto out_free_filesystem; + retval = create_proc_exports_entry(); + if (retval) goto out_free_all; + nfsd_localio_ops_init(); + return 0; out_free_all: + genl_unregister_family(&nfsd_nl_family); +out_free_filesystem: + unregister_filesystem(&nfsd_fs_type); +out_free_nfsd4: nfsd4_destroy_laundry_wq(); out_free_cld: unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); -out_free_exports: - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); @@ -1620,13 +2373,14 @@ out_free_slabs: static void __exit exit_nfsd(void) { + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); + genl_unregister_family(&nfsd_nl_family); unregister_filesystem(&nfsd_fs_type); nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index d05bd2b811f3..4b56ba1e8e48 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -23,9 +23,7 @@ #include <uapi/linux/nfsd/debug.h> -#include "netns.h" #include "export.h" -#include "stats.h" #undef ifdebug #ifdef CONFIG_SUNRPC_DEBUG @@ -37,7 +35,14 @@ /* * nfsd version */ +#define NFSD_MINVERS 2 +#define NFSD_MAXVERS 4 #define NFSD_SUPPORTED_MINOR_VERSION 2 +bool nfsd_support_version(int vers); + +#include "netns.h" +#include "stats.h" + /* * Maximum blocksizes supported by daemon under various circumstances. */ @@ -62,8 +67,25 @@ struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ }; +/* Maximum number of operations per session compound */ +#define NFSD_MAX_OPS_PER_COMPOUND 50 + +struct nfsd_genl_rqstp { + struct sockaddr rq_daddr; + struct sockaddr rq_saddr; + unsigned long rq_flags; + ktime_t rq_stime; + __be32 rq_xid; + u32 rq_vers; + u32 rq_prog; + u32 rq_proc; + + /* NFSv4 compound */ + u32 rq_opcnt; + u32 rq_opnum[NFSD_MAX_OPS_PER_COMPOUND]; +}; -extern struct svc_program nfsd_program; +extern struct svc_program nfsd_programs[]; extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; @@ -86,18 +108,17 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp, /* * Function prototypes. */ -int nfsd_svc(int nrservs, struct net *net, const struct cred *cred); +int nfsd_svc(int n, int *nservers, struct net *net, + const struct cred *cred, const char *scope); int nfsd_dispatch(struct svc_rqst *rqstp); int nfsd_nrthreads(struct net *); int nfsd_nrpools(struct net *); int nfsd_get_nrthreads(int n, int *, struct net *); int nfsd_set_nrthreads(int n, int *, struct net *); -int nfsd_pool_stats_open(struct inode *, struct file *); -int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_shutdown_threads(struct net *net); -bool i_am_nfsd(void); +struct svc_rqst *nfsd_current_rqst(void); struct nfsdfs_client { struct kref cl_ref; @@ -125,6 +146,10 @@ extern const struct svc_version nfsd_acl_version3; #endif #endif +#if IS_ENABLED(CONFIG_NFS_LOCALIO) +extern const struct svc_version localio_version1; +#endif + struct nfsd_net; enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; @@ -132,13 +157,13 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change); int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change); void nfsd_reset_versions(struct nfsd_net *nn); int nfsd_create_serv(struct net *net); -void nfsd_last_thread(struct net *net); +void nfsd_destroy_serv(struct net *net); extern int nfsd_max_blksize; static inline int nfsd_v4client(struct svc_rqst *rq) { - return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; + return rq && rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; } static inline struct user_namespace * nfsd_user_namespace(const struct svc_rqst *rqstp) @@ -213,7 +238,6 @@ void nfsd_lockd_shutdown(void); #define nfserr_nospc cpu_to_be32(NFSERR_NOSPC) #define nfserr_rofs cpu_to_be32(NFSERR_ROFS) #define nfserr_mlink cpu_to_be32(NFSERR_MLINK) -#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP) #define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG) #define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY) #define nfserr_dquot cpu_to_be32(NFSERR_DQUOT) @@ -258,6 +282,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE) #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD) #define nfserr_badname cpu_to_be32(NFSERR_BADNAME) +#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED) #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) #define nfserr_locked cpu_to_be32(NFSERR_LOCKED) #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) @@ -309,17 +334,36 @@ void nfsd_lockd_shutdown(void); #define nfserr_xattr2big cpu_to_be32(NFS4ERR_XATTR2BIG) #define nfserr_noxattr cpu_to_be32(NFS4ERR_NOXATTR) -/* error codes for internal use */ +/* + * Error codes for internal use. We use enum to choose numbers that are + * not already assigned, then covert to be32 resulting in a number that + * cannot conflict with any existing be32 nfserr value. + */ +enum { + NFSERR_DROPIT = NFS4ERR_FIRST_FREE, /* if a request fails due to kmalloc failure, it gets dropped. * Client should resend eventually */ -#define nfserr_dropit cpu_to_be32(30000) +#define nfserr_dropit cpu_to_be32(NFSERR_DROPIT) + /* end-of-file indicator in readdir */ -#define nfserr_eof cpu_to_be32(30001) + NFSERR_EOF, +#define nfserr_eof cpu_to_be32(NFSERR_EOF) + /* replay detected */ -#define nfserr_replay_me cpu_to_be32(11001) + NFSERR_REPLAY_ME, +#define nfserr_replay_me cpu_to_be32(NFSERR_REPLAY_ME) + /* nfs41 replay detected */ -#define nfserr_replay_cache cpu_to_be32(11002) + NFSERR_REPLAY_CACHE, +#define nfserr_replay_cache cpu_to_be32(NFSERR_REPLAY_CACHE) + +/* symlink found where dir expected - handled differently to + * other symlink found errors by NFSv3. + */ + NFSERR_SYMLINK_NOT_DIR, +#define nfserr_symlink_not_dir cpu_to_be32(NFSERR_SYMLINK_NOT_DIR) +}; /* Check for dir entries '.' and '..' */ #define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) @@ -349,6 +393,7 @@ void nfsd_lockd_shutdown(void); #define NFSD_CLIENT_MAX_TRIM_PER_RUN 128 #define NFS4_CLIENTS_PER_GB 1024 #define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */ +#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT /* * The following attributes are currently not supported by the NFSv4 server: diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c2495d98c189..96e19c50a5d7 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -62,8 +62,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) * the write call). */ static inline __be32 -nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry, - umode_t requested) +nfsd_mode_check(struct dentry *dentry, umode_t requested) { umode_t mode = d_inode(dentry)->i_mode & S_IFMT; @@ -76,36 +75,36 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry, } return nfs_ok; } - /* - * v4 has an error more specific than err_notdir which we should - * return in preference to err_notdir: - */ - if (rqstp->rq_vers == 4 && mode == S_IFLNK) + if (mode == S_IFLNK) { + if (requested == S_IFDIR) + return nfserr_symlink_not_dir; return nfserr_symlink; + } if (requested == S_IFDIR) return nfserr_notdir; if (mode == S_IFDIR) return nfserr_isdir; - return nfserr_inval; + return nfserr_wrong_type; } -static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags) +static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, + struct svc_cred *cred, + struct svc_export *exp) { - if (flags & NFSEXP_INSECURE_PORT) + if (nfsexp_flags(cred, exp) & NFSEXP_INSECURE_PORT) return true; /* We don't require gss requests to use low ports: */ - if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS) + if (cred->cr_flavor >= RPC_AUTH_GSS) return true; return test_bit(RQ_SECURE, &rqstp->rq_flags); } static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, + struct svc_cred *cred, struct svc_export *exp) { - int flags = nfsexp_flags(rqstp, exp); - /* Check if the request originated from a secure port. */ - if (!nfsd_originating_port_ok(rqstp, flags)) { + if (rqstp && !nfsd_originating_port_ok(rqstp, cred, exp)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk("nfsd: request from insecure port %s!\n", svc_print_addr(rqstp, buf, sizeof(buf))); @@ -113,23 +112,15 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, } /* Set user creds for this exportpoint */ - return nfserrno(nfsd_setuser(rqstp, exp)); + return nfserrno(nfsd_setuser(cred, exp)); } -static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, - struct dentry *dentry, struct svc_export *exp) +static inline __be32 check_pseudo_root(struct dentry *dentry, + struct svc_export *exp) { if (!(exp->ex_flags & NFSEXP_V4ROOT)) return nfs_ok; /* - * v2/v3 clients have no need for the V4ROOT export--they use - * the mount protocl instead; also, further V4ROOT checks may be - * in v4-specific code, in which case v2/v3 clients could bypass - * them. - */ - if (!nfsd_v4client(rqstp)) - return nfserr_stale; - /* * We're exposing only the directories and symlinks that have to be * traversed on the way to real exports: */ @@ -151,7 +142,11 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, * dentry. On success, the results are used to set fh_export and * fh_dentry. */ -static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) +static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, + struct svc_cred *cred, + struct auth_domain *client, + struct auth_domain *gssclient, + struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; struct fid *fid = NULL; @@ -162,10 +157,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) int len; __be32 error; - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; - if (rqstp->rq_vers == 4 && fh->fh_size == 0) + error = nfserr_badhandle; + if (fh->fh_size == 0) return nfserr_nofilehandle; if (fh->fh_version != 1) @@ -195,7 +188,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) data_left -= len; if (data_left < 0) return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + exp = rqst_exp_find(rqstp ? &rqstp->rq_chandle : NULL, + net, client, gssclient, + fh->fh_fsid_type, fh->fh_fsid); fid = (struct fid *)(fh->fh_fsid + len); error = nfserr_stale; @@ -229,7 +224,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) put_cred(override_creds(new)); put_cred(new); } else { - error = nfsd_setuser_and_check_port(rqstp, exp); + error = nfsd_setuser_and_check_port(rqstp, cred, exp); if (error) goto out; } @@ -237,9 +232,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) /* * Look up the dentry using the NFS file handle. */ - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; + error = nfserr_badhandle; fileid_type = fh->fh_fileid_type; @@ -247,7 +240,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) dentry = dget(exp->ex_path.dentry); else { dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid, - data_left, fileid_type, + data_left, fileid_type, 0, nfsd_acceptable, exp); if (IS_ERR_OR_NULL(dentry)) { trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp, @@ -278,17 +271,25 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) fhp->fh_dentry = dentry; fhp->fh_export = exp; - switch (rqstp->rq_vers) { - case 4: + switch (fhp->fh_maxsize) { + case NFS4_FHSIZE: if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR) fhp->fh_no_atomic_attr = true; + fhp->fh_64bit_cookies = true; break; - case 3: + case NFS3_FHSIZE: if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC) fhp->fh_no_wcc = true; + fhp->fh_64bit_cookies = true; + if (exp->ex_flags & NFSEXP_V4ROOT) + goto out; break; - case 2: + case NFS_FHSIZE: fhp->fh_no_wcc = true; + if (EX_WGATHER(exp)) + fhp->fh_use_wgather = true; + if (exp->ex_flags & NFSEXP_V4ROOT) + goto out; } return 0; @@ -298,42 +299,33 @@ out: } /** - * fh_verify - filehandle lookup and access checking - * @rqstp: pointer to current rpc request + * __fh_verify - filehandle lookup and access checking + * @rqstp: RPC transaction context, or NULL + * @net: net namespace in which to perform the export lookup + * @cred: RPC user credential + * @client: RPC auth domain + * @gssclient: RPC GSS auth domain, or NULL * @fhp: filehandle to be verified * @type: expected type of object pointed to by filehandle * @access: type of access needed to object * - * Look up a dentry from the on-the-wire filehandle, check the client's - * access to the export, and set the current task's credentials. - * - * Regardless of success or failure of fh_verify(), fh_put() should be - * called on @fhp when the caller is finished with the filehandle. - * - * fh_verify() may be called multiple times on a given filehandle, for - * example, when processing an NFSv4 compound. The first call will look - * up a dentry using the on-the-wire filehandle. Subsequent calls will - * skip the lookup and just perform the other checks and possibly change - * the current task's credentials. - * - * @type specifies the type of object expected using one of the S_IF* - * constants defined in include/linux/stat.h. The caller may use zero - * to indicate that it doesn't care, or a negative integer to indicate - * that it expects something not of the given type. - * - * @access is formed from the NFSD_MAY_* constants defined in - * fs/nfsd/vfs.h. + * See fh_verify() for further descriptions of @fhp, @type, and @access. */ -__be32 -fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) +static __be32 +__fh_verify(struct svc_rqst *rqstp, + struct net *net, struct svc_cred *cred, + struct auth_domain *client, + struct auth_domain *gssclient, + struct svc_fh *fhp, umode_t type, int access) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_export *exp = NULL; struct dentry *dentry; __be32 error; if (!fhp->fh_dentry) { - error = nfsd_set_fh_dentry(rqstp, fhp); + error = nfsd_set_fh_dentry(rqstp, net, cred, client, + gssclient, fhp); if (error) goto out; } @@ -358,15 +350,15 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) * (for example, if different id-squashing options are in * effect on the new filesystem). */ - error = check_pseudo_root(rqstp, dentry, exp); + error = check_pseudo_root(dentry, exp); if (error) goto out; - error = nfsd_setuser_and_check_port(rqstp, exp); + error = nfsd_setuser_and_check_port(rqstp, cred, exp); if (error) goto out; - error = nfsd_mode_check(rqstp, dentry, type); + error = nfsd_mode_check(dentry, type); if (error) goto out; @@ -392,7 +384,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) skip_pseudoflavor_check: /* Finally, check access permissions. */ - error = nfsd_permission(rqstp, exp, dentry, access); + error = nfsd_permission(cred, exp, dentry, access); out: trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); if (error == nfserr_stale) @@ -400,6 +392,63 @@ out: return error; } +/** + * fh_verify_local - filehandle lookup and access checking + * @net: net namespace in which to perform the export lookup + * @cred: RPC user credential + * @client: RPC auth domain + * @fhp: filehandle to be verified + * @type: expected type of object pointed to by filehandle + * @access: type of access needed to object + * + * This API can be used by callers who do not have an RPC + * transaction context (ie are not running in an nfsd thread). + * + * See fh_verify() for further descriptions of @fhp, @type, and @access. + */ +__be32 +fh_verify_local(struct net *net, struct svc_cred *cred, + struct auth_domain *client, struct svc_fh *fhp, + umode_t type, int access) +{ + return __fh_verify(NULL, net, cred, client, NULL, + fhp, type, access); +} + +/** + * fh_verify - filehandle lookup and access checking + * @rqstp: pointer to current rpc request + * @fhp: filehandle to be verified + * @type: expected type of object pointed to by filehandle + * @access: type of access needed to object + * + * Look up a dentry from the on-the-wire filehandle, check the client's + * access to the export, and set the current task's credentials. + * + * Regardless of success or failure of fh_verify(), fh_put() should be + * called on @fhp when the caller is finished with the filehandle. + * + * fh_verify() may be called multiple times on a given filehandle, for + * example, when processing an NFSv4 compound. The first call will look + * up a dentry using the on-the-wire filehandle. Subsequent calls will + * skip the lookup and just perform the other checks and possibly change + * the current task's credentials. + * + * @type specifies the type of object expected using one of the S_IF* + * constants defined in include/linux/stat.h. The caller may use zero + * to indicate that it doesn't care, or a negative integer to indicate + * that it expects something not of the given type. + * + * @access is formed from the NFSD_MAY_* constants defined in + * fs/nfsd/vfs.h. + */ +__be32 +fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) +{ + return __fh_verify(rqstp, SVC_NET(rqstp), &rqstp->rq_cred, + rqstp->rq_client, rqstp->rq_gssclient, + fhp, type, access); +} /* * Compose a file handle for an NFS reply. @@ -618,20 +667,18 @@ out_negative: __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp) { bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode; struct kstat stat; __be32 err; if (fhp->fh_no_wcc || fhp->fh_pre_saved) return nfs_ok; - inode = d_inode(fhp->fh_dentry); err = fh_getattr(fhp, &stat); if (err) return err; if (v4) - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + fhp->fh_pre_change = nfsd4_change_attribute(&stat); fhp->fh_pre_mtime = stat.mtime; fhp->fh_pre_ctime = stat.ctime; @@ -648,7 +695,6 @@ __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp) __be32 fh_fill_post_attrs(struct svc_fh *fhp) { bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode = d_inode(fhp->fh_dentry); __be32 err; if (fhp->fh_no_wcc) @@ -664,7 +710,7 @@ __be32 fh_fill_post_attrs(struct svc_fh *fhp) fhp->fh_post_saved = true; if (v4) fhp->fh_post_change = - nfsd4_change_attribute(&fhp->fh_post_attr, inode); + nfsd4_change_attribute(&fhp->fh_post_attr); return nfs_ok; } @@ -755,7 +801,14 @@ enum fsid_source fsid_source(const struct svc_fh *fhp) return FSIDSOURCE_DEV; } -/* +/** + * nfsd4_change_attribute - Generate an NFSv4 change_attribute value + * @stat: inode attributes + * + * Caller must fill in @stat before calling, typically by invoking + * vfs_getattr() with STATX_MODE, STATX_CTIME, and STATX_CHANGE_COOKIE. + * Returns an unsigned 64-bit changeid4 value (RFC 8881 Section 3.2). + * * We could use i_version alone as the change attribute. However, i_version * can go backwards on a regular file after an unclean shutdown. On its own * that doesn't necessarily cause a problem, but if i_version goes backwards @@ -772,13 +825,13 @@ enum fsid_source fsid_source(const struct svc_fh *fhp) * assume that the new change attr is always logged to stable storage in some * fashion before the results can be seen. */ -u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode) +u64 nfsd4_change_attribute(const struct kstat *stat) { u64 chattr; if (stat->result_mask & STATX_CHANGE_COOKIE) { chattr = stat->change_cookie; - if (S_ISREG(inode->i_mode) && + if (S_ISREG(stat->mode) && !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) { chattr += (u64)stat->ctime.tv_sec << 30; chattr += stat->ctime.tv_nsec; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 40426f899e76..5103c2f4d225 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -88,6 +88,8 @@ typedef struct svc_fh { * wcc data is not atomic with * operation */ + bool fh_use_wgather; /* NFSv2 wgather option */ + bool fh_64bit_cookies;/* readdir cookie size */ int fh_flags; /* FH flags */ bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ @@ -215,6 +217,8 @@ extern char * SVCFH_fmt(struct svc_fh *fhp); * Function prototypes */ __be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int); +__be32 fh_verify_local(struct net *, struct svc_cred *, struct auth_domain *, + struct svc_fh *, umode_t, int); __be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *); __be32 fh_update(struct svc_fh *); void fh_put(struct svc_fh *); @@ -263,7 +267,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1, return true; } -#ifdef CONFIG_CRC32 /** * knfsd_fh_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -275,12 +278,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } -#else -static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) -{ - return 0; -} -#endif /** * fh_clear_pre_post_attrs - Reset pre/post attributes @@ -293,7 +290,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) fhp->fh_pre_saved = false; } -u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode); +u64 nfsd4_change_attribute(const struct kstat *stat); __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp); __be32 fh_fill_post_attrs(struct svc_fh *fhp); __be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a7315928a760..6dda081eb24c 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -13,6 +13,31 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC +static __be32 nfsd_map_status(__be32 status) +{ + switch (status) { + case nfs_ok: + break; + case nfserr_nofilehandle: + case nfserr_badhandle: + status = nfserr_stale; + break; + case nfserr_wrongsec: + case nfserr_xdev: + case nfserr_file_open: + status = nfserr_acces; + break; + case nfserr_symlink_not_dir: + status = nfserr_notdir; + break; + case nfserr_symlink: + case nfserr_wrong_type: + status = nfserr_inval; + break; + } + return status; +} + static __be32 nfsd_proc_null(struct svc_rqst *rqstp) { @@ -38,6 +63,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp) goto out; resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -103,12 +129,13 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) } } - resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0); + resp->status = nfsd_setattr(rqstp, fhp, &attrs, NULL); if (resp->status != nfs_ok) goto out; resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -143,6 +170,7 @@ nfsd_proc_lookup(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -164,6 +192,7 @@ nfsd_proc_readlink(struct svc_rqst *rqstp) page_address(resp->page), &resp->len); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -200,6 +229,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) set_bit(RQ_DROPME, &rqstp->rq_flags); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -235,6 +265,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) set_bit(RQ_DROPME, &rqstp->rq_flags); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -331,10 +362,11 @@ nfsd_proc_create(struct svc_rqst *rqstp) * echo thing > device-special-file-or-pipe * by doing a CREATE with type==0 */ - resp->status = nfsd_permission(rqstp, - newfhp->fh_export, - newfhp->fh_dentry, - NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS); + resp->status = nfsd_permission( + &rqstp->rq_cred, + newfhp->fh_export, + newfhp->fh_dentry, + NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS); if (resp->status && resp->status != nfserr_rofs) goto out_unlock; } @@ -390,8 +422,8 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ attr->ia_valid &= ATTR_SIZE; if (attr->ia_valid) - resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0, - (time64_t)0); + resp->status = nfsd_setattr(rqstp, newfhp, &attrs, + NULL); } out_unlock: @@ -403,6 +435,7 @@ done: goto out; resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -419,6 +452,7 @@ nfsd_proc_remove(struct svc_rqst *rqstp) resp->status = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, argp->name, argp->len); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -437,6 +471,7 @@ nfsd_proc_rename(struct svc_rqst *rqstp) &argp->tfh, argp->tname, argp->tlen); fh_put(&argp->ffh); fh_put(&argp->tfh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -457,6 +492,7 @@ nfsd_proc_link(struct svc_rqst *rqstp) &argp->ffh); fh_put(&argp->ffh); fh_put(&argp->tfh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -495,6 +531,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) fh_put(&argp->ffh); fh_put(&newfh); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -528,6 +565,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -545,6 +583,7 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) resp->status = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -590,6 +629,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) nfssvc_encode_nfscookie(resp, offset); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -607,6 +647,7 @@ nfsd_proc_statfs(struct svc_rqst *rqstp) resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, NFSD_MAY_BYPASS_GSS_ON_ROOT); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 710a54c7dffc..45f1bb2c6f13 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -19,6 +19,7 @@ #include <linux/sunrpc/svc_xprt.h> #include <linux/lockd/bind.h> #include <linux/nfsacl.h> +#include <linux/nfslocalio.h> #include <linux/seq_file.h> #include <linux/inetdevice.h> #include <net/addrconf.h> @@ -35,7 +36,6 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC atomic_t nfsd_th_cnt = ATOMIC_INIT(0); -extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static int nfsd_acl_rpcbind_set(struct net *, @@ -60,15 +60,6 @@ static __be32 nfsd_init_request(struct svc_rqst *, * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks. * - * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a - * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless - * nn->keep_active is set). That number of nfsd threads must - * exist and each must be listed in ->sp_all_threads in some entry of - * ->sv_pools[]. - * - * Each active thread holds a counted reference on nn->nfsd_serv, as does - * the nn->keep_active flag and various transient calls to svc_get(). - * * Finally, the nfsd_mutex also protects some of the global variables that are * accessed when nfsd starts and that are settable via the write_* routines in * nfsctl.c. In particular: @@ -89,6 +80,15 @@ DEFINE_SPINLOCK(nfsd_drc_lock); unsigned long nfsd_drc_max_mem; unsigned long nfsd_drc_mem_used; +#if IS_ENABLED(CONFIG_NFS_LOCALIO) +static const struct svc_version *localio_versions[] = { + [1] = &localio_version1, +}; + +#define NFSD_LOCALIO_NRVERS ARRAY_SIZE(localio_versions) + +#endif /* CONFIG_NFS_LOCALIO */ + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static const struct svc_version *nfsd_acl_version[] = { # if defined(CONFIG_NFSD_V2_ACL) @@ -99,23 +99,12 @@ static const struct svc_version *nfsd_acl_version[] = { # endif }; -#define NFSD_ACL_MINVERS 2 +#define NFSD_ACL_MINVERS 2 #define NFSD_ACL_NRVERS ARRAY_SIZE(nfsd_acl_version) -static struct svc_program nfsd_acl_program = { - .pg_prog = NFS_ACL_PROGRAM, - .pg_nvers = NFSD_ACL_NRVERS, - .pg_vers = nfsd_acl_version, - .pg_name = "nfsacl", - .pg_class = "nfsd", - .pg_authenticate = &svc_set_client, - .pg_init_request = nfsd_acl_init_request, - .pg_rpcbind_set = nfsd_acl_rpcbind_set, -}; - #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ -static const struct svc_version *nfsd_version[] = { +static const struct svc_version *nfsd_version[NFSD_MAXVERS+1] = { #if defined(CONFIG_NFSD_V2) [2] = &nfsd_version2, #endif @@ -125,98 +114,63 @@ static const struct svc_version *nfsd_version[] = { #endif }; -#define NFSD_MINVERS 2 -#define NFSD_NRVERS ARRAY_SIZE(nfsd_version) - -struct svc_program nfsd_program = { -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - .pg_next = &nfsd_acl_program, -#endif +struct svc_program nfsd_programs[] = { + { .pg_prog = NFS_PROGRAM, /* program number */ - .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ + .pg_nvers = NFSD_MAXVERS+1, /* nr of entries in nfsd_version */ .pg_vers = nfsd_version, /* version table */ .pg_name = "nfsd", /* program name */ .pg_class = "nfsd", /* authentication class */ - .pg_authenticate = &svc_set_client, /* export authentication */ + .pg_authenticate = svc_set_client, /* export authentication */ .pg_init_request = nfsd_init_request, .pg_rpcbind_set = nfsd_rpcbind_set, + }, +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) + { + .pg_prog = NFS_ACL_PROGRAM, + .pg_nvers = NFSD_ACL_NRVERS, + .pg_vers = nfsd_acl_version, + .pg_name = "nfsacl", + .pg_class = "nfsd", + .pg_authenticate = svc_set_client, + .pg_init_request = nfsd_acl_init_request, + .pg_rpcbind_set = nfsd_acl_rpcbind_set, + }, +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + { + .pg_prog = NFS_LOCALIO_PROGRAM, + .pg_nvers = NFSD_LOCALIO_NRVERS, + .pg_vers = localio_versions, + .pg_name = "nfslocalio", + .pg_class = "nfsd", + .pg_authenticate = svc_set_client, + .pg_init_request = svc_generic_init_request, + .pg_rpcbind_set = svc_generic_rpcbind_set, + } +#endif /* CONFIG_NFS_LOCALIO */ }; -static bool -nfsd_support_version(int vers) +bool nfsd_support_version(int vers) { - if (vers >= NFSD_MINVERS && vers < NFSD_NRVERS) + if (vers >= NFSD_MINVERS && vers <= NFSD_MAXVERS) return nfsd_version[vers] != NULL; return false; } -static bool * -nfsd_alloc_versions(void) -{ - bool *vers = kmalloc_array(NFSD_NRVERS, sizeof(bool), GFP_KERNEL); - unsigned i; - - if (vers) { - /* All compiled versions are enabled by default */ - for (i = 0; i < NFSD_NRVERS; i++) - vers[i] = nfsd_support_version(i); - } - return vers; -} - -static bool * -nfsd_alloc_minorversions(void) -{ - bool *vers = kmalloc_array(NFSD_SUPPORTED_MINOR_VERSION + 1, - sizeof(bool), GFP_KERNEL); - unsigned i; - - if (vers) { - /* All minor versions are enabled by default */ - for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) - vers[i] = nfsd_support_version(4); - } - return vers; -} - -void -nfsd_netns_free_versions(struct nfsd_net *nn) -{ - kfree(nn->nfsd_versions); - kfree(nn->nfsd4_minorversions); - nn->nfsd_versions = NULL; - nn->nfsd4_minorversions = NULL; -} - -static void -nfsd_netns_init_versions(struct nfsd_net *nn) -{ - if (!nn->nfsd_versions) { - nn->nfsd_versions = nfsd_alloc_versions(); - nn->nfsd4_minorversions = nfsd_alloc_minorversions(); - if (!nn->nfsd_versions || !nn->nfsd4_minorversions) - nfsd_netns_free_versions(nn); - } -} - int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change) { - if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) + if (vers < NFSD_MINVERS || vers > NFSD_MAXVERS) return 0; switch(change) { case NFSD_SET: - if (nn->nfsd_versions) - nn->nfsd_versions[vers] = nfsd_support_version(vers); + nn->nfsd_versions[vers] = nfsd_support_version(vers); break; case NFSD_CLEAR: - nfsd_netns_init_versions(nn); - if (nn->nfsd_versions) - nn->nfsd_versions[vers] = false; + nn->nfsd_versions[vers] = false; break; case NFSD_TEST: - if (nn->nfsd_versions) - return nn->nfsd_versions[vers]; - fallthrough; + return nn->nfsd_versions[vers]; case NFSD_AVAIL: return nfsd_support_version(vers); } @@ -243,23 +197,16 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change switch(change) { case NFSD_SET: - if (nn->nfsd4_minorversions) { - nfsd_vers(nn, 4, NFSD_SET); - nn->nfsd4_minorversions[minorversion] = - nfsd_vers(nn, 4, NFSD_TEST); - } + nfsd_vers(nn, 4, NFSD_SET); + nn->nfsd4_minorversions[minorversion] = + nfsd_vers(nn, 4, NFSD_TEST); break; case NFSD_CLEAR: - nfsd_netns_init_versions(nn); - if (nn->nfsd4_minorversions) { - nn->nfsd4_minorversions[minorversion] = false; - nfsd_adjust_nfsd_versions4(nn); - } + nn->nfsd4_minorversions[minorversion] = false; + nfsd_adjust_nfsd_versions4(nn); break; case NFSD_TEST: - if (nn->nfsd4_minorversions) - return nn->nfsd4_minorversions[minorversion]; - return nfsd_vers(nn, 4, NFSD_TEST); + return nn->nfsd4_minorversions[minorversion]; case NFSD_AVAIL: return minorversion <= NFSD_SUPPORTED_MINOR_VERSION && nfsd_vers(nn, 4, NFSD_AVAIL); @@ -267,6 +214,34 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change return 0; } +bool nfsd_serv_try_get(struct net *net) __must_hold(rcu) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + return (nn && percpu_ref_tryget_live(&nn->nfsd_serv_ref)); +} + +void nfsd_serv_put(struct net *net) __must_hold(rcu) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + percpu_ref_put(&nn->nfsd_serv_ref); +} + +static void nfsd_serv_done(struct percpu_ref *ref) +{ + struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref); + + complete(&nn->nfsd_serv_confirm_done); +} + +static void nfsd_serv_free(struct percpu_ref *ref) +{ + struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref); + + complete(&nn->nfsd_serv_free_done); +} + /* * Maximum number of nfsd processes */ @@ -354,13 +329,12 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn) */ void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn) { - int seq = 0; + unsigned int seq; do { - read_seqbegin_or_lock(&nn->writeverf_lock, &seq); + seq = read_seqbegin(&nn->writeverf_lock); memcpy(verf, nn->writeverf, sizeof(nn->writeverf)); - } while (need_seqretry(&nn->writeverf_lock, seq)); - done_seqretry(&nn->writeverf_lock, seq); + } while (read_seqretry(&nn->writeverf_lock, seq)); } static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn) @@ -432,13 +406,13 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) if (ret) goto out_filecache; +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + nfsd4_ssc_init_umount_work(nn); +#endif ret = nfs4_state_start_net(net); if (ret) goto out_reply_cache; -#ifdef CONFIG_NFSD_V4_2_INTER_SSC - nfsd4_ssc_init_umount_work(nn); -#endif nn->nfsd_net_up = true; return 0; @@ -460,6 +434,9 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + if (!nn->nfsd_net_up) + return; + nfsd_export_flush(net); nfs4_state_shutdown_net(net); nfsd_reply_cache_shutdown(nn); nfsd_file_cache_shutdown_net(net); @@ -467,6 +444,7 @@ static void nfsd_shutdown_net(struct net *net) lockd_down(net); nn->lockd_up = false; } + percpu_ref_exit(&nn->nfsd_serv_ref); nn->nfsd_net_up = false; nfsd_shutdown_generic(); } @@ -537,11 +515,22 @@ static struct notifier_block nfsd_inet6addr_notifier = { /* Only used under nfsd_mutex, so this atomic may be overkill: */ static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); -void nfsd_last_thread(struct net *net) +/** + * nfsd_destroy_serv - tear down NFSD's svc_serv for a namespace + * @net: network namespace the NFS service is associated with + */ +void nfsd_destroy_serv(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv = nn->nfsd_serv; + lockdep_assert_held(&nfsd_mutex); + + percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref, nfsd_serv_done); + wait_for_completion(&nn->nfsd_serv_confirm_done); + wait_for_completion(&nn->nfsd_serv_free_done); + /* percpu_ref_exit is called in nfsd_shutdown_net */ + spin_lock(&nfsd_notifier_lock); nn->nfsd_serv = NULL; spin_unlock(&nfsd_notifier_lock); @@ -559,26 +548,24 @@ void nfsd_last_thread(struct net *net) /* * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are - * started, then nfsd_last_thread will be run before any of this + * started, then nfsd_destroy_serv will be run before any of this * other initialization has been done except the rpcb information. */ svc_rpcb_cleanup(serv, net); - if (!nn->nfsd_net_up) - return; nfsd_shutdown_net(net); - nfsd_export_flush(net); + svc_destroy(&serv); } void nfsd_reset_versions(struct nfsd_net *nn) { int i; - for (i = 0; i < NFSD_NRVERS; i++) + for (i = 0; i <= NFSD_MAXVERS; i++) if (nfsd_vers(nn, i, NFSD_TEST)) return; - for (i = 0; i < NFSD_NRVERS; i++) + for (i = 0; i <= NFSD_MAXVERS; i++) if (i != 4) nfsd_vers(nn, i, NFSD_SET); else { @@ -642,17 +629,17 @@ void nfsd_shutdown_threads(struct net *net) return; } - svc_get(serv); /* Kill outstanding nfsd threads */ svc_set_num_threads(serv, NULL, 0); - nfsd_last_thread(net); - svc_put(serv); + nfsd_destroy_serv(net); mutex_unlock(&nfsd_mutex); } -bool i_am_nfsd(void) +struct svc_rqst *nfsd_current_rqst(void) { - return kthread_func(current) == nfsd; + if (kthread_func(current) == nfsd) + return kthread_data(current); + return NULL; } int nfsd_create_serv(struct net *net) @@ -662,14 +649,21 @@ int nfsd_create_serv(struct net *net) struct svc_serv *serv; WARN_ON(!mutex_is_locked(&nfsd_mutex)); - if (nn->nfsd_serv) { - svc_get(nn->nfsd_serv); + if (nn->nfsd_serv) return 0; - } + + error = percpu_ref_init(&nn->nfsd_serv_ref, nfsd_serv_free, + 0, GFP_KERNEL); + if (error) + return error; + init_completion(&nn->nfsd_serv_free_done); + init_completion(&nn->nfsd_serv_confirm_done); + if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(nn); - serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats, + serv = svc_create_pooled(nfsd_programs, ARRAY_SIZE(nfsd_programs), + &nn->nfsd_svcstats, nfsd_max_blksize, nfsd); if (serv == NULL) return -ENOMEM; @@ -677,7 +671,7 @@ int nfsd_create_serv(struct net *net) serv->sv_maxconn = nn->max_connections; error = svc_bind(serv, net); if (error < 0) { - svc_put(serv); + svc_destroy(&serv); return error; } spin_lock(&nfsd_notifier_lock); @@ -708,17 +702,29 @@ int nfsd_nrpools(struct net *net) int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) { - int i = 0; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv = nn->nfsd_serv; + int i; - if (nn->nfsd_serv != NULL) { - for (i = 0; i < nn->nfsd_serv->sv_nrpools && i < n; i++) - nthreads[i] = nn->nfsd_serv->sv_pools[i].sp_nrthreads; - } - + if (serv) + for (i = 0; i < serv->sv_nrpools && i < n; i++) + nthreads[i] = serv->sv_pools[i].sp_nrthreads; return 0; } +/** + * nfsd_set_nrthreads - set the number of running threads in the net's service + * @n: number of array members in @nthreads + * @nthreads: array of thread counts for each pool + * @net: network namespace to operate within + * + * This function alters the number of running threads for the given network + * namespace in each pool. If passed an array longer then the number of pools + * the extra pool settings are ignored. If passed an array shorter than the + * number of pools, the missing values are interpreted as 0's. + * + * Returns 0 on success or a negative errno on error. + */ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; @@ -726,11 +732,18 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) int err = 0; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - WARN_ON(!mutex_is_locked(&nfsd_mutex)); + lockdep_assert_held(&nfsd_mutex); if (nn->nfsd_serv == NULL || n <= 0) return 0; + /* + * Special case: When n == 1, pass in NULL for the pool, so that the + * change is distributed equally among them. + */ + if (n == 1) + return svc_set_num_threads(nn->nfsd_serv, NULL, nthreads[0]); + if (n > nn->nfsd_serv->sv_nrpools) n = nn->nfsd_serv->sv_nrpools; @@ -753,49 +766,50 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) } } - /* - * There must always be a thread in pool 0; the admin - * can't shut down NFS completely using pool_threads. - */ - if (nthreads[0] == 0) - nthreads[0] = 1; - /* apply the new numbers */ - svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) - break; + goto out; } - svc_put(nn->nfsd_serv); + + /* Anything undefined in array is considered to be 0 */ + for (i = n; i < nn->nfsd_serv->sv_nrpools; ++i) { + err = svc_set_num_threads(nn->nfsd_serv, + &nn->nfsd_serv->sv_pools[i], + 0); + if (err) + goto out; + } +out: return err; } -/* - * Adjust the number of threads and return the new number of threads. - * This is also the function that starts the server if necessary, if - * this is the first time nrservs is nonzero. +/** + * nfsd_svc: start up or shut down the nfsd server + * @n: number of array members in @nthreads + * @nthreads: array of thread counts for each pool + * @net: network namespace to operate within + * @cred: credentials to use for xprt creation + * @scope: server scope value (defaults to nodename) + * + * Adjust the number of threads in each pool and return the new + * total number of threads in the service. */ int -nfsd_svc(int nrservs, struct net *net, const struct cred *cred) +nfsd_svc(int n, int *nthreads, struct net *net, const struct cred *cred, const char *scope) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv; - mutex_lock(&nfsd_mutex); - dprintk("nfsd: creating service\n"); - - nrservs = max(nrservs, 0); - nrservs = min(nrservs, NFSD_MAXSERVS); - error = 0; + lockdep_assert_held(&nfsd_mutex); - if (nrservs == 0 && nn->nfsd_serv == NULL) - goto out; + dprintk("nfsd: creating service\n"); - strscpy(nn->nfsd_name, utsname()->nodename, + strscpy(nn->nfsd_name, scope ? scope : utsname()->nodename, sizeof(nn->nfsd_name)); error = nfsd_create_serv(net); @@ -806,20 +820,14 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) error = nfsd_startup_net(net, cred); if (error) goto out_put; - error = svc_set_num_threads(serv, NULL, nrservs); + error = nfsd_set_nrthreads(n, nthreads, net); if (error) goto out_put; error = serv->sv_nrthreads; out_put: - /* Threads now hold service active */ - if (xchg(&nn->keep_active, 0)) - svc_put(serv); - if (serv->sv_nrthreads == 0) - nfsd_last_thread(net); - svc_put(serv); + nfsd_destroy_serv(net); out: - mutex_unlock(&nfsd_mutex); return error; } @@ -900,17 +908,17 @@ nfsd_init_request(struct svc_rqst *rqstp, if (likely(nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST))) return svc_generic_init_request(rqstp, progp, ret); - ret->mismatch.lovers = NFSD_NRVERS; - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { + ret->mismatch.lovers = NFSD_MAXVERS + 1; + for (i = NFSD_MINVERS; i <= NFSD_MAXVERS; i++) { if (nfsd_vers(nn, i, NFSD_TEST)) { ret->mismatch.lovers = i; break; } } - if (ret->mismatch.lovers == NFSD_NRVERS) + if (ret->mismatch.lovers > NFSD_MAXVERS) return rpc_prog_unavail; ret->mismatch.hivers = NFSD_MINVERS; - for (i = NFSD_NRVERS - 1; i >= NFSD_MINVERS; i--) { + for (i = NFSD_MAXVERS; i >= NFSD_MINVERS; i--) { if (nfsd_vers(nn, i, NFSD_TEST)) { ret->mismatch.hivers = i; break; @@ -932,11 +940,9 @@ nfsd(void *vrqstp) /* At this point, the thread shares current->fs * with the init process. We need to create files with the - * umask as defined by the client instead of init's umask. */ - if (unshare_fs_struct() < 0) { - printk("Unable to start nfsd thread: out of memory\n"); - goto out; - } + * umask as defined by the client instead of init's umask. + */ + svc_thread_init_status(rqstp, unshare_fs_struct()); current->fs->umask = 0; @@ -947,23 +953,24 @@ nfsd(void *vrqstp) /* * The main request loop */ - while (!kthread_should_stop()) { + while (!svc_thread_should_stop(rqstp)) { /* Update sv_maxconn if it has changed */ rqstp->rq_server->sv_maxconn = nn->max_connections; svc_recv(rqstp); + + nfsd_file_net_dispose(nn); } atomic_dec(&nfsd_th_cnt); -out: /* Release the thread */ svc_exit_thread(rqstp); return 0; } /** - * nfsd_dispatch - Process an NFS or NFSACL Request + * nfsd_dispatch - Process an NFS or NFSACL or LOCALIO Request * @rqstp: incoming request * * This RPC dispatcher integrates the NFS server's duplicate reply cache. @@ -996,6 +1003,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp) if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; + /* + * Release rq_status_counter setting it to an odd value after the rpc + * request has been properly parsed. rq_status_counter is used to + * notify the consumers if the rqstp fields are stable + * (rq_status_counter is odd) or not meaningful (rq_status_counter + * is even). + */ + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1); + rp = NULL; switch (nfsd_cache_lookup(rqstp, start, len, &rp)) { case RC_DOIT: @@ -1014,6 +1030,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp) if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; + /* + * Release rq_status_counter setting it to an even value after the rpc + * request has been properly processed. + */ + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1); + nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply); out_cached_reply: return 1; @@ -1062,31 +1084,3 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return true; } - -int nfsd_pool_stats_open(struct inode *inode, struct file *file) -{ - int ret; - struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); - - mutex_lock(&nfsd_mutex); - if (nn->nfsd_serv == NULL) { - mutex_unlock(&nfsd_mutex); - return -ENODEV; - } - svc_get(nn->nfsd_serv); - ret = svc_pool_stats_open(nn->nfsd_serv, file); - mutex_unlock(&nfsd_mutex); - return ret; -} - -int nfsd_pool_stats_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct svc_serv *serv = seq->private; - int ret = seq_release(inode, file); - - mutex_lock(&nfsd_mutex); - svc_put(serv); - mutex_unlock(&nfsd_mutex); - return ret; -} diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h index 4f4282d4eeca..925817f66917 100644 --- a/fs/nfsd/pnfs.h +++ b/fs/nfsd/pnfs.h @@ -27,17 +27,18 @@ struct nfsd4_layout_ops { struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdevp); __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr, - struct nfsd4_getdeviceinfo *gdevp); + const struct nfsd4_getdeviceinfo *gdevp); __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp, struct nfsd4_layoutget *lgp); - __be32 (*encode_layoutget)(struct xdr_stream *, - struct nfsd4_layoutget *lgp); + __be32 (*encode_layoutget)(struct xdr_stream *xdr, + const struct nfsd4_layoutget *lgp); __be32 (*proc_layoutcommit)(struct inode *inode, struct nfsd4_layoutcommit *lcp); - void (*fence_client)(struct nfs4_layout_stateid *ls); + void (*fence_client)(struct nfs4_layout_stateid *ls, + struct nfsd_file *file); }; extern const struct nfsd4_layout_ops *nfsd4_layout_ops[]; @@ -72,11 +73,13 @@ void nfsd4_setup_layout_type(struct svc_export *exp); void nfsd4_return_all_client_layouts(struct nfs4_client *); void nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp); +void nfsd4_close_layout(struct nfs4_layout_stateid *ls); int nfsd4_init_pnfs(void); void nfsd4_exit_pnfs(void); #else struct nfs4_client; struct nfs4_file; +struct nfs4_layout_stateid; static inline void nfsd4_setup_layout_type(struct svc_export *exp) { @@ -89,6 +92,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp) { } +static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls) +{ +} static inline void nfsd4_exit_pnfs(void) { } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index cbddcf484dba..35b3564c065f 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -79,6 +79,7 @@ struct nfsd4_callback_ops { void (*prepare)(struct nfsd4_callback *); int (*done)(struct nfsd4_callback *, struct rpc_task *); void (*release)(struct nfsd4_callback *); + uint32_t opcode; }; /* @@ -88,17 +89,36 @@ struct nfsd4_callback_ops { */ struct nfs4_stid { refcount_t sc_count; -#define NFS4_OPEN_STID 1 -#define NFS4_LOCK_STID 2 -#define NFS4_DELEG_STID 4 -/* For an open stateid kept around *only* to process close replays: */ -#define NFS4_CLOSED_STID 8 + + /* A new stateid is added to the cl_stateids idr early before it + * is fully initialised. Its sc_type is then zero. After + * initialisation the sc_type it set under cl_lock, and then + * never changes. + */ +#define SC_TYPE_OPEN BIT(0) +#define SC_TYPE_LOCK BIT(1) +#define SC_TYPE_DELEG BIT(2) +#define SC_TYPE_LAYOUT BIT(3) + unsigned short sc_type; + +/* state_lock protects sc_status for delegation stateids. + * ->cl_lock protects sc_status for open and lock stateids. + * ->st_mutex also protect sc_status for open stateids. + * ->ls_lock protects sc_status for layout stateids. + */ +/* + * For an open stateid kept around *only* to process close replays. + * For deleg stateid, kept in idr until last reference is dropped. + */ +#define SC_STATUS_CLOSED BIT(0) /* For a deleg stateid kept around only to process free_stateid's: */ -#define NFS4_REVOKED_DELEG_STID 16 -#define NFS4_CLOSED_DELEG_STID 32 -#define NFS4_LAYOUT_STID 64 +#define SC_STATUS_REVOKED BIT(1) +#define SC_STATUS_ADMIN_REVOKED BIT(2) +#define SC_STATUS_FREEABLE BIT(3) +#define SC_STATUS_FREED BIT(4) + unsigned short sc_status; + struct list_head sc_cp_list; - unsigned char sc_type; stateid_t sc_stateid; spinlock_t sc_lock; struct nfs4_client *sc_client; @@ -117,6 +137,24 @@ struct nfs4_cpntf_state { time64_t cpntf_time; /* last time stateid used */ }; +struct nfs4_cb_fattr { + struct nfsd4_callback ncf_getattr; + u32 ncf_cb_status; + u32 ncf_cb_bmap[1]; + + /* from CB_GETATTR reply */ + u64 ncf_cb_change; + u64 ncf_cb_fsize; + + unsigned long ncf_cb_flags; + bool ncf_file_modified; + u64 ncf_initial_cinfo; + u64 ncf_cur_fsize; +}; + +/* bits for ncf_cb_flags */ +#define CB_GETATTR_BUSY 0 + /* * Represents a delegation stateid. The nfs4_client holds references to these * and they are put when it is being destroyed or when the delegation is @@ -150,6 +188,9 @@ struct nfs4_delegation { int dl_retries; struct nfsd4_callback dl_recall; bool dl_recalled; + + /* for CB_GETATTR */ + struct nfs4_cb_fattr dl_cb_fattr; }; #define cb_to_delegation(cb) \ @@ -174,8 +215,6 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of slots per session. 160 is useful for long haul TCP */ #define NFSD_MAX_SLOTS_PER_SESSION 160 -/* Maximum number of operations per session compound */ -#define NFSD_MAX_OPS_PER_COMPOUND 50 /* Maximum session per slot cache size */ #define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ @@ -319,8 +358,9 @@ enum { * 0. If they are not renewed within a lease period, they become eligible for * destruction by the laundromat. * - * These objects can also be destroyed prematurely by the fault injection code, - * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates. + * These objects can also be destroyed if the client sends certain forms of + * SETCLIENTID or EXCHANGE_ID operations. + * * Care is taken *not* to do this however when the objects have an elevated * refcount. * @@ -328,7 +368,7 @@ enum { * * o Each nfs4_clients is also hashed by name (the opaque quantity initially * sent by the client to identify itself). - * + * * o cl_perclient list is used to ensure no dangling stateowner references * when we expire the nfs4_client */ @@ -353,6 +393,7 @@ struct nfs4_client { clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ u32 cl_minorversion; + atomic_t cl_admin_revoked; /* count of admin-revoked states */ /* NFSv4.1 client implementation id: */ struct xdr_netobj cl_nii_domain; struct xdr_netobj cl_nii_name; @@ -370,6 +411,8 @@ struct nfs4_client { 1 << NFSD4_CLIENT_CB_KILL) #define NFSD4_CLIENT_CB_RECALL_ANY (6) unsigned long cl_flags; + + struct workqueue_struct *cl_callback_wq; const struct cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; u32 cl_cb_ident; @@ -448,7 +491,7 @@ struct nfs4_replay { unsigned int rp_buflen; char *rp_buf; struct knfsd_fh rp_openfh; - struct mutex rp_mutex; + atomic_t rp_locked; char rp_ibuf[NFSD4_REPLAY_ISIZE]; }; @@ -642,6 +685,7 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, NFSPROC4_CLNT_CB_RECALL_ANY, + NFSPROC4_CLNT_CB_GETATTR, }; /* Returns true iff a is later than b: */ @@ -674,15 +718,15 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, stateid_t *stateid, int flags, struct nfsd_file **filp, struct nfs4_stid **cstid); __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, - stateid_t *stateid, unsigned char typemask, - struct nfs4_stid **s, struct nfsd_net *nn); + stateid_t *stateid, unsigned short typemask, + unsigned short statusmask, + struct nfs4_stid **s, struct nfsd_net *nn); struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, void (*sc_free)(struct nfs4_stid *)); int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy); void nfs4_free_copy_state(struct nfsd4_copy *copy); struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, struct nfs4_stid *p_stid); -void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); @@ -696,8 +740,6 @@ extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn * extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); extern bool nfsd4_run_cb(struct nfsd4_callback *cb); -extern int nfsd4_create_callback_queue(void); -extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, @@ -716,6 +758,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi) } struct nfsd_file *find_any_file(struct nfs4_file *f); +#ifdef CONFIG_NFSD_V4 +void nfsd4_revoke_states(struct net *net, struct super_block *sb); +#else +static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb) +{ +} +#endif + /* grace period management */ void nfsd4_end_grace(struct nfsd_net *nn); @@ -734,5 +784,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) } extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, - struct inode *inode); + struct dentry *dentry, bool *file_modified, u64 *size); #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 9f606fa08bd4..f7eaf95e20fc 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -57,7 +57,7 @@ static int nfsd_show(struct seq_file *seq, void *v) #ifdef CONFIG_NFSD_V4 /* Show count for individual nfsv4 operations */ /* Writing operation numbers 0 1 2 also for maintaining uniformity */ - seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1); + seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1); for (i = 0; i <= LAST_NFS4_OP; i++) { seq_printf(seq, " %lld", percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)])); @@ -73,53 +73,11 @@ static int nfsd_show(struct seq_file *seq, void *v) DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); -int nfsd_percpu_counters_init(struct percpu_counter *counters, int num) -{ - int i, err = 0; - - for (i = 0; !err && i < num; i++) - err = percpu_counter_init(&counters[i], 0, GFP_KERNEL); - - if (!err) - return 0; - - for (; i > 0; i--) - percpu_counter_destroy(&counters[i-1]); - - return err; -} - -void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num) -{ - int i; - - for (i = 0; i < num; i++) - percpu_counter_set(&counters[i], 0); -} - -void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num) -{ - int i; - - for (i = 0; i < num; i++) - percpu_counter_destroy(&counters[i]); -} - -int nfsd_stat_counters_init(struct nfsd_net *nn) -{ - return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM); -} - -void nfsd_stat_counters_destroy(struct nfsd_net *nn) -{ - nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM); -} - -void nfsd_proc_stat_init(struct net *net) +struct proc_dir_entry *nfsd_proc_stat_init(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops); + return svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops); } void nfsd_proc_stat_shutdown(struct net *net) diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h index d2753e975dfd..e4efb0e4e56d 100644 --- a/fs/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -10,12 +10,7 @@ #include <uapi/linux/nfsd/stats.h> #include <linux/percpu_counter.h> -int nfsd_percpu_counters_init(struct percpu_counter *counters, int num); -void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num); -void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num); -int nfsd_stat_counters_init(struct nfsd_net *nn); -void nfsd_stat_counters_destroy(struct nfsd_net *nn); -void nfsd_proc_stat_init(struct net *net); +struct proc_dir_entry *nfsd_proc_stat_init(struct net *net); void nfsd_proc_stat_shutdown(struct net *net); static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index d7ed49eef591..b8470d4cbe99 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,8 +9,10 @@ #define _NFSD_TRACE_H #include <linux/tracepoint.h> +#include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xprt.h> #include <trace/misc/nfs.h> +#include <trace/misc/sunrpc.h> #include "export.h" #include "nfsfh.h" @@ -84,7 +86,8 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \ { NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \ { NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \ - { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }) + { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }, \ + { NFSD_MAY_LOCALIO, "LOCALIO" }) TRACE_EVENT(nfsd_compound, TP_PROTO( @@ -102,7 +105,7 @@ TRACE_EVENT(nfsd_compound, TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->opcnt = opcnt; - __assign_str_len(tag, tag, taglen); + __assign_str(tag); ), TP_printk("xid=0x%08x opcnt=%u tag=%s", __entry->xid, __entry->opcnt, __get_str(tag) @@ -125,7 +128,7 @@ TRACE_EVENT(nfsd_compound_status, __entry->args_opcnt = args_opcnt; __entry->resp_opcnt = resp_opcnt; __entry->status = be32_to_cpu(status); - __assign_str(name, name); + __assign_str(name); ), TP_printk("op=%u/%u %s status=%d", __entry->resp_opcnt, __entry->args_opcnt, @@ -191,7 +194,7 @@ TRACE_EVENT(nfsd_compound_encode_err, { S_IFIFO, "FIFO" }, \ { S_IFSOCK, "SOCK" }) -TRACE_EVENT(nfsd_fh_verify, +TRACE_EVENT_CONDITION(nfsd_fh_verify, TP_PROTO( const struct svc_rqst *rqstp, const struct svc_fh *fhp, @@ -199,6 +202,7 @@ TRACE_EVENT(nfsd_fh_verify, int access ), TP_ARGS(rqstp, fhp, type, access), + TP_CONDITION(rqstp != NULL), TP_STRUCT__entry( __field(unsigned int, netns_ino) __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) @@ -237,7 +241,7 @@ TRACE_EVENT_CONDITION(nfsd_fh_verify_err, __be32 error ), TP_ARGS(rqstp, fhp, type, access, error), - TP_CONDITION(error), + TP_CONDITION(rqstp != NULL && error), TP_STRUCT__entry( __field(unsigned int, netns_ino) __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) @@ -293,12 +297,13 @@ DECLARE_EVENT_CLASS(nfsd_fh_err_class, __entry->status) ) -#define DEFINE_NFSD_FH_ERR_EVENT(name) \ -DEFINE_EVENT(nfsd_fh_err_class, nfsd_##name, \ - TP_PROTO(struct svc_rqst *rqstp, \ - struct svc_fh *fhp, \ - int status), \ - TP_ARGS(rqstp, fhp, status)) +#define DEFINE_NFSD_FH_ERR_EVENT(name) \ +DEFINE_EVENT_CONDITION(nfsd_fh_err_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *fhp, \ + int status), \ + TP_ARGS(rqstp, fhp, status), \ + TP_CONDITION(rqstp != NULL)) DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badexport); DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badhandle); @@ -316,7 +321,7 @@ TRACE_EVENT(nfsd_exp_find_key, TP_fast_assign( __entry->fsidtype = key->ek_fsidtype; memcpy(__entry->fsid, key->ek_fsid, 4*6); - __assign_str(auth_domain, key->ek_client->name); + __assign_str(auth_domain); __entry->status = status; ), TP_printk("fsid=%x::%s domain=%s status=%d", @@ -340,8 +345,8 @@ TRACE_EVENT(nfsd_expkey_update, TP_fast_assign( __entry->fsidtype = key->ek_fsidtype; memcpy(__entry->fsid, key->ek_fsid, 4*6); - __assign_str(auth_domain, key->ek_client->name); - __assign_str(path, exp_path); + __assign_str(auth_domain); + __assign_str(path); __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); ), TP_printk("fsid=%x::%s domain=%s path=%s cache=%s", @@ -363,8 +368,8 @@ TRACE_EVENT(nfsd_exp_get_by_name, __field(int, status) ), TP_fast_assign( - __assign_str(path, key->ex_path.dentry->d_name.name); - __assign_str(auth_domain, key->ex_client->name); + __assign_str(path); + __assign_str(auth_domain); __entry->status = status; ), TP_printk("path=%s domain=%s status=%d", @@ -383,8 +388,8 @@ TRACE_EVENT(nfsd_export_update, __field(bool, cache) ), TP_fast_assign( - __assign_str(path, key->ex_path.dentry->d_name.name); - __assign_str(auth_domain, key->ex_client->name); + __assign_str(path); + __assign_str(auth_domain); __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); ), TP_printk("path=%s domain=%s cache=%s", @@ -483,7 +488,7 @@ TRACE_EVENT(nfsd_dirent, TP_fast_assign( __entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0; __entry->ino = ino; - __assign_str_len(name, name, namlen) + __assign_str(name); ), TP_printk("fh_hash=0x%08x ino=%llu name=%s", __entry->fh_hash, __entry->ino, __get_str(name) @@ -641,23 +646,18 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ DEFINE_STATESEQID_EVENT(preprocess); DEFINE_STATESEQID_EVENT(open_confirm); -TRACE_DEFINE_ENUM(NFS4_OPEN_STID); -TRACE_DEFINE_ENUM(NFS4_LOCK_STID); -TRACE_DEFINE_ENUM(NFS4_DELEG_STID); -TRACE_DEFINE_ENUM(NFS4_CLOSED_STID); -TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID); -TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID); -TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID); - #define show_stid_type(x) \ __print_flags(x, "|", \ - { NFS4_OPEN_STID, "OPEN" }, \ - { NFS4_LOCK_STID, "LOCK" }, \ - { NFS4_DELEG_STID, "DELEG" }, \ - { NFS4_CLOSED_STID, "CLOSED" }, \ - { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \ - { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \ - { NFS4_LAYOUT_STID, "LAYOUT" }) + { SC_TYPE_OPEN, "OPEN" }, \ + { SC_TYPE_LOCK, "LOCK" }, \ + { SC_TYPE_DELEG, "DELEG" }, \ + { SC_TYPE_LAYOUT, "LAYOUT" }) + +#define show_stid_status(x) \ + __print_flags(x, "|", \ + { SC_STATUS_CLOSED, "CLOSED" }, \ + { SC_STATUS_REVOKED, "REVOKED" }, \ + { SC_STATUS_ADMIN_REVOKED, "ADMIN_REVOKED" }) DECLARE_EVENT_CLASS(nfsd_stid_class, TP_PROTO( @@ -666,6 +666,7 @@ DECLARE_EVENT_CLASS(nfsd_stid_class, TP_ARGS(stid), TP_STRUCT__entry( __field(unsigned long, sc_type) + __field(unsigned long, sc_status) __field(int, sc_count) __field(u32, cl_boot) __field(u32, cl_id) @@ -676,16 +677,18 @@ DECLARE_EVENT_CLASS(nfsd_stid_class, const stateid_t *stp = &stid->sc_stateid; __entry->sc_type = stid->sc_type; + __entry->sc_status = stid->sc_status; __entry->sc_count = refcount_read(&stid->sc_count); __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; __entry->cl_id = stp->si_opaque.so_clid.cl_id; __entry->si_id = stp->si_opaque.so_id; __entry->si_generation = stp->si_generation; ), - TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s", + TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s", __entry->cl_boot, __entry->cl_id, __entry->si_id, __entry->si_generation, - __entry->sc_count, show_stid_type(__entry->sc_type) + __entry->sc_count, show_stid_type(__entry->sc_type), + show_stid_status(__entry->sc_status) ) ); @@ -696,6 +699,129 @@ DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \ DEFINE_STID_EVENT(revoke); +TRACE_EVENT(nfsd_stateowner_replay, + TP_PROTO( + u32 opnum, + const struct nfs4_replay *rp + ), + TP_ARGS(opnum, rp), + TP_STRUCT__entry( + __field(unsigned long, status) + __field(u32, opnum) + ), + TP_fast_assign( + __entry->status = be32_to_cpu(rp->rp_status); + __entry->opnum = opnum; + ), + TP_printk("opnum=%u status=%lu", + __entry->opnum, __entry->status) +); + +TRACE_EVENT_CONDITION(nfsd_seq4_status, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct nfsd4_sequence *sequence + ), + TP_ARGS(rqstp, sequence), + TP_CONDITION(sequence->status_flags), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(u32, xid) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, seqno) + __field(u32, reserved) + __field(unsigned long, status_flags) + ), + TP_fast_assign( + const struct nfsd4_sessionid *sid = + (struct nfsd4_sessionid *)&sequence->sessionid; + + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->cl_boot = sid->clientid.cl_boot; + __entry->cl_id = sid->clientid.cl_id; + __entry->seqno = sid->sequence; + __entry->reserved = sid->reserved; + __entry->status_flags = sequence->status_flags; + ), + TP_printk("xid=0x%08x sessionid=%08x:%08x:%08x:%08x status_flags=%s", + __entry->xid, __entry->cl_boot, __entry->cl_id, + __entry->seqno, __entry->reserved, + show_nfs4_seq4_status(__entry->status_flags) + ) +); + +DECLARE_EVENT_CLASS(nfsd_cs_slot_class, + TP_PROTO( + const struct nfs4_client *clp, + const struct nfsd4_create_session *cs + ), + TP_ARGS(clp, cs), + TP_STRUCT__entry( + __field(u32, seqid) + __field(u32, slot_seqid) + __field(u32, cl_boot) + __field(u32, cl_id) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + const struct nfsd4_clid_slot *slot = &clp->cl_cs_slot; + + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen); + __entry->seqid = cs->seqid; + __entry->slot_seqid = slot->sl_seqid; + ), + TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->seqid, __entry->slot_seqid + ) +); + +#define DEFINE_CS_SLOT_EVENT(name) \ +DEFINE_EVENT(nfsd_cs_slot_class, nfsd_##name, \ + TP_PROTO( \ + const struct nfs4_client *clp, \ + const struct nfsd4_create_session *cs \ + ), \ + TP_ARGS(clp, cs)) + +DEFINE_CS_SLOT_EVENT(slot_seqid_conf); +DEFINE_CS_SLOT_EVENT(slot_seqid_unconf); + +TRACE_EVENT(nfsd_slot_seqid_sequence, + TP_PROTO( + const struct nfs4_client *clp, + const struct nfsd4_sequence *seq, + const struct nfsd4_slot *slot + ), + TP_ARGS(clp, seq, slot), + TP_STRUCT__entry( + __field(u32, seqid) + __field(u32, slot_seqid) + __field(u32, cl_boot) + __field(u32, cl_id) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + __field(bool, in_use) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen); + __entry->seqid = seq->seqid; + __entry->slot_seqid = slot->sl_seqid; + ), + TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u (%sin use)", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->seqid, __entry->slot_seqid, + __entry->in_use ? "" : "not " + ) +); + DECLARE_EVENT_CLASS(nfsd_clientid_class, TP_PROTO(const clientid_t *clid), TP_ARGS(clid), @@ -725,6 +851,30 @@ DEFINE_CLIENTID_EVENT(purged); DEFINE_CLIENTID_EVENT(renew); DEFINE_CLIENTID_EVENT(stale); +TRACE_EVENT(nfsd_mark_client_expired, + TP_PROTO( + const struct nfs4_client *clp, + int cl_rpc_users + ), + TP_ARGS(clp, cl_rpc_users), + TP_STRUCT__entry( + __field(int, cl_rpc_users) + __field(u32, cl_boot) + __field(u32, cl_id) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_rpc_users = cl_rpc_users; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) + ), + TP_printk("addr=%pISpc client %08x:%08x cl_rpc_users=%d", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->cl_rpc_users) +); + DECLARE_EVENT_CLASS(nfsd_net_class, TP_PROTO(const struct nfsd_net *nn), TP_ARGS(nn), @@ -853,7 +1003,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class, __entry->flavor = clp->cl_cred.cr_flavor; memcpy(__entry->verifier, (void *)&clp->cl_verifier, NFS4_VERIFIER_SIZE); - __assign_str_len(name, clp->cl_name.data, clp->cl_name.len); + __assign_str(name); ), TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x", __entry->addr, __get_str(name), @@ -963,7 +1113,7 @@ TRACE_EVENT(nfsd_file_acquire, ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0; @@ -997,7 +1147,7 @@ TRACE_EVENT(nfsd_file_insert_err, __field(long, error) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0; __entry->inode = inode; __entry->may_flags = may_flags; __entry->error = error; @@ -1027,7 +1177,7 @@ TRACE_EVENT(nfsd_file_cons_err, __field(const void *, nf_file) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = refcount_read(&nf->nf_ref); @@ -1262,28 +1412,6 @@ TRACE_EVENT(nfsd_drc_mismatch, __entry->ingress) ); -TRACE_EVENT_CONDITION(nfsd_drc_gc, - TP_PROTO( - const struct nfsd_net *nn, - unsigned long freed - ), - TP_ARGS(nn, freed), - TP_CONDITION(freed > 0), - TP_STRUCT__entry( - __field(unsigned long long, boot_time) - __field(unsigned long, freed) - __field(int, total) - ), - TP_fast_assign( - __entry->boot_time = nn->boot_time; - __entry->freed = freed; - __entry->total = atomic_read(&nn->num_drc_entries); - ), - TP_printk("boot_time=%16llx total=%d freed=%lu", - __entry->boot_time, __entry->total, __entry->freed - ) -); - TRACE_EVENT(nfsd_cb_args, TP_PROTO( const struct nfs4_client *clp, @@ -1356,10 +1484,14 @@ DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \ TP_PROTO(const struct nfs4_client *clp), \ TP_ARGS(clp)) -DEFINE_NFSD_CB_EVENT(state); +DEFINE_NFSD_CB_EVENT(start); +DEFINE_NFSD_CB_EVENT(new_state); DEFINE_NFSD_CB_EVENT(probe); DEFINE_NFSD_CB_EVENT(lost); DEFINE_NFSD_CB_EVENT(shutdown); +DEFINE_NFSD_CB_EVENT(rpc_prepare); +DEFINE_NFSD_CB_EVENT(rpc_done); +DEFINE_NFSD_CB_EVENT(rpc_release); TRACE_DEFINE_ENUM(RPC_AUTH_NULL); TRACE_DEFINE_ENUM(RPC_AUTH_UNIX); @@ -1393,7 +1525,7 @@ TRACE_EVENT(nfsd_cb_setup, TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; __entry->cl_id = clp->cl_clientid.cl_id; - __assign_str(netid, netid); + __assign_str(netid); __entry->authflavor = authflavor; __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, clp->cl_cb_conn.cb_addrlen) @@ -1427,6 +1559,143 @@ TRACE_EVENT(nfsd_cb_setup_err, __entry->error) ); +/* Not a real opcode, but there is no 0 operation. */ +#define _CB_NULL 0 + +#define show_nfsd_cb_opcode(val) \ + __print_symbolic(val, \ + { _CB_NULL, "CB_NULL" }, \ + { OP_CB_GETATTR, "CB_GETATTR" }, \ + { OP_CB_RECALL, "CB_RECALL" }, \ + { OP_CB_LAYOUTRECALL, "CB_LAYOUTRECALL" }, \ + { OP_CB_RECALL_ANY, "CB_RECALL_ANY" }, \ + { OP_CB_NOTIFY_LOCK, "CB_NOTIFY_LOCK" }, \ + { OP_CB_OFFLOAD, "CB_OFFLOAD" }) + +DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class, + TP_PROTO( + const struct nfs4_client *clp, + const struct nfsd4_callback *cb + ), + TP_ARGS(clp, cb), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(const void *, cb) + __field(unsigned long, opcode) + __field(bool, need_restart) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __entry->cb = cb; + __entry->opcode = cb->cb_ops ? cb->cb_ops->opcode : _CB_NULL; + __entry->need_restart = cb->cb_need_restart; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) + ), + TP_printk("addr=%pISpc client %08x:%08x cb=%p%s opcode=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, __entry->cb, + __entry->need_restart ? " (need restart)" : " (first try)", + show_nfsd_cb_opcode(__entry->opcode) + ) +); + +#define DEFINE_NFSD_CB_LIFETIME_EVENT(name) \ +DEFINE_EVENT(nfsd_cb_lifetime_class, nfsd_cb_##name, \ + TP_PROTO( \ + const struct nfs4_client *clp, \ + const struct nfsd4_callback *cb \ + ), \ + TP_ARGS(clp, cb)) + +DEFINE_NFSD_CB_LIFETIME_EVENT(queue); +DEFINE_NFSD_CB_LIFETIME_EVENT(destroy); +DEFINE_NFSD_CB_LIFETIME_EVENT(restart); +DEFINE_NFSD_CB_LIFETIME_EVENT(bc_update); +DEFINE_NFSD_CB_LIFETIME_EVENT(bc_shutdown); + +TRACE_EVENT(nfsd_cb_seq_status, + TP_PROTO( + const struct rpc_task *task, + const struct nfsd4_callback *cb + ), + TP_ARGS(task, cb), + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, seqno) + __field(u32, reserved) + __field(int, tk_status) + __field(int, seq_status) + ), + TP_fast_assign( + const struct nfs4_client *clp = cb->cb_clp; + const struct nfsd4_session *session = clp->cl_cb_session; + const struct nfsd4_sessionid *sid = + (struct nfsd4_sessionid *)&session->se_sessionid; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + __entry->cl_boot = sid->clientid.cl_boot; + __entry->cl_id = sid->clientid.cl_id; + __entry->seqno = sid->sequence; + __entry->reserved = sid->reserved; + __entry->tk_status = task->tk_status; + __entry->seq_status = cb->cb_seq_status; + ), + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d", + __entry->task_id, __entry->client_id, + __entry->cl_boot, __entry->cl_id, + __entry->seqno, __entry->reserved, + __entry->tk_status, __entry->seq_status + ) +); + +TRACE_EVENT(nfsd_cb_free_slot, + TP_PROTO( + const struct rpc_task *task, + const struct nfsd4_callback *cb + ), + TP_ARGS(task, cb), + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, seqno) + __field(u32, reserved) + __field(u32, slot_seqno) + ), + TP_fast_assign( + const struct nfs4_client *clp = cb->cb_clp; + const struct nfsd4_session *session = clp->cl_cb_session; + const struct nfsd4_sessionid *sid = + (struct nfsd4_sessionid *)&session->se_sessionid; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + __entry->cl_boot = sid->clientid.cl_boot; + __entry->cl_id = sid->clientid.cl_id; + __entry->seqno = sid->sequence; + __entry->reserved = sid->reserved; + __entry->slot_seqno = session->se_cb_seq_nr; + ), + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u", + __entry->task_id, __entry->client_id, + __entry->cl_boot, __entry->cl_id, + __entry->seqno, __entry->reserved, + __entry->slot_seqno + ) +); + TRACE_EVENT_CONDITION(nfsd_cb_recall, TP_PROTO( const struct nfs4_stid *stid @@ -1582,6 +1851,7 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_getattr_done); TRACE_EVENT(nfsd_cb_recall_any_done, TP_PROTO( @@ -1616,7 +1886,7 @@ TRACE_EVENT(nfsd_ctl_unlock_ip, ), TP_fast_assign( __entry->netns_ino = net->ns.inum; - __assign_str(address, address); + __assign_str(address); ), TP_printk("address=%s", __get_str(address) @@ -1635,7 +1905,7 @@ TRACE_EVENT(nfsd_ctl_unlock_fs, ), TP_fast_assign( __entry->netns_ino = net->ns.inum; - __assign_str(path, path); + __assign_str(path); ), TP_printk("path=%s", __get_str(path) @@ -1659,8 +1929,8 @@ TRACE_EVENT(nfsd_ctl_filehandle, TP_fast_assign( __entry->netns_ino = net->ns.inum; __entry->maxsize = maxsize; - __assign_str(domain, domain); - __assign_str(path, path); + __assign_str(domain); + __assign_str(path); ), TP_printk("domain=%s path=%s maxsize=%d", __get_str(domain), __get_str(path), __entry->maxsize @@ -1720,7 +1990,7 @@ TRACE_EVENT(nfsd_ctl_version, ), TP_fast_assign( __entry->netns_ino = net->ns.inum; - __assign_str(mesg, mesg); + __assign_str(mesg); ), TP_printk("%s", __get_str(mesg) @@ -1761,7 +2031,7 @@ TRACE_EVENT(nfsd_ctl_ports_addxprt, TP_fast_assign( __entry->netns_ino = net->ns.inum; __entry->port = port; - __assign_str(transport, transport); + __assign_str(transport); ), TP_printk("transport=%s port=%d", __get_str(transport), __entry->port @@ -1822,9 +2092,9 @@ TRACE_EVENT(nfsd_ctl_time, TP_fast_assign( __entry->netns_ino = net->ns.inum; __entry->time = time; - __assign_str_len(name, name, namelen); + __assign_str(name); ), - TP_printk("file=%s time=%d\n", + TP_printk("file=%s time=%d", __get_str(name), __entry->time ) ); @@ -1841,7 +2111,7 @@ TRACE_EVENT(nfsd_ctl_recoverydir, ), TP_fast_assign( __entry->netns_ino = net->ns.inum; - __assign_str(recdir, recdir); + __assign_str(recdir); ), TP_printk("recdir=%s", __get_str(recdir) @@ -1863,6 +2133,174 @@ TRACE_EVENT(nfsd_end_grace, ) ); +DECLARE_EVENT_CLASS(nfsd_copy_class, + TP_PROTO( + const struct nfsd4_copy *copy + ), + TP_ARGS(copy), + TP_STRUCT__entry( + __field(bool, intra) + __field(bool, async) + __field(u32, src_cl_boot) + __field(u32, src_cl_id) + __field(u32, src_so_id) + __field(u32, src_si_generation) + __field(u32, dst_cl_boot) + __field(u32, dst_cl_id) + __field(u32, dst_so_id) + __field(u32, dst_si_generation) + __field(u32, cb_cl_boot) + __field(u32, cb_cl_id) + __field(u32, cb_so_id) + __field(u32, cb_si_generation) + __field(u64, src_cp_pos) + __field(u64, dst_cp_pos) + __field(u64, cp_count) + __sockaddr(addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + const stateid_t *src_stp = ©->cp_src_stateid; + const stateid_t *dst_stp = ©->cp_dst_stateid; + const stateid_t *cb_stp = ©->cp_res.cb_stateid; + + __entry->intra = test_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); + __entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); + __entry->src_cl_boot = src_stp->si_opaque.so_clid.cl_boot; + __entry->src_cl_id = src_stp->si_opaque.so_clid.cl_id; + __entry->src_so_id = src_stp->si_opaque.so_id; + __entry->src_si_generation = src_stp->si_generation; + __entry->dst_cl_boot = dst_stp->si_opaque.so_clid.cl_boot; + __entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id; + __entry->dst_so_id = dst_stp->si_opaque.so_id; + __entry->dst_si_generation = dst_stp->si_generation; + __entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot; + __entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id; + __entry->cb_so_id = cb_stp->si_opaque.so_id; + __entry->cb_si_generation = cb_stp->si_generation; + __entry->src_cp_pos = copy->cp_src_pos; + __entry->dst_cp_pos = copy->cp_dst_pos; + __entry->cp_count = copy->cp_count; + __assign_sockaddr(addr, ©->cp_clp->cl_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("client=%pISpc intra=%d async=%d " + "src_client %08x:%08x src_stateid %08x:%08x " + "dst_client %08x:%08x dst_stateid %08x:%08x " + "cb_client %08x:%08x cb_stateid %08x:%08x " + "cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu", + __get_sockaddr(addr), __entry->intra, __entry->async, + __entry->src_cl_boot, __entry->src_cl_id, + __entry->src_so_id, __entry->src_si_generation, + __entry->dst_cl_boot, __entry->dst_cl_id, + __entry->dst_so_id, __entry->dst_si_generation, + __entry->cb_cl_boot, __entry->cb_cl_id, + __entry->cb_so_id, __entry->cb_si_generation, + __entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count + ) +); + +#define DEFINE_COPY_EVENT(name) \ +DEFINE_EVENT(nfsd_copy_class, nfsd_copy_##name, \ + TP_PROTO(const struct nfsd4_copy *copy), \ + TP_ARGS(copy)) + +DEFINE_COPY_EVENT(inter); +DEFINE_COPY_EVENT(intra); +DEFINE_COPY_EVENT(async); + +TRACE_EVENT(nfsd_copy_done, + TP_PROTO( + const struct nfsd4_copy *copy, + __be32 status + ), + TP_ARGS(copy, status), + TP_STRUCT__entry( + __field(int, status) + __field(bool, intra) + __field(bool, async) + __sockaddr(addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->status = be32_to_cpu(status); + __entry->intra = test_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); + __entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); + __assign_sockaddr(addr, ©->cp_clp->cl_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("addr=%pISpc status=%d intra=%d async=%d", + __get_sockaddr(addr), __entry->status, __entry->intra, __entry->async + ) +); + +TRACE_EVENT(nfsd_copy_async_done, + TP_PROTO( + const struct nfsd4_copy *copy + ), + TP_ARGS(copy), + TP_STRUCT__entry( + __field(int, status) + __field(bool, intra) + __field(bool, async) + __field(u32, src_cl_boot) + __field(u32, src_cl_id) + __field(u32, src_so_id) + __field(u32, src_si_generation) + __field(u32, dst_cl_boot) + __field(u32, dst_cl_id) + __field(u32, dst_so_id) + __field(u32, dst_si_generation) + __field(u32, cb_cl_boot) + __field(u32, cb_cl_id) + __field(u32, cb_so_id) + __field(u32, cb_si_generation) + __field(u64, src_cp_pos) + __field(u64, dst_cp_pos) + __field(u64, cp_count) + __sockaddr(addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + const stateid_t *src_stp = ©->cp_src_stateid; + const stateid_t *dst_stp = ©->cp_dst_stateid; + const stateid_t *cb_stp = ©->cp_res.cb_stateid; + + __entry->status = be32_to_cpu(copy->nfserr); + __entry->intra = test_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); + __entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); + __entry->src_cl_boot = src_stp->si_opaque.so_clid.cl_boot; + __entry->src_cl_id = src_stp->si_opaque.so_clid.cl_id; + __entry->src_so_id = src_stp->si_opaque.so_id; + __entry->src_si_generation = src_stp->si_generation; + __entry->dst_cl_boot = dst_stp->si_opaque.so_clid.cl_boot; + __entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id; + __entry->dst_so_id = dst_stp->si_opaque.so_id; + __entry->dst_si_generation = dst_stp->si_generation; + __entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot; + __entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id; + __entry->cb_so_id = cb_stp->si_opaque.so_id; + __entry->cb_si_generation = cb_stp->si_generation; + __entry->src_cp_pos = copy->cp_src_pos; + __entry->dst_cp_pos = copy->cp_dst_pos; + __entry->cp_count = copy->cp_count; + __assign_sockaddr(addr, ©->cp_clp->cl_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("client=%pISpc status=%d intra=%d async=%d " + "src_client %08x:%08x src_stateid %08x:%08x " + "dst_client %08x:%08x dst_stateid %08x:%08x " + "cb_client %08x:%08x cb_stateid %08x:%08x " + "cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu", + __get_sockaddr(addr), + __entry->status, __entry->intra, __entry->async, + __entry->src_cl_boot, __entry->src_cl_id, + __entry->src_so_id, __entry->src_si_generation, + __entry->dst_cl_boot, __entry->dst_cl_id, + __entry->dst_so_id, __entry->dst_si_generation, + __entry->cb_cl_boot, __entry->cb_cl_id, + __entry->cb_so_id, __entry->cb_si_generation, + __entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b3e51d88faff..ca29a5e1600f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -25,7 +25,6 @@ #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> #include <linux/jhash.h> -#include <linux/ima.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/uaccess.h> @@ -338,6 +337,24 @@ out: return err; } +static void +commit_reset_write_verifier(struct nfsd_net *nn, struct svc_rqst *rqstp, + int err) +{ + switch (err) { + case -EAGAIN: + case -ESTALE: + /* + * Neither of these are the result of a problem with + * durable storage, so avoid a write verifier reset. + */ + break; + default: + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, err); + } +} + /* * Commit metadata changes to stable storage. */ @@ -405,8 +422,9 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, if (iap->ia_size < inode->i_size) { __be32 err; - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + err = nfsd_permission(&rqstp->rq_cred, + fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); if (err) return err; } @@ -459,7 +477,6 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) * @rqstp: controlling RPC transaction * @fhp: filehandle of target * @attr: attributes to set - * @check_guard: set to 1 if guardtime is a valid timestamp * @guardtime: do not act if ctime.tv_sec does not match this timestamp * * This call may adjust the contents of @attr (in particular, this @@ -471,8 +488,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) */ __be32 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfsd_attrs *attr, - int check_guard, time64_t guardtime) + struct nfsd_attrs *attr, const struct timespec64 *guardtime) { struct dentry *dentry; struct inode *inode; @@ -480,7 +496,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, int accmode = NFSD_MAY_SATTR; umode_t ftype = 0; __be32 err; - int host_err; + int host_err = 0; bool get_write_count; bool size_change = (iap->ia_valid & ATTR_SIZE); int retries; @@ -521,9 +537,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_sanitize_attrs(inode, iap); - if (check_guard && guardtime != inode_get_ctime(inode).tv_sec) - return nfserr_notsync; - /* * The size case is special, it changes the file in addition to the * attributes, and file systems don't expect it to be mixed with @@ -538,6 +551,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, } inode_lock(inode); + err = fh_fill_pre_attrs(fhp); + if (err) + goto out_unlock; + + if (guardtime) { + struct timespec64 ctime = inode_get_ctime(inode); + if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec || + guardtime->tv_nsec != ctime.tv_nsec) { + err = nfserr_notsync; + goto out_fill_attrs; + } + } + for (retries = 1;;) { struct iattr attrs; @@ -565,13 +591,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, attr->na_aclerr = set_posix_acl(&nop_mnt_idmap, dentry, ACL_TYPE_DEFAULT, attr->na_dpacl); +out_fill_attrs: + /* + * RFC 1813 Section 3.3.2 does not mandate that an NFS server + * returns wcc_data for SETATTR. Some client implementations + * depend on receiving wcc_data, however, to sort out partial + * updates (eg., the client requested that size and mode be + * modified, but the server changed only the file mode). + */ + fh_fill_post_attrs(fhp); +out_unlock: inode_unlock(inode); if (size_change) put_write_access(inode); out: if (!host_err) host_err = commit_metadata(fhp); - return nfserrno(host_err); + return err != 0 ? err : nfserrno(host_err); } #if defined(CONFIG_NFSD_V4) @@ -648,8 +684,7 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, &nfsd4_get_cstate(rqstp)->current_fh, dst_pos, count, status); - nfsd_reset_write_verifier(nn); - trace_nfsd_writeverf_reset(nn, rqstp, status); + commit_reset_write_verifier(nn, rqstp, status); ret = nfserrno(status); } } @@ -781,7 +816,8 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor sresult |= map->access; - err2 = nfsd_permission(rqstp, export, dentry, map->how); + err2 = nfsd_permission(&rqstp->rq_cred, export, + dentry, map->how); switch (err2) { case nfs_ok: result |= map->access; @@ -861,17 +897,12 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, goto out; } - host_err = ima_file_check(file, may_flags); + host_err = security_file_post_open(file, may_flags); if (host_err) { fput(file); goto out; } - if (may_flags & NFSD_MAY_64BIT_COOKIE) - file->f_mode |= FMODE_64BITHASH; - else - file->f_mode |= FMODE_32BITHASH; - *filp = file; out: return host_err; @@ -1025,7 +1056,10 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ssize_t host_err; trace_nfsd_read_splice(rqstp, fhp, offset, *count); - host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); + host_err = rw_verify_area(READ, file, &offset, *count); + if (!host_err) + host_err = splice_direct_to_actor(file, &sd, + nfsd_direct_splice_actor); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } @@ -1124,7 +1158,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, errseq_t since; __be32 nfserr; int host_err; - int use_wgather; loff_t pos = offset; unsigned long exp_op_flags = 0; unsigned int pflags = current->flags; @@ -1150,24 +1183,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, } exp = fhp->fh_export; - use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!EX_ISSYNC(exp)) stable = NFS_UNSTABLE; - if (stable && !use_wgather) + if (stable && !fhp->fh_use_wgather) flags |= RWF_SYNC; iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt); since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); - file_start_write(file); host_err = vfs_iter_write(file, &iter, &pos, flags); - file_end_write(file); if (host_err < 0) { - nfsd_reset_write_verifier(nn); - trace_nfsd_writeverf_reset(nn, rqstp, host_err); + commit_reset_write_verifier(nn, rqstp, host_err); goto out_nfserr; } *cnt = host_err; @@ -1177,12 +1206,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, if (host_err < 0) goto out_nfserr; - if (stable && use_wgather) { + if (stable && fhp->fh_use_wgather) { host_err = wait_for_concurrent_writes(file); - if (host_err < 0) { - nfsd_reset_write_verifier(nn); - trace_nfsd_writeverf_reset(nn, rqstp, host_err); - } + if (host_err < 0) + commit_reset_write_verifier(nn, rqstp, host_err); } out_nfserr: @@ -1199,6 +1226,30 @@ out_nfserr: } /** + * nfsd_read_splice_ok - check if spliced reading is supported + * @rqstp: RPC transaction context + * + * Return values: + * %true: nfsd_splice_read() may be used + * %false: nfsd_splice_read() must not be used + * + * NFS READ normally uses splice to send data in-place. However the + * data in cache can change after the reply's MIC is computed but + * before the RPC reply is sent. To prevent the client from + * rejecting the server-computed MIC in this somewhat rare case, do + * not use splice with the GSS integrity and privacy services. + */ +bool nfsd_read_splice_ok(struct svc_rqst *rqstp) +{ + switch (svc_auth_flavor(rqstp)) { + case RPC_AUTH_GSS_KRB5I: + case RPC_AUTH_GSS_KRB5P: + return false; + } + return true; +} + +/** * nfsd_read - Read data from a file * @rqstp: RPC transaction context * @fhp: file handle of file to be read @@ -1227,7 +1278,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; file = nf->nf_file; - if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags)) + if (file->f_op->splice_read && nfsd_read_splice_ok(rqstp)) err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof); else err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof); @@ -1325,8 +1376,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, err = nfserr_notsupp; break; default: - nfsd_reset_write_verifier(nn); - trace_nfsd_writeverf_reset(nn, rqstp, err2); + commit_reset_write_verifier(nn, rqstp, err2); err = nfserrno(err2); } } else @@ -1368,8 +1418,8 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, * Callers expect new file metadata to be committed even * if the attributes have not changed. */ - if (iap->ia_valid) - status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0); + if (nfsd_attrs_valid(attrs)) + status = nfsd_setattr(rqstp, resfhp, attrs, NULL); else status = nfserrno(commit_metadata(resfhp)); @@ -1421,7 +1471,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, dirp = d_inode(dentry); dchild = dget(resfhp->fh_dentry); - err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE); + err = nfsd_permission(&rqstp->rq_cred, fhp->fh_export, dentry, + NFSD_MAY_CREATE); if (err) goto out; @@ -1713,10 +1764,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (!err) err = nfserrno(commit_metadata(tfhp)); } else { - if (host_err == -EXDEV && rqstp->rq_vers == 2) - err = nfserr_acces; - else - err = nfserrno(host_err); + err = nfserrno(host_err); } dput(dnew); out_drop_write: @@ -1782,7 +1830,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; - err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; + err = nfserr_xdev; if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) goto out; if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) @@ -1796,6 +1844,10 @@ retry: } trap = lock_rename(tdentry, fdentry); + if (IS_ERR(trap)) { + err = nfserr_xdev; + goto out_want_write; + } err = fh_fill_pre_attrs(ffhp); if (err != nfs_ok) goto out_unlock; @@ -1864,13 +1916,14 @@ retry: } out_unlock: unlock_rename(tdentry, fdentry); +out_want_write: fh_drop_write(ffhp); /* - * If the target dentry has cached open files, then we need to try to - * close them prior to doing the rename. Flushing delayed fput - * shouldn't be done with locks held however, so we delay it until this - * point and then reattempt the whole shebang. + * If the target dentry has cached open files, then we need to + * try to close them prior to doing the rename. Final fput + * shouldn't be done with locks held however, so we delay it + * until this point and then reattempt the whole shebang. */ if (close_cached) { close_cached = false; @@ -1882,9 +1935,17 @@ out: return err; } -/* - * Unlink a file or directory - * N.B. After this call fhp needs an fh_put +/** + * nfsd_unlink - remove a directory entry + * @rqstp: RPC transaction context + * @fhp: the file handle of the parent directory to be modified + * @type: enforced file type of the object to be removed + * @fname: the name of directory entry to be removed + * @flen: length of @fname in octets + * + * After this call fhp needs an fh_put. + * + * Returns a generic NFS status code in network byte-order. */ __be32 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, @@ -1958,18 +2019,17 @@ out_drop_write: fh_drop_write(fhp); out_nfserr: if (host_err == -EBUSY) { - /* name is mounted-on. There is no perfect - * error status. + /* + * See RFC 8881 Section 18.25.4 para 4: NFSv4 REMOVE + * wants a status unique to the object type. */ - if (nfsd_v4client(rqstp)) + if (type != S_IFDIR) err = nfserr_file_open; else err = nfserr_acces; - } else { - err = nfserrno(host_err); } out: - return err; + return err != nfs_ok ? err : nfserrno(host_err); out_unlock: inode_unlock(dirp); goto out_drop_write; @@ -2092,9 +2152,23 @@ static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp, return cdp->err; } -/* - * Read entries from a directory. - * The NFSv3/4 verifier we ignore for now. +/** + * nfsd_readdir - Read entries from a directory + * @rqstp: RPC transaction context + * @fhp: NFS file handle of directory to be read + * @offsetp: OUT: seek offset of final entry that was read + * @cdp: OUT: an eof error value + * @func: entry filler actor + * + * This implementation ignores the NFSv3/4 verifier cookie. + * + * NB: normal system calls hold file->f_pos_lock when calling + * ->iterate_shared and ->llseek, but nfsd_readdir() does not. + * Because the struct file acquired here is not visible to other + * threads, it's internal state does not need mutex protection. + * + * Returns nfs_ok on success, otherwise an nfsstat code is + * returned. */ __be32 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, @@ -2105,14 +2179,15 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, loff_t offset = *offsetp; int may_flags = NFSD_MAY_READ; - /* NFSv2 only supports 32 bit cookies */ - if (rqstp->rq_vers > 2) - may_flags |= NFSD_MAY_64BIT_COOKIE; - err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file); if (err) goto out; + if (fhp->fh_64bit_cookies) + file->f_mode |= FMODE_64BITHASH; + else + file->f_mode |= FMODE_32BITHASH; + offset = vfs_llseek(file, offset, SEEK_SET); if (offset < 0) { err = nfserrno((int)offset); @@ -2124,11 +2199,43 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ out_close: - fput(file); + nfsd_filp_close(file); out: return err; } +/** + * nfsd_filp_close: close a file synchronously + * @fp: the file to close + * + * nfsd_filp_close() is similar in behaviour to filp_close(). + * The difference is that if this is the final close on the + * file, the that finalisation happens immediately, rather then + * being handed over to a work_queue, as it the case for + * filp_close(). + * When a user-space process closes a file (even when using + * filp_close() the finalisation happens before returning to + * userspace, so it is effectively synchronous. When a kernel thread + * uses file_close(), on the other hand, the handling is completely + * asynchronous. This means that any cost imposed by that finalisation + * is not imposed on the nfsd thread, and nfsd could potentually + * close files more quickly than the work queue finalises the close, + * which would lead to unbounded growth in the queue. + * + * In some contexts is it not safe to synchronously wait for + * close finalisation (see comment for __fput_sync()), but nfsd + * does not match those contexts. In partcilarly it does not, at the + * time that this function is called, hold and locks and no finalisation + * of any file, socket, or device driver would have any cause to wait + * for nfsd to make progress. + */ +void nfsd_filp_close(struct file *fp) +{ + get_file(fp); + filp_close(fp, NULL); + __fput_sync(fp); +} + /* * Get file system stats * N.B. After this call fhp needs an fh_put @@ -2150,9 +2257,9 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in return err; } -static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp) +static int exp_rdonly(struct svc_cred *cred, struct svc_export *exp) { - return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY; + return nfsexp_flags(cred, exp) & NFSEXP_READONLY; } #ifdef CONFIG_NFSD_V4 @@ -2396,8 +2503,8 @@ out_unlock: * Check for a user's access permissions to this inode. */ __be32 -nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, - struct dentry *dentry, int acc) +nfsd_permission(struct svc_cred *cred, struct svc_export *exp, + struct dentry *dentry, int acc) { struct inode *inode = d_inode(dentry); int err; @@ -2428,7 +2535,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, */ if (!(acc & NFSD_MAY_LOCAL_ACCESS)) if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) { - if (exp_rdonly(rqstp, exp) || + if (exp_rdonly(cred, exp) || __mnt_is_readonly(exp->ex_path.mnt)) return nfserr_rofs; if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode)) diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index e3c29596f4df..3ff146522556 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -33,6 +33,8 @@ #define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */ +#define NFSD_MAY_LOCALIO 0x2000 /* for tracing, reflects when localio used */ + #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) @@ -60,6 +62,14 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) posix_acl_release(attrs->na_dpacl); } +static inline bool nfsd_attrs_valid(struct nfsd_attrs *attrs) +{ + struct iattr *iap = attrs->na_iattr; + + return (iap->ia_valid || (attrs->na_seclabel && + attrs->na_seclabel->len)); +} + __be32 nfserrno (int errno); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); @@ -69,7 +79,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, const char *, unsigned int, struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct nfsd_attrs *, int, time64_t); + struct nfsd_attrs *, const struct timespec64 *); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, @@ -114,6 +124,7 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, unsigned int base, u32 *eof); +bool nfsd_read_splice_ok(struct svc_rqst *rqstp); __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long *count, u32 *eof); @@ -144,8 +155,10 @@ __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); -__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, - struct dentry *, int); +__be32 nfsd_permission(struct svc_cred *cred, struct svc_export *exp, + struct dentry *dentry, int acc); + +void nfsd_filp_close(struct file *fp); static inline int fh_want_write(struct svc_fh *fh) { diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 03fe4e21306c..522067b7fd75 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -14,7 +14,7 @@ struct nfsd3_sattrargs { struct svc_fh fh; struct iattr attrs; int check_guard; - time64_t guardtime; + struct timespec64 guardtime; }; struct nfsd3_diropargs { diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 144e05efd14c..2a21a7662e03 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -50,6 +50,134 @@ #define HAS_CSTATE_FLAG(c, f) ((c)->sid_flags & (f)) #define CLEAR_CSTATE_FLAG(c, f) ((c)->sid_flags &= ~(f)) +/** + * nfsd4_encode_bool - Encode an XDR bool type result + * @xdr: target XDR stream + * @val: boolean value to encode + * + * Return values: + * %nfs_ok: @val encoded; @xdr advanced to next position + * %nfserr_resource: stream buffer space exhausted + */ +static __always_inline __be32 +nfsd4_encode_bool(struct xdr_stream *xdr, bool val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(p == NULL)) + return nfserr_resource; + *p = val ? xdr_one : xdr_zero; + return nfs_ok; +} + +/** + * nfsd4_encode_uint32_t - Encode an XDR uint32_t type result + * @xdr: target XDR stream + * @val: integer value to encode + * + * Return values: + * %nfs_ok: @val encoded; @xdr advanced to next position + * %nfserr_resource: stream buffer space exhausted + */ +static __always_inline __be32 +nfsd4_encode_uint32_t(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(p == NULL)) + return nfserr_resource; + *p = cpu_to_be32(val); + return nfs_ok; +} + +#define nfsd4_encode_aceflag4(x, v) nfsd4_encode_uint32_t(x, v) +#define nfsd4_encode_acemask4(x, v) nfsd4_encode_uint32_t(x, v) +#define nfsd4_encode_acetype4(x, v) nfsd4_encode_uint32_t(x, v) +#define nfsd4_encode_count4(x, v) nfsd4_encode_uint32_t(x, v) +#define nfsd4_encode_mode4(x, v) nfsd4_encode_uint32_t(x, v) +#define nfsd4_encode_nfs_lease4(x, v) nfsd4_encode_uint32_t(x, v) +#define nfsd4_encode_qop4(x, v) nfsd4_encode_uint32_t(x, v) +#define nfsd4_encode_sequenceid4(x, v) nfsd4_encode_uint32_t(x, v) +#define nfsd4_encode_slotid4(x, v) nfsd4_encode_uint32_t(x, v) + +/** + * nfsd4_encode_uint64_t - Encode an XDR uint64_t type result + * @xdr: target XDR stream + * @val: integer value to encode + * + * Return values: + * %nfs_ok: @val encoded; @xdr advanced to next position + * %nfserr_resource: stream buffer space exhausted + */ +static __always_inline __be32 +nfsd4_encode_uint64_t(struct xdr_stream *xdr, u64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(p == NULL)) + return nfserr_resource; + put_unaligned_be64(val, p); + return nfs_ok; +} + +#define nfsd4_encode_changeid4(x, v) nfsd4_encode_uint64_t(x, v) +#define nfsd4_encode_nfs_cookie4(x, v) nfsd4_encode_uint64_t(x, v) +#define nfsd4_encode_length4(x, v) nfsd4_encode_uint64_t(x, v) +#define nfsd4_encode_offset4(x, v) nfsd4_encode_uint64_t(x, v) + +/** + * nfsd4_encode_opaque_fixed - Encode a fixed-length XDR opaque type result + * @xdr: target XDR stream + * @data: pointer to data + * @size: length of data in bytes + * + * Return values: + * %nfs_ok: @data encoded; @xdr advanced to next position + * %nfserr_resource: stream buffer space exhausted + */ +static __always_inline __be32 +nfsd4_encode_opaque_fixed(struct xdr_stream *xdr, const void *data, + size_t size) +{ + __be32 *p = xdr_reserve_space(xdr, xdr_align_size(size)); + size_t pad = xdr_pad_size(size); + + if (unlikely(p == NULL)) + return nfserr_resource; + memcpy(p, data, size); + if (pad) + memset((char *)p + size, 0, pad); + return nfs_ok; +} + +/** + * nfsd4_encode_opaque - Encode a variable-length XDR opaque type result + * @xdr: target XDR stream + * @data: pointer to data + * @size: length of data in bytes + * + * Return values: + * %nfs_ok: @data encoded; @xdr advanced to next position + * %nfserr_resource: stream buffer space exhausted + */ +static __always_inline __be32 +nfsd4_encode_opaque(struct xdr_stream *xdr, const void *data, size_t size) +{ + size_t pad = xdr_pad_size(size); + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(size)); + if (unlikely(p == NULL)) + return nfserr_resource; + *p++ = cpu_to_be32(size); + memcpy(p, data, size); + if (pad) + memset((char *)p + size, 0, pad); + return nfs_ok; +} + +#define nfsd4_encode_component4(x, d, s) nfsd4_encode_opaque(x, d, s) + struct nfsd4_compound_state { struct svc_fh current_fh; struct svc_fh save_fh; @@ -170,12 +298,8 @@ struct nfsd4_lock { } v; /* response */ - union { - struct { - stateid_t stateid; - } ok; - struct nfsd4_lock_denied denied; - } u; + stateid_t lk_resp_stateid; + struct nfsd4_lock_denied lk_denied; }; #define lk_new_open_seqid v.new.open_seqid #define lk_new_open_stateid v.new.open_stateid @@ -185,20 +309,15 @@ struct nfsd4_lock { #define lk_old_lock_stateid v.old.lock_stateid #define lk_old_lock_seqid v.old.lock_seqid -#define lk_resp_stateid u.ok.stateid -#define lk_denied u.denied - - struct nfsd4_lockt { u32 lt_type; clientid_t lt_clientid; struct xdr_netobj lt_owner; u64 lt_offset; u64 lt_length; - struct nfsd4_lock_denied lt_denied; + struct nfsd4_lock_denied lt_denied; }; - struct nfsd4_locku { u32 lu_type; u32 lu_seqid; @@ -267,9 +386,9 @@ struct nfsd4_open { u32 op_deleg_want; /* request */ stateid_t op_stateid; /* response */ __be32 op_xdr_error; /* see nfsd4_open_omfg() */ - u32 op_recall; /* recall */ struct nfsd4_change_info op_cinfo; /* response */ u32 op_rflags; /* response */ + bool op_recall; /* response */ bool op_truncate; /* used during processing */ bool op_created; /* used during processing */ struct nfs4_openowner *op_openowner; /* used during processing */ @@ -399,6 +518,24 @@ struct nfsd4_free_stateid { stateid_t fr_stateid; /* request */ }; +struct nfsd4_get_dir_delegation { + /* request */ + u32 gdda_signal_deleg_avail; + u32 gdda_notification_types[1]; + struct timespec64 gdda_child_attr_delay; + struct timespec64 gdda_dir_attr_delay; + u32 gdda_child_attributes[3]; + u32 gdda_dir_attributes[3]; + /* response */ + u32 gddrnf_status; + nfs4_verifier gddr_cookieverf; + stateid_t gddr_stateid; + u32 gddr_notification[1]; + u32 gddr_child_attributes[3]; + u32 gddr_dir_attributes[3]; + bool gddrnf_will_signal_deleg_avail; +}; + /* also used for NVERIFY */ struct nfsd4_verify { u32 ve_bmval[3]; /* request */ @@ -496,7 +633,7 @@ struct nfsd4_layoutcommit { u32 lc_layout_type; /* request */ u32 lc_up_len; /* layout length */ void *lc_up_layout; /* decoded by callback */ - u32 lc_size_chg; /* boolean for response */ + bool lc_size_chg; /* response */ u64 lc_newsize; /* response */ }; @@ -508,7 +645,7 @@ struct nfsd4_layoutreturn { u32 lrf_body_len; /* request */ void *lrf_body; /* request */ stateid_t lr_sid; /* request/response */ - u32 lrs_present; /* response */ + bool lrs_present; /* response */ }; struct nfsd4_fallocate { @@ -555,8 +692,10 @@ struct nfsd4_copy { #define NFSD4_COPY_F_INTRA (1) #define NFSD4_COPY_F_SYNCHRONOUS (2) #define NFSD4_COPY_F_COMMITTED (3) +#define NFSD4_COPY_F_COMPLETED (4) /* response */ + __be32 nfserr; struct nfsd42_write_res cp_res; struct knfsd_fh fh; @@ -617,7 +756,8 @@ struct nfsd4_offload_status { /* response */ u64 count; - u32 status; + __be32 status; + bool completed; }; struct nfsd4_copy_notify { @@ -627,8 +767,7 @@ struct nfsd4_copy_notify { /* response */ stateid_t cpn_cnr_stateid; - u64 cpn_sec; - u32 cpn_nsec; + struct timespec64 cpn_lease_time; struct nl4_server *cpn_src; }; @@ -680,6 +819,7 @@ struct nfsd4_op { struct nfsd4_reclaim_complete reclaim_complete; struct nfsd4_test_stateid test_stateid; struct nfsd4_free_stateid free_stateid; + struct nfsd4_get_dir_delegation get_dir_delegation; struct nfsd4_getdeviceinfo getdeviceinfo; struct nfsd4_layoutget layoutget; struct nfsd4_layoutcommit layoutcommit; @@ -723,6 +863,7 @@ struct nfsd4_compoundargs { u32 minorversion; u32 client_opcnt; u32 opcnt; + bool splice_ok; struct nfsd4_op *ops; struct nfsd4_op iops[8]; }; @@ -821,8 +962,10 @@ extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); +extern void nfsd4_lock_release(union nfsd4_op_u *u); extern __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); +extern void nfsd4_lockt_release(union nfsd4_op_u *u); extern __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); extern __be32 diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index 0d39af1b00a0..e8b00309c449 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -54,3 +54,21 @@ #define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) + +/* + * 1: CB_GETATTR opcode (32-bit) + * N: file_handle + * 1: number of entry in attribute array (32-bit) + * 1: entry 0 in attribute array (32-bit) + */ +#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + enc_nfs4_fh_sz + 1 + 1) +/* + * 4: fattr_bitmap_maxsz + * 1: attribute array len + * 2: change attr (64-bit) + * 2: size (64-bit) + */ +#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz) |