From b92dccf65bab3b6b7deb79ff3321dc256eb0f53b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:03 -0500 Subject: NFS: Fix a busy inodes issue... The nfs_open_context may live longer than the file descriptor that spawned it, so it needs to carry a reference to the vfsmount. If not, then generic_shutdown_super() may end up being called before reads and writes have been flushed out. Make a couple of functions static while we're at it... Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a77ee95b7efb..8d5b6691ae3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -973,7 +973,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) return err; } -struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred) +static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) { struct nfs_open_context *ctx; @@ -981,6 +981,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp if (ctx != NULL) { atomic_set(&ctx->count, 1); ctx->dentry = dget(dentry); + ctx->vfsmnt = mntget(mnt); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; @@ -1011,6 +1012,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->dentry); + mntput(ctx->vfsmnt); kfree(ctx); } } @@ -1019,7 +1021,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { struct inode *inode = filp->f_dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); @@ -1051,7 +1053,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c return ctx; } -void nfs_file_clear_open_context(struct file *filp) +static void nfs_file_clear_open_context(struct file *filp) { struct inode *inode = filp->f_dentry->d_inode; struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; @@ -1076,7 +1078,7 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(filp->f_dentry, cred); + ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; -- cgit v1.2.3 From 755c1e20cd2ad56e5c567fa05769eb98a3eef72b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:06 -0500 Subject: NFS: writes should not clobber utimes() calls Ensure that we flush out writes in the case when someone calls utimes() in order to set the file times. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8d5b6691ae3d..5746dc841a6c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -859,11 +859,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) lock_kernel(); nfs_begin_data_update(inode); - /* Write all dirty data if we're changing file permissions or size */ - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { - filemap_write_and_wait(inode->i_mapping); - nfs_wb_all(inode); - } + /* Write all dirty data */ + filemap_write_and_wait(inode->i_mapping); + nfs_wb_all(inode); /* * Return any delegations if we're going to change ACLs */ -- cgit v1.2.3 From ca62b9c3f7b8679ada4de94d2ab7098c6860c3d7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:07 -0500 Subject: NFSv4: Don't invalidate cached attributes if change attribute is unchanged Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5746dc841a6c..0e1ef97bcf54 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1299,34 +1299,35 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; + /* Has the inode gone and changed behind our back? */ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + return -EIO; + } + /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && - nfsi->change_attr != fattr->change_attr) { + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) { + if (nfsi->change_attr == fattr->change_attr) + goto out; nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } - /* Has the inode gone and changed behind our back? */ - if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { - return -EIO; - } - - cur_size = i_size_read(inode); - new_isize = nfs_size_to_loff_t(fattr->size); - /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } + + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); if (cur_size != new_isize) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (nfsi->npages == 0) @@ -1343,6 +1344,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (inode->i_nlink != fattr->nlink) nfsi->cache_validity |= NFS_INO_INVALID_ATTR; +out: if (!timespec_equal(&inode->i_atime, &fattr->atime)) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; @@ -1481,15 +1483,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_change_attribute = jiffies; } - if ((fattr->valid & NFS_ATTR_FATTR_V4) - && nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - nfsi->change_attr = fattr->change_attr; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = jiffies; - } - /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; @@ -1519,6 +1512,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blksize = fattr->du.nfs2.blocksize; } + if ((fattr->valid & NFS_ATTR_FATTR_V4)) { + if (nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + nfsi->change_attr = fattr->change_attr; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->cache_change_attribute = jiffies; + } else + invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA); + } + /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); -- cgit v1.2.3 From fb374d24f225f38f13dbffb65dd7ec72daf08dba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:08 -0500 Subject: NFS: reduce the number of false cache invalidations. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0e1ef97bcf54..24988f430e4b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1328,11 +1328,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (nfsi->npages == 0) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } + if (cur_size != new_isize && nfsi->npages == 0) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) -- cgit v1.2.3 From 967b9281361481aecf323563886ef972ee88c681 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:09 -0500 Subject: NFSv4: Do not call rpciod_down() before call to destroy_nfsv4_state() The reason is that the idmapper cleanup may call flush_workqueue() on rpciod_workqueue. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 24988f430e4b..b81149eb26e5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2025,10 +2025,11 @@ static void nfs4_kill_super(struct super_block *sb) if (server->client != NULL && !IS_ERR(server->client)) rpc_shutdown_client(server->client); - rpciod_down(); /* release rpciod */ destroy_nfsv4_state(server); + rpciod_down(); + kfree(server->hostname); kfree(server); } -- cgit v1.2.3 From bd6475454c774bd9dbe6078d94bbf72b1d3b65f4 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Mon, 20 Mar 2006 13:44:10 -0500 Subject: NFS: kzalloc conversion in fs/nfs this converts fs/nfs to kzalloc() usage. compile tested with make allyesconfig Signed-off-by: Eric Sesterhenn Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 4 +--- fs/nfs/inode.c | 6 ++---- fs/nfs/unlink.c | 3 +-- fs/nfs/write.c | 6 ++---- 4 files changed, 6 insertions(+), 13 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 32c95a0d93f3..b89d27f93d66 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -101,11 +101,9 @@ nfs_idmap_new(struct nfs4_client *clp) if (clp->cl_idmap != NULL) return; - if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) + if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) return; - memset(idmap, 0, sizeof(*idmap)); - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), "%s/idmap", clp->cl_rpcclient->cl_pathname); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b81149eb26e5..521d1dcb4cf0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1638,10 +1638,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, #endif /* CONFIG_NFS_V3 */ s = ERR_PTR(-ENOMEM); - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) goto out_err; - memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); @@ -1942,10 +1941,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, return ERR_PTR(-EINVAL); } - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) return ERR_PTR(-ENOMEM); - memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index a65c7b53d558..0e28189c2151 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry) struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode); int status = -ENOMEM; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out; - memset(data, 0, sizeof(*data)); data->cred = rpcauth_lookupcred(clnt->cl_auth, 0); if (IS_ERR(data->cred)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d6ad449041eb..92ecf24455c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -100,10 +100,8 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) p->pagevec = &p->page_array[0]; else { size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { + p->pagevec = kzalloc(size, GFP_NOFS); + if (!p->pagevec) { mempool_free(p, nfs_commit_mempool); p = NULL; } -- cgit v1.2.3 From 7a480e250c7ca9187275d8574ae9e48a6b602cb9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:12 -0500 Subject: NFS: show retransmit settings when displaying mount options Sometimes it's important to know the exact RPC retransmit settings the kernel is using for an NFS mount point. Add this facility to the NFS client's show_options method. Test plan: Set various retransmit settings via the mount command, and check that the settings are reflected in /proc/mounts. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 ++++++++ include/linux/nfs_fs_sb.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 521d1dcb4cf0..48683d4bf0f6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -396,6 +396,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + /* create transport and client */ xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { @@ -629,6 +632,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) proto = buf; } seq_printf(m, ",proto=%s", proto); + seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", nfss->retrans_count); seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); return 0; @@ -1800,6 +1805,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3d3a305488cf..a522ab97358d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -26,6 +26,8 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; char * hostname; /* remote hostname */ struct nfs_fh fh; -- cgit v1.2.3 From c8bded96aa8735823e53c95a26177987ebb19a90 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:13 -0500 Subject: NFS: clean up some mount options Get rid of "lock" and "posix", and spell out "vers=". Test plan: None. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 48683d4bf0f6..827d69255b1b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -591,10 +591,9 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; @@ -603,7 +602,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) char buf[12]; char *proto; - seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); if (nfss->acregmin != 3*HZ) -- cgit v1.2.3 From d9ef5a8c26aab09762afce43df64736720b4860e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:13 -0500 Subject: NFS: introduce mechanism for tracking NFS client metrics Add a per-superblock performance counter facility to the NFS client. This facility mimics the counters available for block devices and for networking. Expose these new counters via the new /proc/self/mountstats interface. Thanks to Andrew Morton and Trond Myklebust for their review and comments. Test plan: fsx and iozone on UP and SMP systems, with and without pre-emption. Watch for memory overwrite bugs, and performance loss (significantly more CPU required per op). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 103 +++++++++++++++++++++++++++++-- fs/nfs/iostat.h | 152 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs_sb.h | 3 + 3 files changed, 252 insertions(+), 6 deletions(-) create mode 100644 fs/nfs/iostat.h (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 827d69255b1b..86b756f44e27 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -42,6 +42,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 @@ -65,6 +66,7 @@ static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -78,6 +80,7 @@ static struct super_operations nfs_sops = { .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -290,6 +293,12 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) } sb->s_root->d_op = server->rpc_ops->dentry_ops; + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + no_root_error = -ENOMEM; + goto out_no_root; + } + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -582,7 +591,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf) } -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { static struct proc_nfs_info { int flag; @@ -598,20 +607,19 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); char buf[12]; char *proto; seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) + if (nfss->acregmin != 3*HZ || showdefaults) seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) + if (nfss->acregmax != 60*HZ || showdefaults) seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) + if (nfss->acdirmin != 30*HZ || showdefaults) seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) + if (nfss->acdirmax != 60*HZ || showdefaults) seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) @@ -633,8 +641,89 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",proto=%s", proto); seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); seq_printf(m, ",retrans=%u", nfss->retrans_count); +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + nfs_show_mount_options(m, nfss, 0); + seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); + + return 0; +} + +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%d", nfss->wtmult); + seq_printf(m, ",dtsize=%d", nfss->dtsize); + seq_printf(m, ",bsize=%d", nfss->bsize); + seq_printf(m, ",namelen=%d", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct nfs_iostats *stats; + + if (!cpu_possible(cpu)) + continue; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += stats->bytes[i]; + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + return 0; } @@ -1742,6 +1831,7 @@ static struct super_operations nfs4_sops = { .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -2015,6 +2105,7 @@ out_err: out_free: kfree(server->mnt_path); kfree(server->hostname); + nfs_free_iostats(server->io_stats); kfree(server); return s; } diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h new file mode 100644 index 000000000000..dc080e50ec57 --- /dev/null +++ b/fs/nfs/iostat.h @@ -0,0 +1,152 @@ +/* + * linux/fs/nfs/iostat.h + * + * Declarations for NFS client per-mount statistics + * + * Copyright (C) 2005, 2006 Chuck Lever + * + * NFS client per-mount statistics provide information about the health of + * the NFS client and the health of each NFS mount point. Generally these + * are not for detailed problem diagnosis, but simply to indicate that there + * is a problem. + * + * These counters are not meant to be human-readable, but are meant to be + * integrated into system monitoring tools such as "sar" and "iostat". As + * such, the counters are sampled by the tools over time, and are never + * zeroed after a file system is mounted. Moving averages can be computed + * by the tools by taking the difference between two instantaneous samples + * and dividing that by the time between the samples. + */ + +#ifndef _NFS_IOSTAT +#define _NFS_IOSTAT + +#define NFS_IOSTAT_VERS "1.0" + +/* + * NFS byte counters + * + * 1. SERVER - the number of payload bytes read from or written to the + * server by the NFS client via an NFS READ or WRITE request. + * + * 2. NORMAL - the number of bytes read or written by applications via + * the read(2) and write(2) system call interfaces. + * + * 3. DIRECT - the number of bytes read or written from files opened + * with the O_DIRECT flag. + * + * These counters give a view of the data throughput into and out of the NFS + * client. Comparing the number of bytes requested by an application with the + * number of bytes the client requests from the server can provide an + * indication of client efficiency (per-op, cache hits, etc). + * + * These counters can also help characterize which access methods are in + * use. DIRECT by itself shows whether there is any O_DIRECT traffic. + * NORMAL + DIRECT shows how much data is going through the system call + * interface. A large amount of SERVER traffic without much NORMAL or + * DIRECT traffic shows that applications are using mapped files. + * + * NFS page counters + * + * These count the number of pages read or written via nfs_readpage(), + * nfs_readpages(), or their write equivalents. + */ +enum nfs_stat_bytecounters { + NFSIOS_NORMALREADBYTES = 0, + NFSIOS_NORMALWRITTENBYTES, + NFSIOS_DIRECTREADBYTES, + NFSIOS_DIRECTWRITTENBYTES, + NFSIOS_SERVERREADBYTES, + NFSIOS_SERVERWRITTENBYTES, + NFSIOS_READPAGES, + NFSIOS_WRITEPAGES, + __NFSIOS_BYTESMAX, +}; + +/* + * NFS event counters + * + * These counters provide a low-overhead way of monitoring client activity + * without enabling NFS trace debugging. The counters show the rate at + * which VFS requests are made, and how often the client invalidates its + * data and attribute caches. This allows system administrators to monitor + * such things as how close-to-open is working, and answer questions such + * as "why are there so many GETATTR requests on the wire?" + * + * They also count anamolous events such as short reads and writes, silly + * renames due to close-after-delete, and operations that change the size + * of a file (such operations can often be the source of data corruption + * if applications aren't using file locking properly). + */ +enum nfs_stat_eventcounters { + NFSIOS_INODEREVALIDATE = 0, + NFSIOS_DENTRYREVALIDATE, + NFSIOS_DATAINVALIDATE, + NFSIOS_ATTRINVALIDATE, + NFSIOS_VFSOPEN, + NFSIOS_VFSLOOKUP, + NFSIOS_VFSACCESS, + NFSIOS_VFSUPDATEPAGE, + NFSIOS_VFSREADPAGE, + NFSIOS_VFSREADPAGES, + NFSIOS_VFSWRITEPAGE, + NFSIOS_VFSWRITEPAGES, + NFSIOS_VFSGETDENTS, + NFSIOS_VFSSETATTR, + NFSIOS_VFSFLUSH, + NFSIOS_VFSFSYNC, + NFSIOS_VFSLOCK, + NFSIOS_VFSRELEASE, + NFSIOS_CONGESTIONWAIT, + NFSIOS_SETATTRTRUNC, + NFSIOS_EXTENDWRITE, + NFSIOS_SILLYRENAME, + NFSIOS_SHORTREAD, + NFSIOS_SHORTWRITE, + __NFSIOS_COUNTSMAX, +}; + +#ifdef __KERNEL__ + +#include +#include + +struct nfs_iostats { + unsigned long long bytes[__NFSIOS_BYTESMAX]; + unsigned long events[__NFSIOS_COUNTSMAX]; +} ____cacheline_aligned; + +static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats->events[stat] ++; + put_cpu_no_resched(); +} + +static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats->bytes[stat] += addend; + put_cpu_no_resched(); +} + +static inline struct nfs_iostats *nfs_alloc_iostats(void) +{ + return alloc_percpu(struct nfs_iostats); +} + +static inline void nfs_free_iostats(struct nfs_iostats *stats) +{ + free_percpu(stats); +} + +#endif +#endif diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a522ab97358d..d65e69a06b72 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -4,6 +4,8 @@ #include #include +struct nfs_iostats; + /* * NFS client parameters stored in the superblock. */ @@ -12,6 +14,7 @@ struct nfs_server { struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From 91d5b47023b608227d605d1e916b29dd0215bff7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:14 -0500 Subject: NFS: add I/O performance counters Invoke the byte and event counter macros where we want to count bytes and events. Clean-up: fix a possible NULL dereference in nfs_lock, and simplify nfs_file_open. Test-plan: fsx and iozone on UP and SMP systems, with and without pre-emption. Watch for memory overwrite bugs, and performance loss (significantly more CPU required per op). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 ++++++++ fs/nfs/direct.c | 7 +++++++ fs/nfs/file.c | 20 +++++++++----------- fs/nfs/inode.c | 8 ++++++++ fs/nfs/read.c | 12 ++++++++++++ fs/nfs/write.c | 18 ++++++++++++++++++ 6 files changed, 62 insertions(+), 11 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a1554bead692..151b8dd0ac3b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -34,6 +34,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "iostat.h" #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ @@ -507,6 +508,8 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct nfs_fattr fattr; long res; + nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); + lock_kernel(); res = nfs_revalidate_inode(NFS_SERVER(inode), inode); @@ -713,6 +716,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; + nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; if (!inode) { @@ -844,6 +848,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru dfprintk(VFS, "NFS: lookup(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); if (dentry->d_name.len > NFS_SERVER(dir)->namelen) @@ -1241,6 +1246,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, atomic_read(&dentry->d_count)); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); #ifdef NFS_PARANOIA if (!dentry->d_inode) @@ -1640,6 +1646,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) struct rpc_cred *cred; int res = 0; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); + if (mask == 0) goto out; /* Is this sys_access() ? */ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4e9b3a1b36c5..fc07ce4885da 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -54,6 +54,8 @@ #include #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_VFS #define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) @@ -67,6 +69,7 @@ struct nfs_direct_req { struct kref kref; /* release manager */ struct list_head list; /* nfs_read_data structs */ wait_queue_head_t wait; /* wait for i/o completion */ + struct inode * inode; /* target file of I/O */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ atomic_t complete, /* i/os we're waiting for */ @@ -357,7 +360,9 @@ static ssize_t nfs_direct_read_seg(struct inode *inode, dreq->pages = pages; dreq->npages = nr_pages; + dreq->inode = inode; + nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, file_offset); @@ -572,6 +577,7 @@ static ssize_t nfs_direct_write(struct inode *inode, return page_count; } + nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size); result = nfs_direct_write_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); nfs_free_user_pages(pages, page_count, 0); @@ -581,6 +587,7 @@ static ssize_t nfs_direct_write(struct inode *inode, break; return result; } + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); tot_bytes += result; file_offset += result; if (result < size) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 387809f2d188..1cf07e4ad136 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -32,6 +32,7 @@ #include #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -102,18 +103,15 @@ static int nfs_check_flags(int flags) static int nfs_file_open(struct inode *inode, struct file *filp) { - struct nfs_server *server = NFS_SERVER(inode); - int (*open)(struct inode *, struct file *); int res; res = nfs_check_flags(filp->f_flags); if (res) return res; + nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - /* Do NFSv4 open() call */ - if ((open = server->rpc_ops->file_open) != NULL) - res = open(inode, filp); + res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); unlock_kernel(); return res; } @@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp) /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) filemap_fdatawrite(filp->f_mapping); + nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return NFS_PROTO(inode)->file_release(inode, filp); } @@ -199,6 +198,7 @@ nfs_file_flush(struct file *file) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); lock_kernel(); /* Ensure that data+attribute caches are up to date after close() */ status = nfs_wb_all(inode); @@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) (unsigned long) count, (unsigned long) pos); result = nfs_revalidate_file(inode, iocb->ki_filp); + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); return result; @@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync) dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); lock_kernel(); status = nfs_wb_all(inode); if (!status) { @@ -378,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t if (!count) goto out; + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, buf, count, pos); out: return result; @@ -517,9 +520,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); - - if (!inode) - return -EINVAL; + nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && @@ -544,9 +545,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags); - if (!inode) - return -EINVAL; - /* * No BSD flocks over NFS allowed. * Note: we could try to fake a POSIX lock request here by diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 86b756f44e27..8ee74111e519 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -753,6 +753,8 @@ static void nfs_zap_caches_locked(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -940,6 +942,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) struct nfs_fattr fattr; int error; + nfs_inc_stats(inode, NFSIOS_VFSSETATTR); + if (attr->ia_valid & ATTR_SIZE) { if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) attr->ia_valid &= ~ATTR_SIZE; @@ -993,6 +997,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); inode->i_size = attr->ia_size; vmtruncate(inode, attr->ia_size); } @@ -1278,6 +1283,7 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; @@ -1294,6 +1300,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); if (S_ISREG(inode->i_mode)) nfs_sync_mapping(mapping); invalidate_inode_pages2(mapping); @@ -1615,6 +1622,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 05eb43fadf8e..ae3ddd24cf8f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,6 +31,8 @@ #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_PAGECACHE static int nfs_pagein_one(struct list_head *, struct inode *); @@ -133,6 +135,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, } count -= result; rdata->args.pgbase += result; + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result); + /* Note: result == 0 should only happen if we're caching * a write that extends the file and punches a hole. */ @@ -458,8 +462,11 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", task->tk_pid, status); + nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); + /* Is this a short read? */ if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { + nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count != 0) { /* Yes, so retry the read at the end of the data */ @@ -491,6 +498,9 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page->index); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + nfs_add_stats(inode, NFSIOS_READPAGES, 1); + /* * Try to flush any pending writes to the file.. * @@ -570,6 +580,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, inode->i_sb->s_id, (long long)NFS_FILEID(inode), nr_pages); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); if (filp == NULL) { desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); @@ -582,6 +593,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); if (!ret) + nfs_add_stats(inode, NFSIOS_READPAGES, err); ret = err; } put_nfs_open_context(desc.ctx); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 92ecf24455c3..e7c8361cf201 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,6 +63,7 @@ #include #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -134,6 +135,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) return; + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); i_size_write(inode, end); } @@ -223,6 +225,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, wdata->args.pgbase += result; written += result; count -= result; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); } while (count); /* Update file length */ nfs_grow_file(page, offset, written); @@ -279,6 +282,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) int priority = wb_priority(wbc); int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); + nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + /* * Note: We need to ensure that we have a reference to the inode * if we are to do asynchronous writes. If not, waiting @@ -343,6 +349,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct inode *inode = mapping->host; int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); + err = generic_writepages(mapping, wbc); if (err) return err; @@ -354,6 +362,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); if (err < 0) goto out; + nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); wbc->nr_to_write -= err; if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { err = nfs_wait_on_requests(inode, 0, 0); @@ -596,6 +605,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) if (!bdi_write_congested(bdi)) return 0; + + nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); + if (intr) { struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); sigset_t oldset; @@ -749,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page, struct nfs_page *req; int status = 0; + nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); + dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, count, @@ -1152,6 +1166,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); + nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1177,6 +1193,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) if (task->tk_status >= 0 && resp->count < argp->count) { static unsigned long complain; + nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); + /* Has the server at least made some progress? */ if (resp->count != 0) { /* Was this an NFSv2 write or an NFSv3 stable write? */ -- cgit v1.2.3 From 67ec9f46b889bfb1ab0a4e307d53929d5f0692bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:15 -0500 Subject: NFS: report how long an NFS file system has been mounted Add a field in nfs_server to record a timestamp when a mount succeeds. Report the number of seconds the file system has been mounted via nfs_show_stats(). Test plan: Mount an NFS file system, watch the mountstats reports and compare with clock time. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 +++++ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 6 insertions(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8ee74111e519..1b2d782374b5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -299,6 +299,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) goto out_no_root; } + /* mount time stamp, in seconds */ + server->mount_time = jiffies; + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -674,6 +677,8 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); nfs_show_mount_options(m, nfss, 1); + seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + seq_printf(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); seq_printf(m, ",wtmult=%d", nfss->wtmult); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d65e69a06b72..65dec21af774 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -35,6 +35,7 @@ struct nfs_server { char * hostname; /* remote hostname */ struct nfs_fh fh; struct sockaddr_in addr; + unsigned long mount_time; /* when this fs was mounted */ #ifdef CONFIG_NFS_V4 /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. -- cgit v1.2.3 From 4ece3a2d18fd7fe1d4972284a8c98c569020093f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:22 -0500 Subject: NFS: add RPC I/O statistics to /proc/self/mountstats NFS client now shows various RPC I/O metrics in /proc/self/mountstats. Test plan: Mount/umount while doing "cat /proc/self/mountstats", multiple iterations of connectathon locking suite. Test with NFS version 2, 3, and 4. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1b2d782374b5..b9f35ca266c2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -728,6 +729,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, "\n\tbytes:\t"); for (i = 0; i < __NFSIOS_BYTESMAX; i++) seq_printf(m, "%Lu ", totals.bytes[i]); + seq_printf(m, "\n"); + + rpc_print_iostats(m, nfss->client); return 0; } -- cgit v1.2.3 From 01d0ae8beaee75d954900109619b700fe68707d9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:48 -0500 Subject: NFSv4: Fix an oops in nfs4_fill_super The mount statistics patches introduced a call to nfs_free_iostats that is not only redundant, but actually causes an oops. Also fix a memory leak due to the lack of a call to nfs_free_iostats on unmount. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 13 ++++++------- fs/nfs/iostat.h | 3 ++- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b9f35ca266c2..17654bffc3c6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -281,6 +281,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) sb->s_magic = NFS_SUPER_MAGIC; + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + return -ENOMEM; + root_inode = nfs_get_root(sb, &server->fh, &fsinfo); /* Did getting the root inode fail? */ if (IS_ERR(root_inode)) { @@ -294,12 +298,6 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) } sb->s_root->d_op = server->rpc_ops->dentry_ops; - server->io_stats = nfs_alloc_iostats(); - if (!server->io_stats) { - no_root_error = -ENOMEM; - goto out_no_root; - } - /* mount time stamp, in seconds */ server->mount_time = jiffies; @@ -1822,6 +1820,7 @@ static void nfs_kill_super(struct super_block *s) rpciod_down(); /* release rpciod */ + nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); } @@ -2122,7 +2121,6 @@ out_err: out_free: kfree(server->mnt_path); kfree(server->hostname); - nfs_free_iostats(server->io_stats); kfree(server); return s; } @@ -2143,6 +2141,7 @@ static void nfs4_kill_super(struct super_block *sb) rpciod_down(); + nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); } diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 7a7495153317..6350ecbde589 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -156,7 +156,8 @@ static inline struct nfs_iostats *nfs_alloc_iostats(void) static inline void nfs_free_iostats(struct nfs_iostats *stats) { - free_percpu(stats); + if (stats != NULL) + free_percpu(stats); } #endif -- cgit v1.2.3 From 03f28e3a2059fc466761d872122f30acb7be61ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:48 -0500 Subject: NFS: Make nfs_fhget() return appropriate error values Currently it returns NULL, which usually gets interpreted as ENOMEM. In fact it can mean a host of issues. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 10 +++++----- fs/nfs/inode.c | 15 +++++++-------- fs/nfs/nfs4proc.c | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 609185a15c99..06c48b385c94 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -901,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - res = ERR_PTR(-EACCES); inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); - if (!inode) + res = (struct dentry *)inode; + if (IS_ERR(res)) goto out_unlock; no_entry: res = d_add_unique(dentry, inode); @@ -1096,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) return NULL; dentry->d_op = NFS_PROTO(dir)->dentry_ops; inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); - if (!inode) { + if (IS_ERR(inode)) { dput(dentry); return NULL; } @@ -1134,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, if (error < 0) goto out_err; } - error = -ENOMEM; inode = nfs_fhget(dentry->d_sb, fhandle, fattr); - if (inode == NULL) + error = PTR_ERR(inode); + if (IS_ERR(inode)) goto out_err; d_instantiate(dentry, inode); return 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 17654bffc3c6..a0cda53461b3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -241,7 +241,6 @@ static struct inode * nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) { struct nfs_server *server = NFS_SB(sb); - struct inode *rooti; int error; error = server->rpc_ops->getroot(server, rootfh, fsinfo); @@ -250,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f return ERR_PTR(error); } - rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); - if (!rooti) - return ERR_PTR(-ENOMEM); - return rooti; + return nfs_fhget(sb, rootfh, fsinfo->fattr); } /* @@ -853,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) .fh = fh, .fattr = fattr }; - struct inode *inode = NULL; + struct inode *inode = ERR_PTR(-ENOENT); unsigned long hash; if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -866,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) hash = nfs_fattr_to_ino_t(fattr); - if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc))) + inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc); + if (inode == NULL) { + inode = ERR_PTR(-ENOMEM); goto out_no_inode; + } if (inode->i_state & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); @@ -936,7 +935,7 @@ out: return inode; out_no_inode: - printk("nfs_fhget: iget failed\n"); + dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 31000326aba4..02c7d8c04c58 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -336,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data if (!(data->f_attr.valid & NFS_ATTR_FATTR)) goto out; inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); - if (inode == NULL) + if (IS_ERR(inode)) goto out; state = nfs4_get_open_state(inode, data->owner); if (state == NULL) -- cgit v1.2.3 From c42de9dd67250fe984e0e31c9b542d721af6454b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:51 -0500 Subject: NFS: Fix a race in nfs_sync_inode() Kudos to Neil Brown for spotting the problem: "in nfs_sync_inode, there is effectively the sequence: nfs_wait_on_requests nfs_flush_inode nfs_commit_inode This seems a bit racy to me as if the only requests are on the ->commit list, and nfs_commit_inode is called separately after nfs_wait_on_requests completes, and before nfs_commit_inode start (say: by nfs_write_inode) then none of these function will return >0, yet there will be some pending request that aren't waited for." The solution is to search for requests to wait upon, search for dirty requests, and search for uncommitted requests while holding the nfsi->req_lock The patch also cleans up nfs_sync_inode(), getting rid of the redundant FLUSH_WAIT flag. It turns out that we were always setting it. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 +-- fs/nfs/write.c | 72 +++++++++++++++++++++++++++++++++++--------------- include/linux/nfs_fs.h | 10 +++---- 3 files changed, 56 insertions(+), 30 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a0cda53461b3..60aac58270a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -137,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) static int nfs_write_inode(struct inode *inode, int sync) { - int flags = sync ? FLUSH_WAIT : 0; + int flags = sync ? FLUSH_SYNC : 0; int ret; ret = nfs_commit_inode(inode, flags); @@ -1051,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int err; /* Flush out writes to the server in order to update c/mtime */ - nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); + nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT); /* * We may force a getattr if the user cares about atime. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2beb1268bb1b..3f5225404c97 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -539,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req) * * Interruptible by signals only if mounted with intr flag. */ -static int -nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; @@ -553,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int else idx_end = idx_start + npages - 1; - spin_lock(&nfsi->req_lock); next = idx_start; while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) @@ -566,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); + spin_lock(&nfsi->req_lock); if (error < 0) return error; - spin_lock(&nfsi->req_lock); res++; } - spin_unlock(&nfsi->req_lock); return res; } +static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int ret; + + spin_lock(&nfsi->req_lock); + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + spin_unlock(&nfsi->req_lock); + return ret; +} + /* * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan @@ -626,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st } return res; } +#else +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +{ + return 0; +} #endif static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) @@ -1421,6 +1434,11 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; +#else +static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +{ + return 0; +} #endif static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, @@ -1460,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how) } #endif -int nfs_sync_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) { + struct nfs_inode *nfsi = NFS_I(inode); + LIST_HEAD(head); int nocommit = how & FLUSH_NOCOMMIT; - int wait = how & FLUSH_WAIT; - int error; - - how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT); + int pages, ret; + how &= ~FLUSH_NOCOMMIT; + spin_lock(&nfsi->req_lock); do { - if (wait) { - error = nfs_wait_on_requests(inode, idx_start, npages); - if (error != 0) - continue; - } - error = nfs_flush_inode(inode, idx_start, npages, how); - if (error != 0) + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + if (ret != 0) continue; - if (!nocommit) - error = nfs_commit_inode(inode, how); - } while (error > 0); - return error; + pages = nfs_scan_dirty(inode, &head, idx_start, npages); + if (pages != 0) { + spin_unlock(&nfsi->req_lock); + ret = nfs_flush_list(inode, &head, pages, how); + spin_lock(&nfsi->req_lock); + continue; + } + if (nocommit) + break; + pages = nfs_scan_commit(inode, &head, 0, 0); + if (pages == 0) + break; + spin_unlock(&nfsi->req_lock); + ret = nfs_commit_list(inode, &head, how); + spin_lock(&nfsi->req_lock); + } while (ret >= 0); + spin_unlock(&nfsi->req_lock); + return ret; } int nfs_init_writepagecache(void) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 55de0770df4a..cbebd7d1b9e8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -56,9 +56,7 @@ * When flushing a cluster of dirty pages, there can be different * strategies: */ -#define FLUSH_AGING 0 /* only flush old buffers */ #define FLUSH_SYNC 1 /* file being synced, or contention */ -#define FLUSH_WAIT 2 /* wait for completion */ #define FLUSH_STABLE 4 /* commit to stable storage */ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ @@ -419,7 +417,7 @@ void nfs_commit_free(struct nfs_write_data *p); * Try to write back everything synchronously (but check the * return value!) */ -extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); +extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern void nfs_commit_release(void *wdata); @@ -440,7 +438,7 @@ nfs_have_writebacks(struct inode *inode) static inline int nfs_wb_all(struct inode *inode) { - int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT); + int error = nfs_sync_inode_wait(inode, 0, 0, 0); return (error < 0) ? error : 0; } @@ -449,8 +447,8 @@ nfs_wb_all(struct inode *inode) */ static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how) { - int error = nfs_sync_inode(inode, page->index, 1, - how | FLUSH_WAIT | FLUSH_STABLE); + int error = nfs_sync_inode_wait(inode, page->index, 1, + how | FLUSH_STABLE); return (error < 0) ? error : 0; } -- cgit v1.2.3