summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.com>2017-12-21 01:45:40 +0300
committerAl Viro <viro@zeniv.linux.org.uk>2017-12-26 04:22:07 +0300
commitf1ee616214cb22410e939d963bbb2349c2570f02 (patch)
tree3c7c0aab550ed16f00fbe76595e42de6f461b52a /fs
parent00b0c9b82663ac42e5a09f58ce960f81f29d64ee (diff)
downloadlinux-f1ee616214cb22410e939d963bbb2349c2570f02.tar.xz
VFS: don't keep disconnected dentries on d_anon
The original purpose of the per-superblock d_anon list was to keep disconnected dentries in the cache between consecutive requests to the NFS server. Dentries can be disconnected if a client holds a file open and repeatedly performs IO on it, and if the server drops the dentry, whether due to memory pressure, server restart, or "echo 3 > /proc/sys/vm/drop_caches". This purpose was thwarted by commit 75a6f82a0d10 ("freeing unlinked file indefinitely delayed") which caused disconnected dentries to be freed as soon as their refcount reached zero. This means that, when a dentry being used by nfsd gets disconnected, a new one needs to be allocated for every request (unless requests overlap). As the dentry has no name, no parent, and no children, there is little of value to cache. As small memory allocations are typically fast (from per-cpu free lists) this likely has little cost. This means that the original purpose of s_anon is no longer relevant: there is no longer any need to keep disconnected dentries on a list so they appear to be hashed. However, s_anon now has a new use. When you mount an NFS filesystem, the dentry stored in s_root is just a placebo. The "real" root dentry is allocated using d_obtain_root() and so it kept on the s_anon list. I don't know the reason for this, but suspect it related to NFSv4 where a mount of "server:/some/path" require NFS to look up the root filehandle on the server, then walk down "/some" and "/path" to get the filehandle to mount. Whatever the reason, NFS depends on the s_anon list and on shrink_dcache_for_umount() pruning all dentries on this list. So we cannot simply remove s_anon. We could just leave the code unchanged, but apart from that being potentially confusing, the (unfair) bit-spin-lock which protects s_anon can become a bottle neck when lots of disconnected dentries are being created. So this patch renames s_anon to s_roots, and stops storing disconnected dentries on the list. Only dentries obtained with d_obtain_root() are now stored on this list. There are many fewer of these (only NFS and NILFS2 use the call, and only during filesystem mount) so contention on the bit-lock will not be a problem. Possibly an alternate solution should be found for NFS and NILFS2, but that would require understanding their needs first. Signed-off-by: NeilBrown <neilb@suse.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r--fs/dcache.c22
-rw-r--r--fs/super.c2
2 files changed, 13 insertions, 11 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index b99a39206930..17e6b84b9656 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -48,8 +48,8 @@
* - i_dentry, d_u.d_alias, d_inode of aliases
* dcache_hash_bucket lock protects:
* - the dcache hash table
- * s_anon bl list spinlock protects:
- * - the s_anon list (see __d_drop)
+ * s_roots bl list spinlock protects:
+ * - the s_roots list (see __d_drop)
* dentry->d_sb->s_dentry_lru_lock protects:
* - the dcache lru lists and counters
* d_lock protects:
@@ -67,7 +67,7 @@
* dentry->d_lock
* dentry->d_sb->s_dentry_lru_lock
* dcache_hash_bucket lock
- * s_anon lock
+ * s_roots lock
*
* If there is an ancestor relationship:
* dentry->d_parent->...->d_parent->d_lock
@@ -476,10 +476,10 @@ void __d_drop(struct dentry *dentry)
/*
* Hashed dentries are normally on the dentry hashtable,
* with the exception of those newly allocated by
- * d_obtain_alias, which are always IS_ROOT:
+ * d_obtain_root, which are always IS_ROOT:
*/
if (unlikely(IS_ROOT(dentry)))
- b = &dentry->d_sb->s_anon;
+ b = &dentry->d_sb->s_roots;
else
b = d_hash(dentry->d_name.hash);
@@ -1499,8 +1499,8 @@ void shrink_dcache_for_umount(struct super_block *sb)
sb->s_root = NULL;
do_one_tree(dentry);
- while (!hlist_bl_empty(&sb->s_anon)) {
- dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash));
+ while (!hlist_bl_empty(&sb->s_roots)) {
+ dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash));
do_one_tree(dentry);
}
}
@@ -1964,9 +1964,11 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
spin_lock(&tmp->d_lock);
__d_set_inode_and_type(tmp, inode, add_flags);
hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
- hlist_bl_lock(&tmp->d_sb->s_anon);
- hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
- hlist_bl_unlock(&tmp->d_sb->s_anon);
+ if (!disconnected) {
+ hlist_bl_lock(&tmp->d_sb->s_roots);
+ hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_roots);
+ hlist_bl_unlock(&tmp->d_sb->s_roots);
+ }
spin_unlock(&tmp->d_lock);
spin_unlock(&inode->i_lock);
diff --git a/fs/super.c b/fs/super.c
index d4e33e8f1e6f..9ea66601d664 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -207,7 +207,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
if (s->s_user_ns != &init_user_ns)
s->s_iflags |= SB_I_NODEV;
INIT_HLIST_NODE(&s->s_instances);
- INIT_HLIST_BL_HEAD(&s->s_anon);
+ INIT_HLIST_BL_HEAD(&s->s_roots);
mutex_init(&s->s_sync_lock);
INIT_LIST_HEAD(&s->s_inodes);
spin_lock_init(&s->s_inode_list_lock);