summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Al Viro <viro@zeniv.linux.org.uk> 2026-04-11 11:01:28 +0300
committer Al Viro <viro@zeniv.linux.org.uk> 2026-06-05 07:34:55 +0300
commit a5beeb64f22662a53facd71ca2843f9d649597d6 (patch)
tree c00a9091d45f3f9248fe019cda9c104f75fef041
parent b4143e0e71cd660d05de8d2861e980e7c840eb39 (diff)
download linux-a5beeb64f22662a53facd71ca2843f9d649597d6.tar.xz
Document rcu_read_lock() use in select_collect2()
If select_collect2() finds something that is neither busy nor can be moved to a shrink list, it needs to return that to the caller's caller (shrink_dcache_tree()) ASAP and do so without grabbing references (among other things, it might be already dying, in which case the refcount can't be incremented). We are called inside a ->d_lock scope, but that scope is going to be terminated as soon as we return to the caller (d_walk()); ->d_lock will be retaken by shrink_dcache_tree(), but we need to bridge between these scopes, turning them into a contiguous RCU read-side critical area. We do that with an rcu_read_lock() scope - it spans from the unbalanced rcu_read_lock() in select_collect2() to the unbalanced rcu_read_unlock() in shrink_dcache_tree(). That works, but it really needs to be documented; it's rather unidiomatic and it has caused quite a bit of confusion - some of it in the form of patches "fixing" the damn thing. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- fs/dcache.c 22
1 files changed, 21 insertions, 1 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index ae9b7151e6a4..a65cb6451e63 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1638,6 +1638,15 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
if (dentry->d_lockref.count <= 0) {
if (!__move_to_shrink_list(dentry, &data->dispose)) {
+ /*
+ * We need to enter an RCU read-side critical area that
+ * would extend past the return from d_walk() and
+ * we are in the scope of ->d_lock that will terminate
+ * before that, so we use rcu_read_lock() to bridge
+ * over to the scope of ->d_lock in d_walk() caller.
+ * The scope of rcu_read_lock() spans from here to
+ * paired rcu_read_unlock() in shrink_dcache_tree().
+ */
rcu_read_lock();
data->victim = dentry;
return D_WALK_QUIT;
@@ -1682,9 +1691,20 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
d_walk(parent, &data, select_collect2);
if (data.victim) {
struct dentry *v = data.victim;
-
+ /*
+ * select_collect2() has picked a dentry that was
+ * either dying or on a shrink list and arranged
+ * for it to be returned to us. We are still in
+ * the RCU read-side critical area started there
+ * (rcu_read_lock() scope opened in select_collect2()),
+ * so dentry couldn't have been freed yet, but its
+ * state might've changed since we dropped ->d_lock
+ * on the way out. Switch over to ->d_lock scope
+ * and recheck the dentry state.
+ */
spin_lock(&v->d_lock);
rcu_read_unlock();
+
if (v->d_lockref.count < 0 &&
!(v->d_flags & DCACHE_DENTRY_KILLED)) {
struct completion_list wait;