From bd900901b8d1838bf1b6e63063e0025fca42d283 Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Thu, 24 Mar 2022 21:30:39 +1100 Subject: kernfs: Remove reference counting for kernfs_open_node. The decision to free kernfs_open_node object in kernfs_put_open_node can be taken based on whether kernfs_open_node->files list is empty or not. As far as kernfs_drain_open_files is concerned it can't overlap with kernfs_fops_open and hence can check for ->attr.open optimistically (if ->attr.open is NULL) or under kernfs_open_file_mutex (if it needs to traverse the ->files list.) Thus kernfs_drain_open_files can work w/o ref counting involved kernfs_open_node as well. So remove ->refcnt and modify the above mentioned users accordingly. Suggested by: Al Viro Signed-off-by: Imran Khan Link: https://lore.kernel.org/r/20220324103040.584491-2-imran.f.khan@oracle.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 88423069407c..aea6968c979e 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -33,7 +33,6 @@ static DEFINE_SPINLOCK(kernfs_open_node_lock); static DEFINE_MUTEX(kernfs_open_file_mutex); struct kernfs_open_node { - atomic_t refcnt; atomic_t event; wait_queue_head_t poll; struct list_head files; /* goes through kernfs_open_file.list */ @@ -530,10 +529,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn, } on = kn->attr.open; - if (on) { - atomic_inc(&on->refcnt); + if (on) list_add_tail(&of->list, &on->files); - } spin_unlock_irq(&kernfs_open_node_lock); mutex_unlock(&kernfs_open_file_mutex); @@ -548,7 +545,6 @@ static int kernfs_get_open_node(struct kernfs_node *kn, if (!new_on) return -ENOMEM; - atomic_set(&new_on->refcnt, 0); atomic_set(&new_on->event, 1); init_waitqueue_head(&new_on->poll); INIT_LIST_HEAD(&new_on->files); @@ -557,11 +553,12 @@ static int kernfs_get_open_node(struct kernfs_node *kn, /** * kernfs_put_open_node - put kernfs_open_node - * @kn: target kernfs_nodet + * @kn: target kernfs_node * @of: associated kernfs_open_file * * Put @kn->attr.open and unlink @of from the files list. If - * reference count reaches zero, disassociate and free it. + * list of associated open files becomes empty, disassociate and + * free kernfs_open_node. * * LOCKING: * None. @@ -578,7 +575,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn, if (of) list_del(&of->list); - if (atomic_dec_and_test(&on->refcnt)) + if (list_empty(&on->files)) kn->attr.open = NULL; else on = NULL; @@ -768,15 +765,15 @@ void kernfs_drain_open_files(struct kernfs_node *kn) if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) return; - spin_lock_irq(&kernfs_open_node_lock); on = kn->attr.open; - if (on) - atomic_inc(&on->refcnt); - spin_unlock_irq(&kernfs_open_node_lock); if (!on) return; mutex_lock(&kernfs_open_file_mutex); + if (!kn->attr.open) { + mutex_unlock(&kernfs_open_file_mutex); + return; + } list_for_each_entry(of, &on->files, list) { struct inode *inode = file_inode(of->file); @@ -789,8 +786,6 @@ void kernfs_drain_open_files(struct kernfs_node *kn) } mutex_unlock(&kernfs_open_file_mutex); - - kernfs_put_open_node(kn, NULL); } /* -- cgit v1.2.3 From c1b1352f21bcf8c0678c4d4fbfafc4f6729e1daa Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Wed, 4 May 2022 19:51:19 +1000 Subject: kernfs: Rename kernfs_put_open_node to kernfs_unlink_open_file. Since we are no longer using refcnt for kernfs_open_node instances, rename kernfs_put_open_node to kernfs_unlink_open_file to reflect this change. Also update function description and inline comments accordingly. Signed-off-by: Imran Khan Link: https://lore.kernel.org/r/20220504095123.295859-2-imran.f.khan@oracle.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index aea6968c979e..e3abfa843879 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -552,18 +552,19 @@ static int kernfs_get_open_node(struct kernfs_node *kn, } /** - * kernfs_put_open_node - put kernfs_open_node + * kernfs_unlink_open_file - Unlink @of from @kn. + * * @kn: target kernfs_node * @of: associated kernfs_open_file * - * Put @kn->attr.open and unlink @of from the files list. If - * list of associated open files becomes empty, disassociate and - * free kernfs_open_node. + * Unlink @of from list of @kn's associated open files. If list of + * associated open files becomes empty, disassociate and free + * kernfs_open_node. * * LOCKING: * None. */ -static void kernfs_put_open_node(struct kernfs_node *kn, +static void kernfs_unlink_open_file(struct kernfs_node *kn, struct kernfs_open_file *of) { struct kernfs_open_node *on = kn->attr.open; @@ -703,7 +704,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) return 0; err_put_node: - kernfs_put_open_node(kn, of); + kernfs_unlink_open_file(kn, of); err_seq_release: seq_release(inode, file); err_free: @@ -749,7 +750,7 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp) mutex_unlock(&kernfs_open_file_mutex); } - kernfs_put_open_node(kn, of); + kernfs_unlink_open_file(kn, of); seq_release(inode, filp); kfree(of->prealloc_buf); kfree(of); @@ -765,8 +766,15 @@ void kernfs_drain_open_files(struct kernfs_node *kn) if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) return; - on = kn->attr.open; - if (!on) + /* + * lockless opportunistic check is safe below because no one is adding to + * ->attr.open at this point of time. This check allows early bail out + * if ->attr.open is already NULL. kernfs_unlink_open_file makes + * ->attr.open NULL only while holding kernfs_open_file_mutex so below + * check under kernfs_open_file_mutex will ensure bailing out if + * ->attr.open became NULL while waiting for the mutex. + */ + if (!kn->attr.open) return; mutex_lock(&kernfs_open_file_mutex); @@ -775,6 +783,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn) return; } + on = kn->attr.open; + list_for_each_entry(of, &on->files, list) { struct inode *inode = file_inode(of->file); -- cgit v1.2.3 From 1a702dc88e150487c9c173a249b3d236498b9183 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Mon, 16 May 2022 12:09:51 -0700 Subject: kernfs: Separate kernfs_pr_cont_buf and rename_lock. Previously the protection of kernfs_pr_cont_buf was piggy backed by rename_lock, which means that pr_cont() needs to be protected under rename_lock. This can cause potential circular lock dependencies. If there is an OOM, we have the following call hierarchy: -> cpuset_print_current_mems_allowed() -> pr_cont_cgroup_name() -> pr_cont_kernfs_name() pr_cont_kernfs_name() will grab rename_lock and call printk. So we have the following lock dependencies: kernfs_rename_lock -> console_sem Sometimes, printk does a wakeup before releasing console_sem, which has the dependence chain: console_sem -> p->pi_lock -> rq->lock Now, imagine one wants to read cgroup_name under rq->lock, for example, printing cgroup_name in a tracepoint in the scheduler code. They will be holding rq->lock and take rename_lock: rq->lock -> kernfs_rename_lock Now they will deadlock. A prevention to this circular lock dependency is to separate the protection of pr_cont_buf from rename_lock. In principle, rename_lock is to protect the integrity of cgroup name when copying to buf. Once pr_cont_buf has got its content, rename_lock can be dropped. So it's safe to drop rename_lock after kernfs_name_locked (and kernfs_path_from_node_locked) and rely on a dedicated pr_cont_lock to protect pr_cont_buf. Acked-by: Tejun Heo Signed-off-by: Hao Luo Link: https://lore.kernel.org/r/20220516190951.3144144-1-haoluo@google.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index e205fde7163a..6eca72cfa1f2 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -18,7 +18,15 @@ #include "kernfs-internal.h" static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ -static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ +/* + * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to + * call pr_cont() while holding rename_lock. Because sometimes pr_cont() + * will perform wakeups when releasing console_sem. Holding rename_lock + * will introduce deadlock if the scheduler reads the kernfs_name in the + * wakeup path. + */ +static DEFINE_SPINLOCK(kernfs_pr_cont_lock); +static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) @@ -229,12 +237,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn) { unsigned long flags; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); + kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); pr_cont("%s", kernfs_pr_cont_buf); - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -248,10 +256,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) unsigned long flags; int sz; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf, - sizeof(kernfs_pr_cont_buf)); + sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, + sizeof(kernfs_pr_cont_buf)); if (sz < 0) { pr_cont("(error)"); goto out; @@ -265,7 +273,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) pr_cont("%s", kernfs_pr_cont_buf); out: - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -823,13 +831,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem); - /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */ - spin_lock_irq(&kernfs_rename_lock); + spin_lock_irq(&kernfs_pr_cont_lock); len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); if (len >= sizeof(kernfs_pr_cont_buf)) { - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return NULL; } @@ -841,7 +848,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, parent = kernfs_find_ns(parent, name, ns); } - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return parent; } -- cgit v1.2.3