From e894f633980804a528a2d6996c4ea651df631632 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Jun 2025 14:02:40 +0200 Subject: kthread: Honour kthreads preferred affinity after cpuset changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cpuset isolated partitions get updated, unbound kthreads get indifferently affine to all non isolated CPUs, regardless of their individual affinity preferences. For example kswapd is a per-node kthread that prefers to be affine to the node it refers to. Whenever an isolated partition is created, updated or deleted, kswapd's node affinity is going to be broken if any CPU in the related node is not isolated because kswapd will be affine globally. Fix this with letting the consolidated kthread managed affinity code do the affinity update on behalf of cpuset. Signed-off-by: Frederic Weisbecker Reviewed-by: Waiman Long Cc: Michal Koutný Cc: Ingo Molnar Cc: Johannes Weiner Cc: Marco Crivellari Cc: Michal Hocko Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Waiman Long Cc: cgroups@vger.kernel.org --- include/linux/kthread.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/kthread.h') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8d27403888ce..c92c1149ee6e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -100,6 +100,7 @@ void kthread_unpark(struct task_struct *k); void kthread_parkme(void); void kthread_exit(long result) __noreturn; void kthread_complete_and_exit(struct completion *, long) __noreturn; +int kthreads_update_housekeeping(void); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; -- cgit v1.2.3 From 28aaa9c39945b7925a1cc1d513c8f21ed38f5e4f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 26 Feb 2026 10:43:55 +0100 Subject: kthread: consolidate kthread exit paths to prevent use-after-free Guillaume reported crashes via corrupted RCU callback function pointers during KUnit testing. The crash was traced back to the pidfs rhashtable conversion which replaced the 24-byte rb_node with an 8-byte rhash_head in struct pid, shrinking it from 160 to 144 bytes. struct kthread (without CONFIG_BLK_CGROUP) is also 144 bytes. With CONFIG_SLAB_MERGE_DEFAULT and SLAB_HWCACHE_ALIGN both round up to 192 bytes and share the same slab cache. struct pid.rcu.func and struct kthread.affinity_node both sit at offset 0x78. When a kthread exits via make_task_dead() it bypasses kthread_exit() and misses the affinity_node cleanup. free_kthread_struct() frees the memory while the node is still linked into the global kthread_affinity_list. A subsequent list_del() by another kthread writes through dangling list pointers into the freed and reused memory, corrupting the pid's rcu.func pointer. Instead of patching free_kthread_struct() to handle the missed cleanup, consolidate all kthread exit paths. Turn kthread_exit() into a macro that calls do_exit() and add kthread_do_exit() which is called from do_exit() for any task with PF_KTHREAD set. This guarantees that kthread-specific cleanup always happens regardless of the exit path - make_task_dead(), direct do_exit(), or kthread_exit(). Replace __to_kthread() with a new tsk_is_kthread() accessor in the public header. Export do_exit() since module code using the kthread_exit() macro now needs it directly. Reported-by: Guillaume Tucker Tested-by: Guillaume Tucker Tested-by: Mark Brown Tested-by: David Gow Cc: Link: https://lore.kernel.org/all/20260224-mittlerweile-besessen-2738831ae7f6@brauner Co-developed-by: Linus Torvalds Fixes: 4d13f4304fa4 ("kthread: Implement preferred affinity") Signed-off-by: Linus Torvalds Signed-off-by: Christian Brauner --- include/linux/kthread.h | 21 ++++++++++++++++++++- kernel/exit.c | 6 ++++++ kernel/kthread.c | 41 +++++------------------------------------ 3 files changed, 31 insertions(+), 37 deletions(-) (limited to 'include/linux/kthread.h') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c92c1149ee6e..a01a474719a7 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -7,6 +7,24 @@ struct mm_struct; +/* opaque kthread data */ +struct kthread; + +/* + * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will + * always remain a kthread. For kthreads p->worker_private always + * points to a struct kthread. For tasks that are not kthreads + * p->worker_private is used to point to other things. + * + * Return NULL for any task that is not a kthread. + */ +static inline struct kthread *tsk_is_kthread(struct task_struct *p) +{ + if (p->flags & PF_KTHREAD) + return p->worker_private; + return NULL; +} + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -void kthread_exit(long result) __noreturn; +#define kthread_exit(result) do_exit(result) void kthread_complete_and_exit(struct completion *, long) __noreturn; int kthreads_update_housekeeping(void); +void kthread_do_exit(struct kthread *, long); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/kernel/exit.c b/kernel/exit.c index 8a87021211ae..ede3117fa7d4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -896,11 +896,16 @@ static void synchronize_group_exit(struct task_struct *tsk, long code) void __noreturn do_exit(long code) { struct task_struct *tsk = current; + struct kthread *kthread; int group_dead; WARN_ON(irqs_disabled()); WARN_ON(tsk->plug); + kthread = tsk_is_kthread(tsk); + if (unlikely(kthread)) + kthread_do_exit(kthread, code); + kcov_task_exit(tsk); kmsan_task_exit(tsk); @@ -1013,6 +1018,7 @@ void __noreturn do_exit(long code) lockdep_free_task(tsk); do_task_dead(); } +EXPORT_SYMBOL(do_exit); void __noreturn make_task_dead(int signr) { diff --git a/kernel/kthread.c b/kernel/kthread.c index 20451b624b67..791210daf8b4 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -85,24 +85,6 @@ static inline struct kthread *to_kthread(struct task_struct *k) return k->worker_private; } -/* - * Variant of to_kthread() that doesn't assume @p is a kthread. - * - * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will - * always remain a kthread. For kthreads p->worker_private always - * points to a struct kthread. For tasks that are not kthreads - * p->worker_private is used to point to other things. - * - * Return NULL for any task that is not a kthread. - */ -static inline struct kthread *__to_kthread(struct task_struct *p) -{ - void *kthread = p->worker_private; - if (kthread && !(p->flags & PF_KTHREAD)) - kthread = NULL; - return kthread; -} - void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk) { struct kthread *kthread = to_kthread(tsk); @@ -193,7 +175,7 @@ EXPORT_SYMBOL_GPL(kthread_should_park); bool kthread_should_stop_or_park(void) { - struct kthread *kthread = __to_kthread(current); + struct kthread *kthread = tsk_is_kthread(current); if (!kthread) return false; @@ -234,7 +216,7 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); */ void *kthread_func(struct task_struct *task) { - struct kthread *kthread = __to_kthread(task); + struct kthread *kthread = tsk_is_kthread(task); if (kthread) return kthread->threadfn; return NULL; @@ -266,7 +248,7 @@ EXPORT_SYMBOL_GPL(kthread_data); */ void *kthread_probe_data(struct task_struct *task) { - struct kthread *kthread = __to_kthread(task); + struct kthread *kthread = tsk_is_kthread(task); void *data = NULL; if (kthread) @@ -309,19 +291,8 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); -/** - * kthread_exit - Cause the current kthread return @result to kthread_stop(). - * @result: The integer value to return to kthread_stop(). - * - * While kthread_exit can be called directly, it exists so that - * functions which do some additional work in non-modular code such as - * module_put_and_kthread_exit can be implemented. - * - * Does not return. - */ -void __noreturn kthread_exit(long result) +void kthread_do_exit(struct kthread *kthread, long result) { - struct kthread *kthread = to_kthread(current); kthread->result = result; if (!list_empty(&kthread->affinity_node)) { mutex_lock(&kthread_affinity_lock); @@ -333,9 +304,7 @@ void __noreturn kthread_exit(long result) kthread->preferred_affinity = NULL; } } - do_exit(0); } -EXPORT_SYMBOL(kthread_exit); /** * kthread_complete_and_exit - Exit the current kthread. @@ -683,7 +652,7 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu) bool kthread_is_per_cpu(struct task_struct *p) { - struct kthread *kthread = __to_kthread(p); + struct kthread *kthread = tsk_is_kthread(p); if (!kthread) return false; -- cgit v1.2.3