From eeb61e53ea19be0c4015b00b2e8b3b2185436f2b Mon Sep 17 00:00:00 2001
From: Kirill Tkhai
Date: Mon, 27 Oct 2014 14:18:25 +0400
Subject: sched: Fix race between task_group and sched_task_group

The race may happen when somebody changes the task_group of a forking
task. The child's cgroup is the same as the parent's after
dup_task_struct() (it is just a memory copy), and the child's cfs_rq and
rt_rq are the same as the parent's as well.

But if the parent changes its task_group before cgroup_post_fork() is
called, this change is not reflected in the child: the child's cfs_rq
and rt_rq remain the old ones, while the child's task_group changes in
cgroup_post_fork().

To fix this, we introduce a fork() method which calls sched_move_task()
directly. This function updates sched_task_group appropriately (its
logic also has no problem with freshly created tasks, so nothing special
is needed; we can simply use it).

Possibly, this also resolves Burke Libbey's problem:
https://lkml.org/lkml/2014/10/24/456

Signed-off-by: Kirill Tkhai
Signed-off-by: Peter Zijlstra (Intel)
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/1414405105.19914.169.camel@tkhai
Signed-off-by: Ingo Molnar
---
 kernel/sched/core.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/sched/core.c')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44999505e1bf..dde8adb7d0c0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7833,6 +7833,11 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
 	sched_offline_group(tg);
 }
 
+static void cpu_cgroup_fork(struct task_struct *task)
+{
+	sched_move_task(task);
+}
+
 static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
 				 struct cgroup_taskset *tset)
 {
@@ -8205,6 +8210,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_free	= cpu_cgroup_css_free,
 	.css_online	= cpu_cgroup_css_online,
 	.css_offline	= cpu_cgroup_css_offline,
+	.fork		= cpu_cgroup_fork,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
--
cgit v1.2.3
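For context: the new ->fork() callback registered above is invoked from
cgroup_post_fork(), which runs late in fork(), after the child's cgroup
membership has been settled, so sched_move_task() re-derives the child's
sched_task_group, cfs_rq and rt_rq from the cgroup the child actually
ends up in. A simplified sketch of that call path follows; it is not the
verbatim code of this tree, and locking and early-exit checks are elided.

	/* Simplified sketch of the caller, not verbatim kernel code. */
	void cgroup_post_fork(struct task_struct *child)
	{
		struct cgroup_subsys *ss;
		int i;

		/* ... commit the child to its (possibly updated) css_set ... */

		/*
		 * Call each subsystem's ->fork() method; for the cpu
		 * controller this is now cpu_cgroup_fork(), which calls
		 * sched_move_task().
		 */
		for_each_subsys(ss, i)
			if (ss->fork)
				ss->fork(child);
	}
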
From 009f60e2763568cdcd75bd1cf360c7c7165e2e60 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Sun, 5 Oct 2014 22:23:22 +0200
Subject: sched: stop the unbound recursion in preempt_schedule_context()

preempt_schedule_context() does preempt_enable_notrace() at the end, and
this can call the same function again; exception_exit() is heavy and it
is quite possible that need-resched is true again.

1. Change this code to decrement preempt_count() and check
   need_resched() by hand.

2. As Linus suggested, we can use the PREEMPT_ACTIVE bit and avoid the
   enable/disable dance around __schedule(). But in this case we need to
   move this code into sched/core.c.

3. Cosmetic, but x86 forgets to declare this function. This doesn't
   really matter because it is only called by asm helpers, but it still
   makes sense to add the declaration to asm/preempt.h to match
   preempt_schedule().

Reported-by: Sasha Levin
Signed-off-by: Oleg Nesterov
Signed-off-by: Peter Zijlstra (Intel)
Cc: Alexander Graf
Cc: Andrew Morton
Cc: Christoph Lameter
Cc: Linus Torvalds
Cc: Masami Hiramatsu
Cc: Steven Rostedt
Cc: Peter Anvin
Cc: Andy Lutomirski
Cc: Denys Vlasenko
Cc: Chuck Ebbert
Cc: Frederic Weisbecker
Link: http://lkml.kernel.org/r/20141005202322.GB27962@redhat.com
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/preempt.h |  1 +
 kernel/context_tracking.c      | 40 ----------------------------------------
 kernel/sched/core.c            | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 40 deletions(-)

(limited to 'kernel/sched/core.c')

diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 7024c12f7bfe..400873450e33 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -105,6 +105,7 @@ static __always_inline bool should_resched(void)
 # ifdef CONFIG_CONTEXT_TRACKING
   extern asmlinkage void ___preempt_schedule_context(void);
 #  define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
+  extern asmlinkage void preempt_schedule_context(void);
 # endif
 #endif
 
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 5664985c46a0..937ecdfdf258 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -107,46 +107,6 @@ void context_tracking_user_enter(void)
 }
 NOKPROBE_SYMBOL(context_tracking_user_enter);
 
-#ifdef CONFIG_PREEMPT
-/**
- * preempt_schedule_context - preempt_schedule called by tracing
- *
- * The tracing infrastructure uses preempt_enable_notrace to prevent
- * recursion and tracing preempt enabling caused by the tracing
- * infrastructure itself. But as tracing can happen in areas coming
- * from userspace or just about to enter userspace, a preempt enable
- * can occur before user_exit() is called. This will cause the scheduler
- * to be called when the system is still in usermode.
- *
- * To prevent this, the preempt_enable_notrace will use this function
- * instead of preempt_schedule() to exit user context if needed before
- * calling the scheduler.
- */
-asmlinkage __visible void __sched notrace preempt_schedule_context(void)
-{
-	enum ctx_state prev_ctx;
-
-	if (likely(!preemptible()))
-		return;
-
-	/*
-	 * Need to disable preemption in case user_exit() is traced
-	 * and the tracer calls preempt_enable_notrace() causing
-	 * an infinite recursion.
-	 */
-	preempt_disable_notrace();
-	prev_ctx = exception_enter();
-	preempt_enable_no_resched_notrace();
-
-	preempt_schedule();
-
-	preempt_disable_notrace();
-	exception_exit(prev_ctx);
-	preempt_enable_notrace();
-}
-EXPORT_SYMBOL_GPL(preempt_schedule_context);
-#endif /* CONFIG_PREEMPT */
-
 /**
  * context_tracking_user_exit - Inform the context tracking that the CPU is
  *                              exiting userspace mode and entering the kernel.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dde8adb7d0c0..240157c13ddc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2951,6 +2951,47 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 }
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
+
+#ifdef CONFIG_CONTEXT_TRACKING
+/**
+ * preempt_schedule_context - preempt_schedule called by tracing
+ *
+ * The tracing infrastructure uses preempt_enable_notrace to prevent
+ * recursion and tracing preempt enabling caused by the tracing
+ * infrastructure itself. But as tracing can happen in areas coming
+ * from userspace or just about to enter userspace, a preempt enable
+ * can occur before user_exit() is called. This will cause the scheduler
+ * to be called when the system is still in usermode.
+ *
+ * To prevent this, the preempt_enable_notrace will use this function
+ * instead of preempt_schedule() to exit user context if needed before
+ * calling the scheduler.
+ */
+asmlinkage __visible void __sched notrace preempt_schedule_context(void)
+{
+	enum ctx_state prev_ctx;
+
+	if (likely(!preemptible()))
+		return;
+
+	do {
+		__preempt_count_add(PREEMPT_ACTIVE);
+		/*
+		 * Needs preempt disabled in case user_exit() is traced
+		 * and the tracer calls preempt_enable_notrace() causing
+		 * an infinite recursion.
+		 */
+		prev_ctx = exception_enter();
+		__schedule();
+		exception_exit(prev_ctx);
+
+		__preempt_count_sub(PREEMPT_ACTIVE);
+		barrier();
+	} while (need_resched());
+}
+EXPORT_SYMBOL_GPL(preempt_schedule_context);
+#endif /* CONFIG_CONTEXT_TRACKING */
+
 #endif /* CONFIG_PREEMPT */
 
 /*
--
cgit v1.2.3
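For comparison: with this change, preempt_schedule_context() takes the
same shape as preempt_schedule() just above it in kernel/sched/core.c,
with exception_enter()/exception_exit() wrapped around __schedule(). A
simplified sketch of that common shape (not a verbatim copy of the file):

	asmlinkage __visible void __sched notrace preempt_schedule(void)
	{
		/* Nothing to do unless preemption is currently possible. */
		if (likely(!preemptible()))
			return;

		do {
			/* PREEMPT_ACTIVE keeps __schedule() from recursing. */
			__preempt_count_add(PREEMPT_ACTIVE);
			__schedule();
			__preempt_count_sub(PREEMPT_ACTIVE);

			/*
			 * Re-check: a new preemption request may have arrived
			 * while we were inside __schedule().
			 */
			barrier();
		} while (need_resched());
	}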