From dc7fab8b3bb388c57c6c4a43ba68c8a32ca25204 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Thu, 10 Jul 2008 00:32:40 +0200 Subject: sched: fix cpu hotplug I think we may have a race between try_to_wake_up() and migrate_live_tasks() -> move_task_off_dead_cpu() when the later one may end up looping endlessly. Interrupts are enabled on other CPUs when migration_call(CPU_DEAD, ...) is called so we may get a race between try_to_wake_up() and migrate_live_tasks() -> move_task_off_dead_cpu(). The former one may push a task out of a dead CPU causing the later one to loop endlessly. Heiko Carstens observed: | That's exactly what explains a dump I got yesterday. Thanks for fixing! :) Signed-off-by: Dmitry Adamushko Cc: miaox@cn.fujitsu.com Cc: Lai Jiangshan Cc: Heiko Carstens Cc: Peter Zijlstra Cc: Avi Kivity Cc: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched.c b/kernel/sched.c index 94ead43eda62..9397b8710138 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5621,8 +5621,10 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) double_rq_lock(rq_src, rq_dest); /* Already moved. */ - if (task_cpu(p) != src_cpu) + if (task_cpu(p) != src_cpu) { + ret = 1; goto out; + } /* Affinity changed (again). */ if (!cpu_isset(dest_cpu, p->cpus_allowed)) goto out; -- cgit v1.2.3 From b1e387348a2a70954312b102d0589c3e2ca3dba1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 10 Jul 2008 11:25:03 -0700 Subject: sched: fix cpu hotplug, cleanup Clean up __migrate_task(): to just have separate "done" and "fail" cases, instead of that "out" case with random error behavior. Signed-off-by: Ingo Molnar --- kernel/sched.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 9397b8710138..4e2f60335656 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5621,13 +5621,11 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) double_rq_lock(rq_src, rq_dest); /* Already moved. */ - if (task_cpu(p) != src_cpu) { - ret = 1; - goto out; - } + if (task_cpu(p) != src_cpu) + goto done; /* Affinity changed (again). */ if (!cpu_isset(dest_cpu, p->cpus_allowed)) - goto out; + goto fail; on_rq = p->se.on_rq; if (on_rq) @@ -5638,8 +5636,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) activate_task(rq_dest, p, 0); check_preempt_curr(rq_dest, p); } +done: ret = 1; -out: +fail: double_rq_unlock(rq_src, rq_dest); return ret; } -- cgit v1.2.3