diff options
author | Srikar Dronamraju <srikar@linux.vnet.ibm.com> | 2018-06-20 20:02:50 +0300 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-07-25 12:41:07 +0300 |
commit | 0ad4e3dfe6cf3f207e61cbd8e3e4a943f8c1ad20 (patch) | |
tree | ca75536eb7cd48e7ea79c1c66f881bff954cc470 /kernel/sched | |
parent | 10864a9e222048a862da2c21efa28929a4dfed15 (diff) | |
download | linux-0ad4e3dfe6cf3f207e61cbd8e3e4a943f8c1ad20.tar.xz |
sched/numa: Modify migrate_swap() to accept additional parameters
There are checks in migrate_swap_stop() that verify that the task/CPU
combination matches migration_swap_arg before migrating.
However, at least one of the two tasks to be swapped by migrate_swap() could
have migrated to a completely different CPU before migration_swap_arg is
filled in. The new CPU where the task is currently running could
be on a different node too. If the task has migrated, the NUMA balancer might
end up placing a task on the wrong node. Instead of achieving node
consolidation, it may end up spreading the load across nodes.
To avoid that, pass the CPUs as additional parameters.
While here, place migrate_swap under CONFIG_NUMA_BALANCING.
Running SPECjbb2005 on a 4-node machine and comparing bops/JVM:
JVMS LAST_PATCH WITH_PATCH %CHANGE
16 25377.3 25226.6 -0.59
1 72287 73326 1.437
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1529514181-9842-10-git-send-email-srikar@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 9 | ||||
-rw-r--r-- | kernel/sched/fair.c | 3 | ||||
-rw-r--r-- | kernel/sched/sched.h | 3 |
3 files changed, 10 insertions, 5 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2bc391a574e6..deafa9fe602b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1176,6 +1176,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) __set_task_cpu(p, new_cpu); } +#ifdef CONFIG_NUMA_BALANCING static void __migrate_swap_task(struct task_struct *p, int cpu) { if (task_on_rq_queued(p)) { @@ -1257,16 +1258,17 @@ unlock: /* * Cross migrate two tasks */ -int migrate_swap(struct task_struct *cur, struct task_struct *p) +int migrate_swap(struct task_struct *cur, struct task_struct *p, + int target_cpu, int curr_cpu) { struct migration_swap_arg arg; int ret = -EINVAL; arg = (struct migration_swap_arg){ .src_task = cur, - .src_cpu = task_cpu(cur), + .src_cpu = curr_cpu, .dst_task = p, - .dst_cpu = task_cpu(p), + .dst_cpu = target_cpu, }; if (arg.src_cpu == arg.dst_cpu) @@ -1291,6 +1293,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) out: return ret; } +#endif /* CONFIG_NUMA_BALANCING */ /* * wait_task_inactive - wait for a thread to unschedule. 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4ac60b296d96..7b4eddec3ccc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1848,7 +1848,8 @@ static int task_numa_migrate(struct task_struct *p) return ret; } - ret = migrate_swap(p, env.best_task); + ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu); + if (ret != 0) trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task)); put_task_struct(env.best_task); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 614170d9b1aa..4a2e8cae63c4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1099,7 +1099,8 @@ enum numa_faults_stats { }; extern void sched_setnuma(struct task_struct *p, int node); extern int migrate_task_to(struct task_struct *p, int cpu); -extern int migrate_swap(struct task_struct *, struct task_struct *); +extern int migrate_swap(struct task_struct *p, struct task_struct *t, + int cpu, int scpu); extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p); #else static inline void |