diff options
| -rw-r--r-- | kernel/sched/fair.c | 47 |
1 files changed, 46 insertions, 1 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c9cd064223e5..a55ada22e40c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1522,6 +1522,51 @@ static void task_tick_cache(struct rq *rq, struct task_struct *p) } } +/* get_scan_cpumasks - fill @cpus with the CPUs that task_cache_work() iterates over for @p. When CONFIG_NUMA_BALANCING is built in, sched_numa_balancing is enabled and @p has a preferred node, the CPUs of that node, of the node of p->mm->sc_stat.cpu (when it is not -1 and not already covered) and of the node @p currently runs on are ORed into @cpus; in every other case @cpus is overwritten with cpu_online_mask. */ static void get_scan_cpumasks(cpumask_var_t cpus, struct task_struct *p) +{ +#ifdef CONFIG_NUMA_BALANCING + int cpu, curr_cpu, nid, pref_nid; + + if (!static_branch_likely(&sched_numa_balancing)) + goto out; + + /* NOTE(review): p->mm is dereferenced unchecked; presumably the caller only runs for tasks that have an mm - verify */ cpu = p->mm->sc_stat.cpu; + if (cpu != -1) + nid = cpu_to_node(cpu); /* nid is left unassigned when cpu == -1; the only later read of nid sits behind a cpu != -1 test */ + curr_cpu = task_cpu(p); + + /* + * Scanning in the preferred NUMA node is ideal. However, the NUMA + * preferred node is per-task rather than per-process. It is possible + * for different threads of the process to have distinct preferred + * nodes; consequently, the process-wide preferred LLC may bounce + * between different nodes. As a workaround, maintain the scan + * CPU mask to also cover the process's current preferred LLC and the + * current running node to mitigate the bouncing risk. + * TBD: numa_group should be considered during task aggregation. 
+ */ + pref_nid = p->numa_preferred_nid; + /* honor the task's preferred node */ + if (pref_nid == NUMA_NO_NODE) + goto out; + + /* NOTE(review): this ORs into whatever @cpus already holds; assumes the caller passes in a zeroed mask - TODO confirm against the allocation in task_cache_work() */ cpumask_or(cpus, cpus, cpumask_of_node(pref_nid)); + + /* honor the task's preferred LLC CPU */ + if (cpu != -1 && !cpumask_test_cpu(cpu, cpus) && nid != NUMA_NO_NODE) + cpumask_or(cpus, cpus, cpumask_of_node(nid)); + + /* make sure the task's current running node is included */ + if (!cpumask_test_cpu(curr_cpu, cpus)) + cpumask_or(cpus, cpus, cpumask_of_node(cpu_to_node(curr_cpu))); + + return; + +out: +#endif + /* fallback, and the only path without CONFIG_NUMA_BALANCING: scan every online CPU */ cpumask_copy(cpus, cpu_online_mask); +} + static void task_cache_work(struct callback_head *work) { struct task_struct *p = current; @@ -1544,7 +1589,7 @@ static void task_cache_work(struct callback_head *work) scoped_guard (cpus_read_lock) { guard(rcu)(); - cpumask_copy(cpus, cpu_online_mask); + get_scan_cpumasks(cpus, p); for_each_cpu(cpu, cpus) { /* XXX sched_cluster_active */ |
