sched/cache: Skip cache-aware scheduling for single-threaded processes

For a single thread, the current wakeup path tends to place it on the
same LLC where it was previously running with cache-hot data. There is
no need to enable cache-aware scheduling for single-threaded processes
for the following reasons:

1. Cache-aware scheduling primarily benefits multi-threaded processes
   where threads share data. Single-threaded processes typically have
   no inter-thread data sharing and thus gain little.

2. Enabling it incurs the additional overhead of tracking the thread's
   residency in the LLCs.

3. Bypassing single-threaded processes avoids excessive concentration
   of such tasks on a single LLC.

Nevertheless, this check can be omitted if users explicitly provide
hints for such single-threaded workloads where different processes have
shared memory, e.g., via prctl() or other interfaces to be added in the
future.

Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Co-developed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Tingyin Duan <tingyin.duan@gmail.com>
Link: https://patch.msgid.link/8a59a13aa58fdb48e410ecb2aabd97fe3ea5d256.1778703694.git.tim.c.chen@linux.intel.com
author: Chen Yu <yu.c.chen@intel.com> 2026-05-13 23:39:14 +0300
committer: Peter Zijlstra <peterz@infradead.org> 2026-05-18 22:33:14 +0300
commit: 7b34bb1ca324451c84c0a69136ce92e7928cf72b (patch)
tree: f745e629cba55630fa0fcab18ccabb1b5a3adac4
parent: deee5e27d5b608323c04dc99979e55f944016a13 (diff)
download: linux-7b34bb1ca324451c84c0a69136ce92e7928cf72b.tar.xz
1 files changed, 16 insertions, 4 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 808f614fc2d2..df21366ba1ca 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1384,8 +1384,12 @@ static int llc_id(int cpu)
 	return per_cpu(sd_llc_id, cpu);
 }
 
-static bool invalid_llc_nr(struct mm_struct *mm, int cpu)
+static bool invalid_llc_nr(struct mm_struct *mm, struct task_struct *p,
+			   int cpu)
 {
+	if (get_nr_threads(p) <= 1)
+		return true;
+
 	return !fits_capacity((mm->sc_stat.nr_running_avg * cpu_smt_num_threads),
 			per_cpu(sd_llc_size, cpu));
 }
@@ -1581,7 +1585,7 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
 	 * its preferred state.
 	 */
 	if (epoch - READ_ONCE(mm->sc_stat.epoch) > EPOCH_LLC_AFFINITY_TIMEOUT ||
-	    invalid_llc_nr(mm, cpu_of(rq))) {
+	    invalid_llc_nr(mm, p, cpu_of(rq))) {
 		if (mm->sc_stat.cpu != -1)
 			mm->sc_stat.cpu = -1;
 	}
@@ -1687,9 +1691,9 @@ static inline void update_avg_scale(u64 *avg, u64 sample)
 
 static void task_cache_work(struct callback_head *work)
 {
+	int cpu, m_a_cpu = -1, nr_running = 0, curr_cpu;
 	unsigned long next_scan, now = jiffies;
 	struct task_struct *p = current, *cur;
-	int cpu, m_a_cpu = -1, nr_running = 0;
 	unsigned long curr_m_a_occ = 0;
 	struct mm_struct *mm = p->mm;
 	unsigned long m_a_occ = 0;
@@ -1711,6 +1715,14 @@ static void task_cache_work(struct callback_head *work)
 			 now + EPOCH_PERIOD))
 		return;
 
+	curr_cpu = task_cpu(p);
+	if (invalid_llc_nr(mm, p, curr_cpu)) {
+		if (mm->sc_stat.cpu != -1)
+			mm->sc_stat.cpu = -1;
+
+		return;
+	}
+
 	if (!zalloc_cpumask_var(&cpus, GFP_KERNEL))
 		return;
 
@@ -10326,7 +10338,7 @@ static enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
 		return mig_unrestricted;
 
 	/* skip cache aware load balance for too many threads */
-	if (invalid_llc_nr(mm, dst_cpu)) {
+	if (invalid_llc_nr(mm, p, dst_cpu)) {
 		if (mm->sc_stat.cpu != -1)
 			mm->sc_stat.cpu = -1;
 		return mig_unrestricted;
author	Chen Yu <yu.c.chen@intel.com>	2026-05-13 23:39:14 +0300
committer	Peter Zijlstra <peterz@infradead.org>	2026-05-18 22:33:14 +0300
commit	7b34bb1ca324451c84c0a69136ce92e7928cf72b (patch)
tree	f745e629cba55630fa0fcab18ccabb1b5a3adac4
parent	deee5e27d5b608323c04dc99979e55f944016a13 (diff)
download	linux-7b34bb1ca324451c84c0a69136ce92e7928cf72b.tar.xz