author		Josh Don <joshdon@google.com>		2022-11-17 03:54:18 +0300
committer	Peter Zijlstra <peterz@infradead.org>	2022-12-27 14:52:09 +0300
commit		8ad075c2eb1f6b4b33436144ea1ef2619f3b6398 (patch)
tree		2042529107850a120489d67b80e7fdcf81f7aab2 /kernel/sched/sched.h
parent		9a5322db46332a4ce42369e86f031b5e963d841c (diff)
download	linux-8ad075c2eb1f6b4b33436144ea1ef2619f3b6398.tar.xz
sched: Async unthrottling for cfs bandwidth
CFS bandwidth currently distributes new runtime and unthrottles cfs_rq's inline in an hrtimer callback. Runtime distribution is a per-cpu operation, and unthrottling is a per-cgroup operation, since a tg walk is required. On machines with a large number of cpus and large cgroup hierarchies, this cpus*cgroups work can be too much to do in a single hrtimer callback: since IRQs are disabled, hard lockups may easily occur. Specifically, we've found this scalability issue on configurations with 256 cpus, O(1000) cgroups in the hierarchy being throttled, and high memory bandwidth usage.

To fix this, we can instead unthrottle cfs_rq's asynchronously via a CSD. Each cpu is responsible for unthrottling itself, thus sharding the total work more fairly across the system, and avoiding hard lockups.

Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221117005418.3499691-1-joshdon@google.com
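As a reading aid (not part of the diff below): a minimal sketch of what the CSD-based queuing side could look like, assuming the fields this patch adds (cfs_rq->throttled_csd_list, rq->cfsb_csd, rq->cfsb_csd_list). The helper name unthrottle_cfs_rq_async() is illustrative; rq_of(), cpu_of(), this_rq(), unthrottle_cfs_rq(), the list helpers and smp_call_function_single_async() are existing kernel primitives. The caller is assumed to hold the target rq's lock so the list manipulation is safe.

/* Sketch only: queue a throttled cfs_rq for async unthrottle on its owning CPU. */
static void unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	bool first;

	/* The local CPU can unthrottle its own cfs_rq directly. */
	if (rq == this_rq()) {
		unthrottle_cfs_rq(cfs_rq);
		return;
	}

	/* Queue the cfs_rq on the owning CPU's per-rq list ... */
	first = list_empty(&rq->cfsb_csd_list);
	list_add_tail(&cfs_rq->throttled_csd_list, &rq->cfsb_csd_list);

	/* ... and kick that CPU at most once per batch via its call_single_data. */
	if (first)
		smp_call_function_single_async(cpu_of(rq), &rq->cfsb_csd);
}

Batching on list_empty() means each remote CPU receives at most one IPI per distribution pass, no matter how many of its cfs_rq's are being unthrottled.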
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--	kernel/sched/sched.h	8
1 file changed, 8 insertions, 0 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 771f8ddb7053..b3d6e819127c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -645,6 +645,9 @@ struct cfs_rq {
 	int throttled;
 	int throttle_count;
 	struct list_head throttled_list;
+#ifdef CONFIG_SMP
+	struct list_head throttled_csd_list;
+#endif
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 };
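The list node added above has to start out empty so that list_empty() can serve as the "already queued" test. A plausible place for that initialization (a sketch under that assumption, not a quote from the patch) is the existing init_cfs_rq() initializer in kernel/sched/fair.c:

#ifdef CONFIG_SMP
	/* Start empty so list_empty() means "not queued on any CPU's CSD list". */
	INIT_LIST_HEAD(&cfs_rq->throttled_csd_list);
#endif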
@@ -1154,6 +1157,11 @@ struct rq {
 	/* Scratch cpumask to be temporarily used under rq_lock */
 	cpumask_var_t scratch_mask;
+
+#if defined(CONFIG_CFS_BANDWIDTH) && defined(CONFIG_SMP)
+	call_single_data_t cfsb_csd;
+	struct list_head cfsb_csd_list;
+#endif
 };
#ifdef CONFIG_FAIR_GROUP_SCHED
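On the receiving side, rq->cfsb_csd would point at a handler that drains rq->cfsb_csd_list under the rq lock when the IPI arrives. A hedged sketch follows: the handler name __cfsb_csd_unthrottle() and its placement are assumptions; rq_lock()/rq_unlock(), cfs_rq_throttled(), unthrottle_cfs_rq(), INIT_CSD() and the list iterators are existing scheduler/kernel primitives.

/* Sketch only: IPI handler that unthrottles everything queued for this CPU. */
static void __cfsb_csd_unthrottle(void *arg)
{
	struct cfs_rq *cursor, *tmp;
	struct rq *rq = arg;
	struct rq_flags rf;

	rq_lock(rq, &rf);

	/* Drain the per-rq list, one batch per IPI. */
	list_for_each_entry_safe(cursor, tmp, &rq->cfsb_csd_list,
				 throttled_csd_list) {
		list_del_init(&cursor->throttled_csd_list);
		if (cfs_rq_throttled(cursor))
			unthrottle_cfs_rq(cursor);
	}

	rq_unlock(rq, &rf);
}

/* Presumably wired up once per CPU, e.g. from sched_init() in kernel/sched/core.c: */
#if defined(CONFIG_CFS_BANDWIDTH) && defined(CONFIG_SMP)
	INIT_CSD(&rq->cfsb_csd, __cfsb_csd_unthrottle, rq);
	INIT_LIST_HEAD(&rq->cfsb_csd_list);
#endif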