diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-30 20:35:11 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-30 20:35:11 +0300 |
| commit | 6c7340a7a8d2b6ecad1ad108f6daa73ba1dc082f (patch) | |
| tree | e3381cf64f5094a266547552b0a9945012566bfa /include/linux | |
| parent | 755fa5b4fb36627796af19932a432d343220ec63 (diff) | |
| parent | 45b7f780739a3145aeef24d2dfa02517a6c82ed6 (diff) | |
| download | linux-6c7340a7a8d2b6ecad1ad108f6daa73ba1dc082f.tar.xz | |
Merge tag 'sched-core-2025-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"Core scheduler changes:
- Make migrate_{en,dis}able() inline, to improve performance
(Menglong Dong)
- Move STDL_INIT() functions out-of-line (Peter Zijlstra)
- Unify the SCHED_{SMT,CLUSTER,MC} Kconfig (Peter Zijlstra)
Fair scheduling:
- Defer throttling to when tasks exit to user-space, to reduce the
chance & impact of throttle-preemption with held locks and other
resources (Aaron Lu, Valentin Schneider)
- Get rid of sched_domains_curr_level hack for tl->cpumask(), as the
warning was getting triggered on certain topologies (Peter
Zijlstra)
Misc cleanups & fixes:
- Header cleanups (Menglong Dong)
- Fix race in push_dl_task() (Harshit Agarwal)"
* tag 'sched-core-2025-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched: Fix some typos in include/linux/preempt.h
sched: Make migrate_{en,dis}able() inline
rcu: Replace preempt.h with sched.h in include/linux/rcupdate.h
arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
sched/fair: Do not balance task to a throttled cfs_rq
sched/fair: Do not special case tasks in throttled hierarchy
sched/fair: update_cfs_group() for throttled cfs_rqs
sched/fair: Propagate load for throttled cfs_rq
sched/fair: Get rid of throttled_lb_pair()
sched/fair: Task based throttle time accounting
sched/fair: Switch to task based throttle model
sched/fair: Implement throttle task work and related helpers
sched/fair: Add related data structure for task based throttle
sched: Unify the SCHED_{SMT,CLUSTER,MC} Kconfig
sched: Move STDL_INIT() functions out-of-line
sched/fair: Get rid of sched_domains_curr_level hack for tl->cpumask()
sched/deadline: Fix race in push_dl_task()
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/preempt.h | 11 | ||||
| -rw-r--r-- | include/linux/rcupdate.h | 2 | ||||
| -rw-r--r-- | include/linux/sched.h | 118 | ||||
| -rw-r--r-- | include/linux/sched/topology.h | 29 | ||||
| -rw-r--r-- | include/linux/topology.h | 2 |
5 files changed, 134 insertions, 28 deletions
diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 1fad1c8a4c76..102202185d7a 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -372,7 +372,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, /* * Migrate-Disable and why it is undesired. * - * When a preempted task becomes elegible to run under the ideal model (IOW it + * When a preempted task becomes eligible to run under the ideal model (IOW it * becomes one of the M highest priority tasks), it might still have to wait * for the preemptee's migrate_disable() section to complete. Thereby suffering * a reduction in bandwidth in the exact duration of the migrate_disable() @@ -387,7 +387,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * - a lower priority tasks; which under preempt_disable() could've instantly * migrated away when another CPU becomes available, is now constrained * by the ability to push the higher priority task away, which might itself be - * in a migrate_disable() section, reducing it's available bandwidth. + * in a migrate_disable() section, reducing its available bandwidth. * * IOW it trades latency / moves the interference term, but it stays in the * system, and as long as it remains unbounded, the system is not fully @@ -399,7 +399,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a * number of primitives into becoming preemptible, they would also allow * migration. This turns out to break a bunch of per-cpu usage. To this end, - * all these primitives employ migirate_disable() to restore this implicit + * all these primitives employ migrate_disable() to restore this implicit * assumption. * * This is a 'temporary' work-around at best. The correct solution is getting @@ -407,7 +407,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * per-cpu locking or short preempt-disable regions. * * The end goal must be to get rid of migrate_disable(), alternatively we need - * a schedulability theory that does not depend on abritrary migration. + * a schedulability theory that does not depend on arbitrary migration. * * * Notes on the implementation. @@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * work-conserving schedulers. * */ -extern void migrate_disable(void); -extern void migrate_enable(void); /** * preempt_disable_nested - Disable preemption inside a normally preempt disabled section @@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void) DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) -DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) #ifdef CONFIG_PREEMPT_DYNAMIC diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..8f346c847ee5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -24,7 +24,7 @@ #include <linux/compiler.h> #include <linux/atomic.h> #include <linux/irqflags.h> -#include <linux/preempt.h> +#include <linux/sched.h> #include <linux/bottom_half.h> #include <linux/lockdep.h> #include <linux/cleanup.h> diff --git a/include/linux/sched.h b/include/linux/sched.h index e4ce0a76831e..cbb7340c5866 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -49,6 +49,9 @@ #include <linux/tracepoint-defs.h> #include <linux/unwind_deferred_types.h> #include <asm/kmap_size.h> +#ifndef COMPILE_OFFSETS +#include <generated/rq-offsets.h> +#endif /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -881,6 +884,11 @@ struct task_struct { #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; +#ifdef CONFIG_CFS_BANDWIDTH + struct callback_head sched_throttle_work; + struct list_head throttle_node; + bool throttled; +#endif #endif @@ -2310,4 +2318,114 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo #define alloc_tag_restore(_tag, _old) do {} while (0) #endif +#ifndef MODULE +#ifndef COMPILE_OFFSETS + +extern void ___migrate_enable(void); + +struct rq; +DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + +/* + * The "struct rq" is not available here, so we can't access the + * "runqueues" with this_cpu_ptr(), as the compilation will fail in + * this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr(): + * typeof((ptr) + 0) + * + * So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here. + */ +#ifdef CONFIG_SMP +#define this_rq_raw() arch_raw_cpu_ptr(&runqueues) +#else +#define this_rq_raw() PERCPU_PTR(&runqueues) +#endif +#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned)) + +static inline void __migrate_enable(void) +{ + struct task_struct *p = current; + +#ifdef CONFIG_DEBUG_PREEMPT + /* + * Check both overflow from migrate_disable() and superfluous + * migrate_enable(). + */ + if (WARN_ON_ONCE((s16)p->migration_disabled <= 0)) + return; +#endif + + if (p->migration_disabled > 1) { + p->migration_disabled--; + return; + } + + /* + * Ensure stop_task runs either before or after this, and that + * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). + */ + guard(preempt)(); + if (unlikely(p->cpus_ptr != &p->cpus_mask)) + ___migrate_enable(); + /* + * Mustn't clear migration_disabled() until cpus_ptr points back at the + * regular cpus_mask, otherwise things that race (eg. + * select_fallback_rq) get confused. + */ + barrier(); + p->migration_disabled = 0; + this_rq_pinned()--; +} + +static inline void __migrate_disable(void) +{ + struct task_struct *p = current; + + if (p->migration_disabled) { +#ifdef CONFIG_DEBUG_PREEMPT + /* + *Warn about overflow half-way through the range. + */ + WARN_ON_ONCE((s16)p->migration_disabled < 0); +#endif + p->migration_disabled++; + return; + } + + guard(preempt)(); + this_rq_pinned()++; + p->migration_disabled = 1; +} +#else /* !COMPILE_OFFSETS */ +static inline void __migrate_disable(void) { } +static inline void __migrate_enable(void) { } +#endif /* !COMPILE_OFFSETS */ + +/* + * So that it is possible to not export the runqueues variable, define and + * export migrate_enable/migrate_disable in kernel/sched/core.c too, and use + * them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will + * be defined in kernel/sched/core.c. + */ +#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE +static inline void migrate_disable(void) +{ + __migrate_disable(); +} + +static inline void migrate_enable(void) +{ + __migrate_enable(); +} +#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ +extern void migrate_disable(void); +extern void migrate_enable(void); +#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ + +#else /* MODULE */ +extern void migrate_disable(void); +extern void migrate_enable(void); +#endif /* MODULE */ + +DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) + #endif diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 5263746b63e8..bbcfdf12aa6e 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -30,33 +30,24 @@ struct sd_flag_debug { }; extern const struct sd_flag_debug sd_flag_debug[]; +struct sched_domain_topology_level; + #ifdef CONFIG_SCHED_SMT -static inline int cpu_smt_flags(void) -{ - return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; -} +extern int cpu_smt_flags(void); +extern const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu); #endif #ifdef CONFIG_SCHED_CLUSTER -static inline int cpu_cluster_flags(void) -{ - return SD_CLUSTER | SD_SHARE_LLC; -} +extern int cpu_cluster_flags(void); +extern const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu); #endif #ifdef CONFIG_SCHED_MC -static inline int cpu_core_flags(void) -{ - return SD_SHARE_LLC; -} +extern int cpu_core_flags(void); +extern const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu); #endif -#ifdef CONFIG_NUMA -static inline int cpu_numa_flags(void) -{ - return SD_NUMA; -} -#endif +extern const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu); extern int arch_asym_cpu_priority(int cpu); @@ -172,7 +163,7 @@ bool cpus_equal_capacity(int this_cpu, int that_cpu); bool cpus_share_cache(int this_cpu, int that_cpu); bool cpus_share_resources(int this_cpu, int that_cpu); -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +typedef const struct cpumask *(*sched_domain_mask_f)(struct sched_domain_topology_level *tl, int cpu); typedef int (*sched_domain_flags_f)(void); struct sd_data { diff --git a/include/linux/topology.h b/include/linux/topology.h index 33b7fda97d39..6575af39fd10 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -260,7 +260,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu) #endif -static inline const struct cpumask *cpu_cpu_mask(int cpu) +static inline const struct cpumask *cpu_node_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); } |
