From 50e7b416d2ab10b9771bd00a4d85df90ad2e4b37 Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann
Date: Thu, 28 Apr 2022 15:43:37 +0100
Subject: sched/fair: Remove sched_trace_*() helper functions

We no longer need them as we can use DWARF debug info or BTF + pahole to
re-generate the required structs and compile against them for a given kernel.

This moves the burden of maintaining these helper functions to the module.

https://github.com/qais-yousef/sched_tp

Note that pahole v1.15 or later is required when using DWARF. For BTF,
v1.23 (not yet released) will be required; earlier versions have an
alignment problem that leads to crashes when used with BTF.

We should have enough infrastructure to make these helper functions
obsolete now, so remove them.

[Rewrote commit message to reflect the new alternative]

Signed-off-by: Dietmar Eggemann
Signed-off-by: Qais Yousef
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20220428144338.479094-2-qais.yousef@arm.com
---
 include/linux/sched.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 67f06f72c50e..fc74ea2578b7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2378,20 +2378,6 @@ static inline void rseq_syscall(struct pt_regs *regs)
 #endif
 
-const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
-char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
-int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
-
-const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq);
-const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
-const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
-
-int sched_trace_rq_cpu(struct rq *rq);
-int sched_trace_rq_cpu_capacity(struct rq *rq);
-int sched_trace_rq_nr_running(struct rq *rq);
-
-const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
-
 #ifdef CONFIG_SCHED_CORE
 extern void sched_core_free(struct task_struct *tsk);
 extern void sched_core_fork(struct task_struct *p);
-- cgit v1.2.3
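For illustration only, a minimal sketch of what the module side can look like now that
these exports are gone, in the spirit of the sched_tp module linked above. The header
name "vmlinux_structs.h" and the function name sched_tp_cfs_rq_avg() are hypothetical;
the actual work is regenerating the struct layouts for the running kernel with pahole
from DWARF or BTF.

    /*
     * Hypothetical module-side replacement for the removed
     * sched_trace_cfs_rq_avg(). "vmlinux_structs.h" stands in for a header
     * regenerated with pahole (>= v1.15 for DWARF, >= v1.23 for BTF) so that
     * struct cfs_rq and struct sched_avg match the running kernel.
     */
    #include "vmlinux_structs.h"

    static const struct sched_avg *sched_tp_cfs_rq_avg(struct cfs_rq *cfs_rq)
    {
            return cfs_rq ? &cfs_rq->avg : NULL;
    }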
From d664e399128bd78b905ff480917e2c2d4949e101 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 13 Apr 2022 15:31:02 +0200
Subject: sched: Fix missing prototype warnings

A W=1 build emits more than a dozen missing prototype warnings related to
scheduler and scheduler specific includes.

Reported-by: kernel test robot
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20220413133024.249118058@linutronix.de
---
 include/linux/sched.h        | 2 ++
 kernel/sched/build_policy.c  | 2 ++
 kernel/sched/build_utility.c | 1 +
 kernel/sched/core.c          | 3 +++
 kernel/sched/deadline.c      | 2 --
 kernel/sched/fair.c          | 1 +
 kernel/sched/sched.h         | 8 ++------
 kernel/sched/smp.h           | 6 ++++++
 kernel/stop_machine.c        | 2 --
 9 files changed, 17 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fc74ea2578b7..a27316f5f737 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2388,4 +2388,6 @@ static inline void sched_core_free(struct task_struct *tsk) { }
 static inline void sched_core_fork(struct task_struct *p) { }
 #endif
 
+extern void sched_set_stop_task(int cpu, struct task_struct *stop);
+
 #endif
diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c
index e0104b45029a..d9dc9ab3773f 100644
--- a/kernel/sched/build_policy.c
+++ b/kernel/sched/build_policy.c
@@ -15,6 +15,7 @@
 /* Headers: */
 #include
 #include
+#include
 #include
 #include
 
@@ -31,6 +32,7 @@
 #include
 
 #include "sched.h"
+#include "smp.h"
 
 #include "autogroup.h"
 #include "stats.h"
diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c
index eec0849b2aae..99bdd96f454f 100644
--- a/kernel/sched/build_utility.c
+++ b/kernel/sched/build_utility.c
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 068c088e9584..e644578bc871 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -26,7 +26,10 @@
 #include
 #include
 #include
+#include
 #include
+#include
+#include
 #include
 #include
 #include
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fb4255ae0b2c..6ae423627a7a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1220,8 +1220,6 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
 	return (dl_se->runtime <= 0);
 }
 
-extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
-
 /*
  * This function implements the GRUB accounting rule:
  * according to the GRUB reclaiming algorithm, the runtime is
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6ca054b11a16..bc9f6e94c84e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 762be73972bd..4784898e8f83 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1833,12 +1833,7 @@ static inline void dirty_sched_domain_sysctl(int cpu)
 #endif
 
 extern int sched_update_scaling(void);
-
-extern void flush_smp_call_function_from_idle(void);
-
-#else /* !CONFIG_SMP: */
-static inline void flush_smp_call_function_from_idle(void) { }
-#endif
+#endif /* CONFIG_SMP */
 
 #include "stats.h"
 
@@ -2315,6 +2310,7 @@ extern void resched_cpu(int cpu);
 extern struct rt_bandwidth def_rt_bandwidth;
 
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
 
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
diff --git a/kernel/sched/smp.h b/kernel/sched/smp.h
index 9620e323162c..5719bf9280e9 100644
--- a/kernel/sched/smp.h
+++ b/kernel/sched/smp.h
@@ -7,3 +7,9 @@
 extern void sched_ttwu_pending(void *arg);
 
 extern void send_call_function_single_ipi(int cpu);
+
+#ifdef CONFIG_SMP
+extern void flush_smp_call_function_from_idle(void);
+#else
+static inline void flush_smp_call_function_from_idle(void) { }
+#endif
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index cbc30271ea4d..6da7b91af353 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -535,8 +535,6 @@ void stop_machine_park(int cpu)
 	kthread_park(stopper->thread);
 }
 
-extern void sched_set_stop_task(int cpu, struct task_struct *stop);
-
 static void cpu_stop_create(unsigned int cpu)
 {
 	sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
-- cgit v1.2.3
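For context, a minimal sketch of the warning class this patch silences (file and
function names below are made up): with W=1, -Wmissing-prototypes complains about
every global function defined without a prior declaration, and the fix applied here
is to declare such functions once in a shared header instead of via ad-hoc extern
declarations in .c files.

    /* kernel/sched/example.c (hypothetical) -- a W=1 build warns roughly:
     *   warning: no previous prototype for 'example_global' [-Wmissing-prototypes]
     */
    void example_global(void)
    {
    }

    /* kernel/sched/example.h (hypothetical) -- declaring the function here and
     * including this header from both the definition and its callers removes
     * the warning and the scattered extern declarations. */
    void example_global(void);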
From 1a90bfd220201fbe050dfc15deaac20ca5f15638 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Wed, 13 Apr 2022 15:31:05 +0200
Subject: smp: Make softirq handling RT safe in flush_smp_call_function_queue()

flush_smp_call_function_queue() invokes do_softirq() which is not available
on PREEMPT_RT. flush_smp_call_function_queue() is invoked from the idle task
and the migration task with preemption or interrupts disabled.

So RT kernels cannot process soft interrupts in that context as that has to
acquire 'sleeping spinlocks' which is not possible with preemption or
interrupts disabled and forbidden from the idle task anyway.

The currently known SMP function call which raises a soft interrupt is in the
block layer, but this functionality is not enabled on RT kernels due to
latency and performance reasons.

RT could wake up ksoftirqd unconditionally, but this wants to be avoided if
there were soft interrupts pending already when this is invoked in the
context of the migration task. The migration task might have preempted a
threaded interrupt handler which raised a soft interrupt, but did not reach
the local_bh_enable() to process it. The "running" ksoftirqd might prevent
the handling in the interrupt thread context which is causing latency issues.

Add a new function which handles this case explicitly for RT and falls back
to do_softirq() on !RT kernels. In the RT case this warns when one of the
flushed SMP function calls raised a soft interrupt so this can be
investigated.

[ tglx: Moved the RT part out of SMP code ]

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/YgKgL6aPj8aBES6G@linutronix.de
Link: https://lore.kernel.org/r/20220413133024.356509586@linutronix.de
---
 include/linux/interrupt.h | 9 +++++++++
 kernel/smp.c              | 5 ++++-
 kernel/softirq.c          | 13 +++++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f40754caaefa..a49fe8d88676 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -607,6 +607,15 @@ struct softirq_action
 asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
 
+#ifdef CONFIG_PREEMPT_RT
+extern void do_softirq_post_smp_call_flush(unsigned int was_pending);
+#else
+static inline void do_softirq_post_smp_call_flush(unsigned int unused)
+{
+	do_softirq();
+}
+#endif
+
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
 extern void __raise_softirq_irqoff(unsigned int nr);
diff --git a/kernel/smp.c b/kernel/smp.c
index 8e85f22ed538..d54c2fe51ada 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -696,6 +696,7 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
  */
 void flush_smp_call_function_queue(void)
 {
+	unsigned int was_pending;
 	unsigned long flags;
 
 	if (llist_empty(this_cpu_ptr(&call_single_queue)))
@@ -704,9 +705,11 @@ void flush_smp_call_function_queue(void)
 	cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
 		      smp_processor_id(), CFD_SEQ_IDLE);
 	local_irq_save(flags);
+	/* Get the already pending soft interrupts for RT enabled kernels */
+	was_pending = local_softirq_pending();
 	__flush_smp_call_function_queue(true);
 	if (local_softirq_pending())
-		do_softirq();
+		do_softirq_post_smp_call_flush(was_pending);
 
 	local_irq_restore(flags);
 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index fac801815554..9f0aef8aa9ff 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -294,6 +294,19 @@ static inline void invoke_softirq(void)
 		wakeup_softirqd();
 }
 
+/*
+ * flush_smp_call_function_queue() can raise a soft interrupt in a function
+ * call. On RT kernels this is undesired and the only known functionality
+ * in the block layer which does this is disabled on RT. If soft interrupts
+ * get raised which haven't been raised before the flush, warn so it can be
+ * investigated.
+ */
+void do_softirq_post_smp_call_flush(unsigned int was_pending)
+{
+	if (WARN_ON_ONCE(was_pending != local_softirq_pending()))
+		invoke_softirq();
+}
+
 #else /* CONFIG_PREEMPT_RT */
 
 /*
-- cgit v1.2.3
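To make the new warning's trigger concrete, an illustrative sketch (not taken from the
kernel tree) of the kind of SMP function call it is meant to flag on PREEMPT_RT: a
callback that raises a soft interrupt, so local_softirq_pending() differs from the
captured was_pending once the flush has run.

    #include <linux/interrupt.h>
    #include <linux/smp.h>

    /*
     * Illustrative only: a cross-CPU callback that raises a softirq. If this
     * is flushed from the idle or migration task on an RT kernel, the pending
     * mask changes across __flush_smp_call_function_queue() and the new
     * WARN_ON_ONCE() in do_softirq_post_smp_call_flush() fires.
     */
    static void example_remote_work(void *info)
    {
            raise_softirq_irqoff(TASKLET_SOFTIRQ);
    }

    /* Queued from another CPU, e.g.:
     *   smp_call_function_single(cpu, example_remote_work, NULL, 0);
     */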
From 546a3fee174969ff323d70ff27b1ef181f0d7ceb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 17 May 2022 13:46:54 +0200
Subject: sched: Reverse sched_class layout

Because GCC-12 is fully stupid about array bounds and it's just really hard
to get a solid array definition from a linker script, flip the array order
to avoid needing negative offsets :-/

This makes the whole relational pointer magic a little less obvious, but
alas.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Kees Cook
Link: https://lkml.kernel.org/r/YoOLLmLG7HRTXeEm@hirez.programming.kicks-ass.net
---
 include/asm-generic/vmlinux.lds.h | 12 ++++++------
 kernel/sched/core.c               | 12 ++++++------
 kernel/sched/sched.h              | 15 ++++++++-------
 3 files changed, 20 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 69138e9db787..7515a465ec03 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -126,13 +126,13 @@
  */
 #define SCHED_DATA			\
 	STRUCT_ALIGN();			\
-	__begin_sched_classes = .;	\
-	*(__idle_sched_class)		\
-	*(__fair_sched_class)		\
-	*(__rt_sched_class)		\
-	*(__dl_sched_class)		\
+	__sched_class_highest = .;	\
 	*(__stop_sched_class)		\
-	__end_sched_classes = .;
+	*(__dl_sched_class)		\
+	*(__rt_sched_class)		\
+	*(__fair_sched_class)		\
+	*(__idle_sched_class)		\
+	__sched_class_lowest = .;
 
 /* The actual configuration determine if the init/exit sections
  * are handled as text/data or they can be discarded (which
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2c9f5e97682f..66c4e5922fe1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2193,7 +2193,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (p->sched_class == rq->curr->sched_class)
 		rq->curr->sched_class->check_preempt_curr(rq, p, flags);
-	else if (p->sched_class > rq->curr->sched_class)
+	else if (sched_class_above(p->sched_class, rq->curr->sched_class))
 		resched_curr(rq);
 
 	/*
@@ -5692,7 +5692,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	 * higher scheduling class, because otherwise those lose the
 	 * opportunity to pull in more work from other CPUs.
 	 */
-	if (likely(prev->sched_class <= &fair_sched_class &&
+	if (likely(!sched_class_above(prev->sched_class, &fair_sched_class) &&
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 
 		p = pick_next_task_fair(rq, prev, rf);
@@ -9472,11 +9472,11 @@ void __init sched_init(void)
 	int i;
 
 	/* Make sure the linker didn't screw up */
-	BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
-	       &fair_sched_class + 1 != &rt_sched_class ||
-	       &rt_sched_class + 1 != &dl_sched_class);
+	BUG_ON(&idle_sched_class != &fair_sched_class + 1 ||
+	       &fair_sched_class != &rt_sched_class + 1 ||
+	       &rt_sched_class != &dl_sched_class + 1);
 #ifdef CONFIG_SMP
-	BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
+	BUG_ON(&dl_sched_class != &stop_sched_class + 1);
 #endif
 
 	wait_bit_init();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fe4d1acb7e38..2ce18584dca3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2177,6 +2177,8 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
  *
  *   include/asm-generic/vmlinux.lds.h
  *
+ * *CAREFUL* they are laid out in *REVERSE* order!!!
+ *
  * Also enforce alignment on the instance, not the type, to guarantee layout.
  */
 #define DEFINE_SCHED_CLASS(name) \
@@ -2185,17 +2187,16 @@ const struct sched_class name##_sched_class \
 	__section("__" #name "_sched_class")
 
 /* Defined in include/asm-generic/vmlinux.lds.h */
-extern struct sched_class __begin_sched_classes[];
-extern struct sched_class __end_sched_classes[];
-
-#define sched_class_highest (__end_sched_classes - 1)
-#define sched_class_lowest  (__begin_sched_classes - 1)
+extern struct sched_class __sched_class_highest[];
+extern struct sched_class __sched_class_lowest[];
 
 #define for_class_range(class, _from, _to) \
-	for (class = (_from); class != (_to); class--)
+	for (class = (_from); class < (_to); class++)
 
 #define for_each_class(class) \
-	for_class_range(class, sched_class_highest, sched_class_lowest)
+	for_class_range(class, __sched_class_highest, __sched_class_lowest)
+
+#define sched_class_above(_a, _b)	((_a) < (_b))
 
 extern const struct sched_class stop_sched_class;
 extern const struct sched_class dl_sched_class;
-- cgit v1.2.3
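As a quick illustration of what the reversed layout buys (a sketch against the macros
introduced above, not additional kernel code): the highest-priority class now sits at
the lowest address, so a priority comparison is a plain '<' on the class pointers and
iteration simply walks upwards from __sched_class_highest.

    const struct sched_class *class;

    /* Visits stop, dl, rt, fair, idle, i.e. in priority order. */
    for_each_class(class) {
            /* ... */
    }

    /*
     * True with the reversed layout: &rt_sched_class is placed at a lower
     * address than &fair_sched_class, hence "above" it.
     */
    bool rt_above_fair = sched_class_above(&rt_sched_class, &fair_sched_class);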
From 991d8d8142cad94f9c5c05db25e67fa83d6f772a Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann
Date: Fri, 13 May 2022 11:34:33 +0200
Subject: topology: Remove unused cpu_cluster_mask()

default_topology[] uses cpu_clustergroup_mask() for the CLS level (guarded
by CONFIG_SCHED_CLUSTER) which is currently provided by x86
(arch/x86/kernel/smpboot.c) and arm64 (drivers/base/arch_topology.c).
The generic cpu_cluster_mask() wrapper therefore has no users; remove it.

Fixes: 778c558f49a2c ("sched: Add cluster scheduler level in core and related Kconfig for ARM64")
Signed-off-by: Dietmar Eggemann
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Barry Song
Link: https://lore.kernel.org/r/20220513093433.425163-1-dietmar.eggemann@arm.com
---
 include/linux/topology.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index f19bc3626297..4564faafd0e1 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -240,13 +240,6 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
 }
 #endif
 
-#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask)
-static inline const struct cpumask *cpu_cluster_mask(int cpu)
-{
-	return topology_cluster_cpumask(cpu);
-}
-#endif
-
 static inline const struct cpumask *cpu_cpu_mask(int cpu)
 {
 	return cpumask_of_node(cpu_to_node(cpu));
-- cgit v1.2.3
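For reference, the CLS level referred to above is wired into the default topology
table roughly as follows (a sketch based on kernel/sched/topology.c around this
kernel version; details may differ):

    static struct sched_domain_topology_level default_topology[] = {
    #ifdef CONFIG_SCHED_SMT
            { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
    #endif
    #ifdef CONFIG_SCHED_CLUSTER
            /* CLS uses the arch-provided cpu_clustergroup_mask(), not the
             * generic cpu_cluster_mask() wrapper removed here. */
            { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
    #endif
    #ifdef CONFIG_SCHED_MC
            { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
    #endif
            { cpu_cpu_mask, SD_INIT_NAME(DIE) },
            { NULL, },
    };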