diff options
-rw-r--r-- | drivers/cpufreq/Kconfig | 5 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.c | 2 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 12 | ||||
-rw-r--r-- | kernel/sched/cpufreq.c | 2 | ||||
-rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 67 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 4 | ||||
-rw-r--r-- | kernel/sched/fair.c | 12 | ||||
-rw-r--r-- | kernel/sched/rt.c | 4 | ||||
-rw-r--r-- | kernel/sched/sched.h | 31 |
10 files changed, 67 insertions, 74 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 74919aa81dcb..4dc95250cf4e 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -194,7 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. config CPU_FREQ_GOV_SCHEDUTIL - tristate "'schedutil' cpufreq policy governor" + bool "'schedutil' cpufreq policy governor" depends on CPU_FREQ && SMP select CPU_FREQ_GOV_ATTR_SET select IRQ_WORK @@ -208,9 +208,6 @@ config CPU_FREQ_GOV_SCHEDUTIL frequency tipping point is at utilization/capacity equal to 80% in both cases. - To compile this driver as a module, choose M here: the module will - be called cpufreq_schedutil. - If in doubt, say N. comment "CPU frequency scaling drivers" diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e415349ab31b..642dd0f183a8 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -260,7 +260,7 @@ static void dbs_irq_work(struct irq_work *irq_work) } static void dbs_update_util_handler(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index be9eade147f2..bdbe9369146b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1329,7 +1329,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) } static void intel_pstate_update_util(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns = time - cpu->sample.time; diff --git a/include/linux/sched.h b/include/linux/sched.h index 62c68e513e39..b0fa726b7f31 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3469,15 +3469,19 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#define SCHED_CPUFREQ_RT (1U << 0) +#define SCHED_CPUFREQ_DL (1U << 1) + +#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) + #ifdef CONFIG_CPU_FREQ struct update_util_data { - void (*func)(struct update_util_data *data, - u64 time, unsigned long util, unsigned long max); + void (*func)(struct update_util_data *data, u64 time, unsigned int flags); }; void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max)); + void (*func)(struct update_util_data *data, u64 time, + unsigned int flags)); void cpufreq_remove_update_util_hook(int cpu); #endif /* CONFIG_CPU_FREQ */ diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c index 1141954e73b4..dbc51442ecbc 100644 --- a/kernel/sched/cpufreq.c +++ b/kernel/sched/cpufreq.c @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); */ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, void (*func)(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max)) + unsigned int flags)) { if (WARN_ON(!data || !func)) return; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index a84641b222c1..60d985f4dc47 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -12,7 +12,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cpufreq.h> -#include <linux/module.h> #include <linux/slab.h> #include <trace/events/power.h> @@ -53,6 +52,7 @@ struct sugov_cpu { unsigned long util; unsigned long max; u64 last_update; + unsigned int flags; }; static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); @@ -144,24 +144,39 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, return cpufreq_driver_resolve_freq(policy, freq); } +static void sugov_get_util(unsigned long *util, unsigned long *max) +{ + struct rq *rq = this_rq(); + unsigned long cfs_max = rq->cpu_capacity_orig; + + *util = min(rq->cfs.avg.util_avg, cfs_max); + *max = cfs_max; +} + static void sugov_update_single(struct update_util_data *hook, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; + unsigned long util, max; unsigned int next_f; if (!sugov_should_update_freq(sg_policy, time)) return; - next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq : - get_next_freq(sg_cpu, util, max); + if (flags & SCHED_CPUFREQ_RT_DL) { + next_f = policy->cpuinfo.max_freq; + } else { + sugov_get_util(&util, &max); + next_f = get_next_freq(sg_cpu, util, max); + } sugov_update_commit(sg_policy, time, next_f); } static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max) + unsigned long util, unsigned long max, + unsigned int flags) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; @@ -169,7 +184,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 last_freq_update_time = sg_policy->last_freq_update_time; unsigned int j; - if (util == ULONG_MAX) + if (flags & SCHED_CPUFREQ_RT_DL) return max_f; for_each_cpu(j, policy->cpus) { @@ -192,10 +207,10 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, if (delta_ns > TICK_NSEC) continue; - j_util = j_sg_cpu->util; - if (j_util == ULONG_MAX) + if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) return max_f; + j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; if (j_util * max > j_max * util) { util = j_util; @@ -207,20 +222,24 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, } static void sugov_update_shared(struct update_util_data *hook, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; + unsigned long util, max; unsigned int next_f; + sugov_get_util(&util, &max); + raw_spin_lock(&sg_policy->update_lock); sg_cpu->util = util; sg_cpu->max = max; + sg_cpu->flags = flags; sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max); + next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); sugov_update_commit(sg_policy, time, next_f); } @@ -444,8 +463,9 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->sg_policy = sg_policy; if (policy_is_shared(policy)) { - sg_cpu->util = ULONG_MAX; + sg_cpu->util = 0; sg_cpu->max = 0; + sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; sg_cpu->cached_raw_freq = 0; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, @@ -495,28 +515,15 @@ static struct cpufreq_governor schedutil_gov = { .limits = sugov_limits, }; -static int __init sugov_module_init(void) -{ - return cpufreq_register_governor(&schedutil_gov); -} - -static void __exit sugov_module_exit(void) -{ - cpufreq_unregister_governor(&schedutil_gov); -} - -MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>"); -MODULE_DESCRIPTION("Utilization-based CPU frequency selection"); -MODULE_LICENSE("GPL"); - #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL struct cpufreq_governor *cpufreq_default_governor(void) { return &schedutil_gov; } - -fs_initcall(sugov_module_init); -#else -module_init(sugov_module_init); #endif -module_exit(sugov_module_exit); + +static int __init sugov_register(void) +{ + return cpufreq_register_governor(&schedutil_gov); +} +fs_initcall(sugov_register); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1ce8867283dc..4464cc3e4f3d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -735,9 +735,9 @@ static void update_curr_dl(struct rq *rq) return; } - /* kick cpufreq (see the comment in linux/cpufreq.h). */ + /* kick cpufreq (see the comment in kernel/sched/sched.h). */ if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_DL); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 039de34f1521..f91fa5796e50 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2875,11 +2875,8 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) { - struct rq *rq = rq_of(cfs_rq); - int cpu = cpu_of(rq); - - if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { - unsigned long max = rq->cpu_capacity_orig; + if (&this_rq()->cfs == cfs_rq) { + struct rq *rq = rq_of(cfs_rq); /* * There are a few boundary cases this might miss but it should @@ -2897,8 +2894,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) * * See cpu_util(). */ - cpufreq_update_util(rq_clock(rq), - min(cfs_rq->avg.util_avg, max), max); + cpufreq_update_util(rq_clock(rq), 0); } } @@ -3162,7 +3158,7 @@ static inline void update_load_avg(struct sched_entity *se, int not_used) struct cfs_rq *cfs_rq = cfs_rq_of(se); struct rq *rq = rq_of(cfs_rq); - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_clock(rq), 0); } static inline void diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d5690b722691..8a9cd9ba5153 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -957,9 +957,9 @@ static void update_curr_rt(struct rq *rq) if (unlikely((s64)delta_exec <= 0)) return; - /* Kick cpufreq (see the comment in linux/cpufreq.h). */ + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_RT); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c64fc5114004..82fc5542708c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1764,26 +1764,12 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); /** * cpufreq_update_util - Take a note about CPU utilization changes. * @time: Current time. - * @util: Current utilization. - * @max: Utilization ceiling. + * @flags: Update reason flags. * - * This function is called by the scheduler on every invocation of - * update_load_avg() on the CPU whose utilization is being updated. + * This function is called by the scheduler on the CPU whose utilization is + * being updated. * * It can only be called from RCU-sched read-side critical sections. - */ -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) -{ - struct update_util_data *data; - - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); - if (data) - data->func(data, time, util, max); -} - -/** - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. - * @time: Current time. * * The way cpufreq is currently arranged requires it to evaluate the CPU * performance state (frequency/voltage) on a regular basis to prevent it from @@ -1797,13 +1783,16 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo * but that really is a band-aid. Going forward it should be replaced with * solutions targeted more specifically at RT and DL tasks. */ -static inline void cpufreq_trigger_update(u64 time) +static inline void cpufreq_update_util(u64 time, unsigned int flags) { - cpufreq_update_util(time, ULONG_MAX, 0); + struct update_util_data *data; + + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data) + data->func(data, time, flags); } #else -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} -static inline void cpufreq_trigger_update(u64 time) {} +static inline void cpufreq_update_util(u64 time, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ #ifdef arch_scale_freq_capacity |