diff options
-rw-r--r-- | kernel/sched/autogroup.c | 12 | ||||
-rw-r--r-- | kernel/sched/autogroup.h | 8 | ||||
-rw-r--r-- | kernel/sched/clock.c | 22 | ||||
-rw-r--r-- | kernel/sched/core.c | 6 | ||||
-rw-r--r-- | kernel/sched/cpuacct.c | 20 | ||||
-rw-r--r-- | kernel/sched/cpudeadline.c | 13 | ||||
-rw-r--r-- | kernel/sched/cpudeadline.h | 27 | ||||
-rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 129 | ||||
-rw-r--r-- | kernel/sched/cpupri.c | 9 | ||||
-rw-r--r-- | kernel/sched/cpupri.h | 24 | ||||
-rw-r--r-- | kernel/sched/cputime.c | 48 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 51 | ||||
-rw-r--r-- | kernel/sched/debug.c | 88 | ||||
-rw-r--r-- | kernel/sched/fair.c | 183 | ||||
-rw-r--r-- | kernel/sched/idle.c | 6 | ||||
-rw-r--r-- | kernel/sched/idle_task.c | 3 | ||||
-rw-r--r-- | kernel/sched/isolation.c | 2 | ||||
-rw-r--r-- | kernel/sched/loadavg.c | 30 | ||||
-rw-r--r-- | kernel/sched/membarrier.c | 18 | ||||
-rw-r--r-- | kernel/sched/rt.c | 25 | ||||
-rw-r--r-- | kernel/sched/sched.h | 529 | ||||
-rw-r--r-- | kernel/sched/stats.c | 7 | ||||
-rw-r--r-- | kernel/sched/stats.h | 86 | ||||
-rw-r--r-- | kernel/sched/stop_task.c | 3 | ||||
-rw-r--r-- | kernel/sched/swait.c | 3 | ||||
-rw-r--r-- | kernel/sched/topology.c | 42 | ||||
-rw-r--r-- | kernel/sched/wait.c | 4 | ||||
-rw-r--r-- | kernel/sched/wait_bit.c | 18 |
28 files changed, 706 insertions, 710 deletions
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c index bb4b9fe026a1..ff1b7b647b86 100644 --- a/kernel/sched/autogroup.c +++ b/kernel/sched/autogroup.c @@ -168,18 +168,19 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) autogroup_kref_put(prev); } -/* Allocates GFP_KERNEL, cannot be called under any spinlock */ +/* Allocates GFP_KERNEL, cannot be called under any spinlock: */ void sched_autogroup_create_attach(struct task_struct *p) { struct autogroup *ag = autogroup_create(); autogroup_move_group(p, ag); - /* drop extra reference added by autogroup_create() */ + + /* Drop extra reference added by autogroup_create(): */ autogroup_kref_put(ag); } EXPORT_SYMBOL(sched_autogroup_create_attach); -/* Cannot be called under siglock. Currently has no users */ +/* Cannot be called under siglock. Currently has no users: */ void sched_autogroup_detach(struct task_struct *p) { autogroup_move_group(p, &autogroup_default); @@ -202,7 +203,6 @@ static int __init setup_autogroup(char *str) return 1; } - __setup("noautogroup", setup_autogroup); #ifdef CONFIG_PROC_FS @@ -224,7 +224,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) if (nice < 0 && !can_nice(current, nice)) return -EPERM; - /* this is a heavy operation taking global locks.. */ + /* This is a heavy operation, taking global locks.. */ if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) return -EAGAIN; @@ -267,4 +267,4 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen) return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); } -#endif /* CONFIG_SCHED_DEBUG */ +#endif diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h index 27cd22b89824..49e6ec9559cf 100644 --- a/kernel/sched/autogroup.h +++ b/kernel/sched/autogroup.h @@ -7,9 +7,9 @@ struct autogroup { /* - * reference doesn't mean how many thread attach to this - * autogroup now. It just stands for the number of task - * could use this autogroup. + * Reference doesn't mean how many threads attach to this + * autogroup now. It just stands for the number of tasks + * which could use this autogroup. */ struct kref kref; struct task_group *tg; @@ -56,11 +56,9 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg) return tg; } -#ifdef CONFIG_SCHED_DEBUG static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) { return 0; } -#endif #endif /* CONFIG_SCHED_AUTOGROUP */ diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index e086babe6c61..7da6bec8a2ff 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -1,5 +1,5 @@ /* - * sched_clock for unstable cpu clocks + * sched_clock() for unstable CPU clocks * * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra * @@ -11,7 +11,7 @@ * Guillaume Chazarain <guichaz@gmail.com> * * - * What: + * What this file implements: * * cpu_clock(i) provides a fast (execution time) high resolution * clock with bounded drift between CPUs. The value of cpu_clock(i) @@ -26,11 +26,11 @@ * at 0 on boot (but people really shouldn't rely on that). * * cpu_clock(i) -- can be used from any context, including NMI. - * local_clock() -- is cpu_clock() on the current cpu. + * local_clock() -- is cpu_clock() on the current CPU. * * sched_clock_cpu(i) * - * How: + * How it is implemented: * * The implementation either uses sched_clock() when * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the @@ -302,21 +302,21 @@ again: * cmpxchg64 below only protects one readout. * * We must reread via sched_clock_local() in the retry case on - * 32bit as an NMI could use sched_clock_local() via the + * 32-bit kernels as an NMI could use sched_clock_local() via the * tracer and hit between the readout of - * the low32bit and the high 32bit portion. + * the low 32-bit and the high 32-bit portion. */ this_clock = sched_clock_local(my_scd); /* - * We must enforce atomic readout on 32bit, otherwise the - * update on the remote cpu can hit inbetween the readout of - * the low32bit and the high 32bit portion. + * We must enforce atomic readout on 32-bit, otherwise the + * update on the remote CPU can hit inbetween the readout of + * the low 32-bit and the high 32-bit portion. */ remote_clock = cmpxchg64(&scd->clock, 0, 0); #else /* - * On 64bit the read of [my]scd->clock is atomic versus the - * update, so we can avoid the above 32bit dance. + * On 64-bit kernels the read of [my]scd->clock is atomic versus the + * update, so we can avoid the above 32-bit dance. */ sched_clock_local(my_scd); again: diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8fff4f16c510..9427b59551c1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -135,7 +135,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) * [L] ->on_rq * RELEASE (rq->lock) * - * If we observe the old cpu in task_rq_lock, the acquire of + * If we observe the old CPU in task_rq_lock, the acquire of * the old rq->lock will fully serialize against the stores. * * If we observe the new CPU in task_rq_lock, the acquire will @@ -1457,7 +1457,7 @@ EXPORT_SYMBOL_GPL(kick_process); * * - cpu_active must be a subset of cpu_online * - * - on cpu-up we allow per-cpu kthreads on the online && !active cpu, + * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, * see __set_cpus_allowed_ptr(). At this point the newly online * CPU isn't yet part of the sched domains, and balancing will not * see it. @@ -3037,7 +3037,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) #if defined(CONFIG_64BIT) && defined(CONFIG_SMP) /* - * 64-bit doesn't need locks to atomically read a 64bit value. + * 64-bit doesn't need locks to atomically read a 64-bit value. * So we have a optimization chance when the task's delta_exec is 0. * Reading ->on_cpu is racy, but this is ok. * diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 44ab32a4fab6..1abd325e733a 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -18,7 +18,7 @@ * (balbir@in.ibm.com). */ -/* Time spent by the tasks of the cpu accounting group executing in ... */ +/* Time spent by the tasks of the CPU accounting group executing in ... */ enum cpuacct_stat_index { CPUACCT_STAT_USER, /* ... user mode */ CPUACCT_STAT_SYSTEM, /* ... kernel mode */ @@ -35,12 +35,12 @@ struct cpuacct_usage { u64 usages[CPUACCT_STAT_NSTATS]; }; -/* track cpu usage of a group of tasks and its child groups */ +/* track CPU usage of a group of tasks and its child groups */ struct cpuacct { - struct cgroup_subsys_state css; - /* cpuusage holds pointer to a u64-type object on every cpu */ - struct cpuacct_usage __percpu *cpuusage; - struct kernel_cpustat __percpu *cpustat; + struct cgroup_subsys_state css; + /* cpuusage holds pointer to a u64-type object on every CPU */ + struct cpuacct_usage __percpu *cpuusage; + struct kernel_cpustat __percpu *cpustat; }; static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) @@ -48,7 +48,7 @@ static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) return css ? container_of(css, struct cpuacct, css) : NULL; } -/* return cpu accounting group to which this task belongs */ +/* Return CPU accounting group to which this task belongs */ static inline struct cpuacct *task_ca(struct task_struct *tsk) { return css_ca(task_css(tsk, cpuacct_cgrp_id)); @@ -65,7 +65,7 @@ static struct cpuacct root_cpuacct = { .cpuusage = &root_cpuacct_cpuusage, }; -/* create a new cpu accounting group */ +/* Create a new CPU accounting group */ static struct cgroup_subsys_state * cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) { @@ -96,7 +96,7 @@ out: return ERR_PTR(-ENOMEM); } -/* destroy an existing cpu accounting group */ +/* Destroy an existing CPU accounting group */ static void cpuacct_css_free(struct cgroup_subsys_state *css) { struct cpuacct *ca = css_ca(css); @@ -162,7 +162,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) #endif } -/* return total cpu usage (in nanoseconds) of a group */ +/* Return total CPU usage (in nanoseconds) of a group */ static u64 __cpuusage_read(struct cgroup_subsys_state *css, enum cpuacct_stat_index index) { diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 6a9defebbb54..cb172b61d191 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -10,7 +10,6 @@ * as published by the Free Software Foundation; version 2 * of the License. */ - #include <linux/gfp.h> #include <linux/kernel.h> #include <linux/slab.h> @@ -147,9 +146,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, } /* - * cpudl_clear - remove a cpu from the cpudl max-heap + * cpudl_clear - remove a CPU from the cpudl max-heap * @cp: the cpudl max-heap context - * @cpu: the target cpu + * @cpu: the target CPU * * Notes: assumes cpu_rq(cpu)->lock is locked * @@ -188,8 +187,8 @@ void cpudl_clear(struct cpudl *cp, int cpu) /* * cpudl_set - update the cpudl max-heap * @cp: the cpudl max-heap context - * @cpu: the target cpu - * @dl: the new earliest deadline for this cpu + * @cpu: the target CPU + * @dl: the new earliest deadline for this CPU * * Notes: assumes cpu_rq(cpu)->lock is locked * @@ -224,7 +223,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl) /* * cpudl_set_freecpu - Set the cpudl.free_cpus * @cp: the cpudl max-heap context - * @cpu: rd attached cpu + * @cpu: rd attached CPU */ void cpudl_set_freecpu(struct cpudl *cp, int cpu) { @@ -234,7 +233,7 @@ void cpudl_set_freecpu(struct cpudl *cp, int cpu) /* * cpudl_clear_freecpu - Clear the cpudl.free_cpus * @cp: the cpudl max-heap context - * @cpu: rd attached cpu + * @cpu: rd attached CPU */ void cpudl_clear_freecpu(struct cpudl *cp, int cpu) { diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h index b010d26e108e..c26e7a0e5a66 100644 --- a/kernel/sched/cpudeadline.h +++ b/kernel/sched/cpudeadline.h @@ -1,35 +1,28 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_CPUDL_H -#define _LINUX_CPUDL_H - #include <linux/sched.h> #include <linux/sched/deadline.h> -#define IDX_INVALID -1 +#define IDX_INVALID -1 struct cpudl_item { - u64 dl; - int cpu; - int idx; + u64 dl; + int cpu; + int idx; }; struct cpudl { - raw_spinlock_t lock; - int size; - cpumask_var_t free_cpus; - struct cpudl_item *elements; + raw_spinlock_t lock; + int size; + cpumask_var_t free_cpus; + struct cpudl_item *elements; }; - #ifdef CONFIG_SMP -int cpudl_find(struct cpudl *cp, struct task_struct *p, - struct cpumask *later_mask); +int cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask); void cpudl_set(struct cpudl *cp, int cpu, u64 dl); void cpudl_clear(struct cpudl *cp, int cpu); -int cpudl_init(struct cpudl *cp); +int cpudl_init(struct cpudl *cp); void cpudl_set_freecpu(struct cpudl *cp, int cpu); void cpudl_clear_freecpu(struct cpudl *cp, int cpu); void cpudl_cleanup(struct cpudl *cp); #endif /* CONFIG_SMP */ - -#endif /* _LINUX_CPUDL_H */ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 7936f548e071..0dad8160e00f 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -20,52 +20,52 @@ #include "sched.h" struct sugov_tunables { - struct gov_attr_set attr_set; - unsigned int rate_limit_us; + struct gov_attr_set attr_set; + unsigned int rate_limit_us; }; struct sugov_policy { - struct cpufreq_policy *policy; - - struct sugov_tunables *tunables; - struct list_head tunables_hook; - - raw_spinlock_t update_lock; /* For shared policies */ - u64 last_freq_update_time; - s64 freq_update_delay_ns; - unsigned int next_freq; - unsigned int cached_raw_freq; - - /* The next fields are only needed if fast switch cannot be used. */ - struct irq_work irq_work; - struct kthread_work work; - struct mutex work_lock; - struct kthread_worker worker; - struct task_struct *thread; - bool work_in_progress; - - bool need_freq_update; + struct cpufreq_policy *policy; + + struct sugov_tunables *tunables; + struct list_head tunables_hook; + + raw_spinlock_t update_lock; /* For shared policies */ + u64 last_freq_update_time; + s64 freq_update_delay_ns; + unsigned int next_freq; + unsigned int cached_raw_freq; + + /* The next fields are only needed if fast switch cannot be used: */ + struct irq_work irq_work; + struct kthread_work work; + struct mutex work_lock; + struct kthread_worker worker; + struct task_struct *thread; + bool work_in_progress; + + bool need_freq_update; }; struct sugov_cpu { - struct update_util_data update_util; - struct sugov_policy *sg_policy; - unsigned int cpu; + struct update_util_data update_util; + struct sugov_policy *sg_policy; + unsigned int cpu; - bool iowait_boost_pending; - unsigned int iowait_boost; - unsigned int iowait_boost_max; + bool iowait_boost_pending; + unsigned int iowait_boost; + unsigned int iowait_boost_max; u64 last_update; - /* The fields below are only needed when sharing a policy. */ - unsigned long util_cfs; - unsigned long util_dl; - unsigned long max; - unsigned int flags; + /* The fields below are only needed when sharing a policy: */ + unsigned long util_cfs; + unsigned long util_dl; + unsigned long max; + unsigned int flags; - /* The field below is for single-CPU policies only. */ + /* The field below is for single-CPU policies only: */ #ifdef CONFIG_NO_HZ_COMMON - unsigned long saved_idle_calls; + unsigned long saved_idle_calls; #endif }; @@ -79,9 +79,9 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) /* * Since cpufreq_update_util() is called with rq->lock held for - * the @target_cpu, our per-cpu data is fully serialized. + * the @target_cpu, our per-CPU data is fully serialized. * - * However, drivers cannot in general deal with cross-cpu + * However, drivers cannot in general deal with cross-CPU * requests, so while get_next_freq() will work, our * sugov_update_commit() call may not for the fast switching platforms. * @@ -111,6 +111,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) } delta_ns = time - sg_policy->last_freq_update_time; + return delta_ns >= sg_policy->freq_update_delay_ns; } @@ -345,8 +346,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) return get_next_freq(sg_policy, util, max); } -static void sugov_update_shared(struct update_util_data *hook, u64 time, - unsigned int flags) +static void +sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; @@ -423,8 +424,8 @@ static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) return sprintf(buf, "%u\n", tunables->rate_limit_us); } -static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, - size_t count) +static ssize_t +rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct sugov_tunables *tunables = to_sugov_tunables(attr_set); struct sugov_policy *sg_policy; @@ -479,11 +480,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) { struct task_struct *thread; struct sched_attr attr = { - .size = sizeof(struct sched_attr), - .sched_policy = SCHED_DEADLINE, - .sched_flags = SCHED_FLAG_SUGOV, - .sched_nice = 0, - .sched_priority = 0, + .size = sizeof(struct sched_attr), + .sched_policy = SCHED_DEADLINE, + .sched_flags = SCHED_FLAG_SUGOV, + .sched_nice = 0, + .sched_priority = 0, /* * Fake (unused) bandwidth; workaround to "fix" * priority inheritance. @@ -663,21 +664,21 @@ static int sugov_start(struct cpufreq_policy *policy) struct sugov_policy *sg_policy = policy->governor_data; unsigned int cpu; - sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; - sg_policy->last_freq_update_time = 0; - sg_policy->next_freq = UINT_MAX; - sg_policy->work_in_progress = false; - sg_policy->need_freq_update = false; - sg_policy->cached_raw_freq = 0; + sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; + sg_policy->last_freq_update_time = 0; + sg_policy->next_freq = UINT_MAX; + sg_policy->work_in_progress = false; + sg_policy->need_freq_update = false; + sg_policy->cached_raw_freq = 0; for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); memset(sg_cpu, 0, sizeof(*sg_cpu)); - sg_cpu->cpu = cpu; - sg_cpu->sg_policy = sg_policy; - sg_cpu->flags = 0; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + sg_cpu->cpu = cpu; + sg_cpu->sg_policy = sg_policy; + sg_cpu->flags = 0; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; } for_each_cpu(cpu, policy->cpus) { @@ -721,14 +722,14 @@ static void sugov_limits(struct cpufreq_policy *policy) } static struct cpufreq_governor schedutil_gov = { - .name = "schedutil", - .owner = THIS_MODULE, - .dynamic_switching = true, - .init = sugov_init, - .exit = sugov_exit, - .start = sugov_start, - .stop = sugov_stop, - .limits = sugov_limits, + .name = "schedutil", + .owner = THIS_MODULE, + .dynamic_switching = true, + .init = sugov_init, + .exit = sugov_exit, + .start = sugov_start, + .stop = sugov_stop, + .limits = sugov_limits, }; #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 2511aba36b89..f43e14ccb67d 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -14,7 +14,7 @@ * * going from the lowest priority to the highest. CPUs in the INVALID state * are not eligible for routing. The system maintains this state with - * a 2 dimensional bitmap (the first for priority class, the second for cpus + * a 2 dimensional bitmap (the first for priority class, the second for CPUs * in that class). Therefore a typical application without affinity * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit * searches). For tasks with affinity restrictions, the algorithm has a @@ -26,7 +26,6 @@ * as published by the Free Software Foundation; version 2 * of the License. */ - #include <linux/gfp.h> #include <linux/sched.h> #include <linux/sched/rt.h> @@ -128,9 +127,9 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, } /** - * cpupri_set - update the cpu priority setting + * cpupri_set - update the CPU priority setting * @cp: The cpupri context - * @cpu: The target cpu + * @cpu: The target CPU * @newpri: The priority (INVALID-RT99) to assign to this CPU * * Note: Assumes cpu_rq(cpu)->lock is locked @@ -151,7 +150,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) return; /* - * If the cpu was currently mapped to a different value, we + * If the CPU was currently mapped to a different value, we * need to map it to the new value then remove the old value. * Note, we must add the new value first, otherwise we risk the * cpu being missed by the priority loop in cpupri_find. diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h index bab050019071..141a06c914c6 100644 --- a/kernel/sched/cpupri.h +++ b/kernel/sched/cpupri.h @@ -1,32 +1,26 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_CPUPRI_H -#define _LINUX_CPUPRI_H - #include <linux/sched.h> #define CPUPRI_NR_PRIORITIES (MAX_RT_PRIO + 2) -#define CPUPRI_INVALID -1 -#define CPUPRI_IDLE 0 -#define CPUPRI_NORMAL 1 +#define CPUPRI_INVALID -1 +#define CPUPRI_IDLE 0 +#define CPUPRI_NORMAL 1 /* values 2-101 are RT priorities 0-99 */ struct cpupri_vec { - atomic_t count; - cpumask_var_t mask; + atomic_t count; + cpumask_var_t mask; }; struct cpupri { - struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; - int *cpu_to_pri; + struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; + int *cpu_to_pri; }; #ifdef CONFIG_SMP -int cpupri_find(struct cpupri *cp, - struct task_struct *p, struct cpumask *lowest_mask); +int cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask); void cpupri_set(struct cpupri *cp, int cpu, int pri); -int cpupri_init(struct cpupri *cp); +int cpupri_init(struct cpupri *cp); void cpupri_cleanup(struct cpupri *cp); #endif - -#endif /* _LINUX_CPUPRI_H */ diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index bac6ac9a4ec7..d3b450b57ade 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -113,9 +113,9 @@ static inline void task_group_account_field(struct task_struct *p, int index, } /* - * Account user cpu time to a process. - * @p: the process that the cpu time gets accounted to - * @cputime: the cpu time spent in user space since the last update + * Account user CPU time to a process. + * @p: the process that the CPU time gets accounted to + * @cputime: the CPU time spent in user space since the last update */ void account_user_time(struct task_struct *p, u64 cputime) { @@ -135,9 +135,9 @@ void account_user_time(struct task_struct *p, u64 cputime) } /* - * Account guest cpu time to a process. - * @p: the process that the cpu time gets accounted to - * @cputime: the cpu time spent in virtual machine since the last update + * Account guest CPU time to a process. + * @p: the process that the CPU time gets accounted to + * @cputime: the CPU time spent in virtual machine since the last update */ void account_guest_time(struct task_struct *p, u64 cputime) { @@ -159,9 +159,9 @@ void account_guest_time(struct task_struct *p, u64 cputime) } /* - * Account system cpu time to a process and desired cpustat field - * @p: the process that the cpu time gets accounted to - * @cputime: the cpu time spent in kernel space since the last update + * Account system CPU time to a process and desired cpustat field + * @p: the process that the CPU time gets accounted to + * @cputime: the CPU time spent in kernel space since the last update * @index: pointer to cpustat field that has to be updated */ void account_system_index_time(struct task_struct *p, @@ -179,10 +179,10 @@ void account_system_index_time(struct task_struct *p, } /* - * Account system cpu time to a process. - * @p: the process that the cpu time gets accounted to + * Account system CPU time to a process. + * @p: the process that the CPU time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() - * @cputime: the cpu time spent in kernel space since the last update + * @cputime: the CPU time spent in kernel space since the last update */ void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime) { @@ -205,7 +205,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime) /* * Account for involuntary wait time. - * @cputime: the cpu time spent in involuntary wait + * @cputime: the CPU time spent in involuntary wait */ void account_steal_time(u64 cputime) { @@ -216,7 +216,7 @@ void account_steal_time(u64 cputime) /* * Account for idle time. - * @cputime: the cpu time spent in idle wait + * @cputime: the CPU time spent in idle wait */ void account_idle_time(u64 cputime) { @@ -338,7 +338,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) #ifdef CONFIG_IRQ_TIME_ACCOUNTING /* * Account a tick to a process and cpustat - * @p: the process that the cpu time gets accounted to + * @p: the process that the CPU time gets accounted to * @user_tick: is the tick from userspace * @rq: the pointer to rq * @@ -400,17 +400,16 @@ static void irqtime_account_idle_ticks(int ticks) irqtime_account_process_tick(current, 0, rq, ticks); } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ -static inline void irqtime_account_idle_ticks(int ticks) {} +static inline void irqtime_account_idle_ticks(int ticks) { } static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, - struct rq *rq, int nr_ticks) {} + struct rq *rq, int nr_ticks) { } #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ /* * Use precise platform statistics if available: */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING - -#ifndef __ARCH_HAS_VTIME_TASK_SWITCH +# ifndef __ARCH_HAS_VTIME_TASK_SWITCH void vtime_common_task_switch(struct task_struct *prev) { if (is_idle_task(prev)) @@ -421,8 +420,7 @@ void vtime_common_task_switch(struct task_struct *prev) vtime_flush(prev); arch_vtime_task_switch(prev); } -#endif - +# endif #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ @@ -469,10 +467,12 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) *ut = cputime.utime; *st = cputime.stime; } -#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */ + /* - * Account a single tick of cpu time. - * @p: the process that the cpu time gets accounted to + * Account a single tick of CPU time. + * @p: the process that the CPU time gets accounted to * @user_tick: indicates if the tick is a user or a system tick */ void account_process_tick(struct task_struct *p, int user_tick) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 65cd5ead1759..58f8b7b37983 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -539,12 +539,12 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p /* * If we cannot preempt any rq, fall back to pick any - * online cpu. + * online CPU: */ cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); if (cpu >= nr_cpu_ids) { /* - * Fail to find any suitable cpu. + * Failed to find any suitable CPU. * The task will never come back! */ BUG_ON(dl_bandwidth_enabled()); @@ -608,8 +608,7 @@ static inline void queue_pull_task(struct rq *rq) static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags); -static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, - int flags); +static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags); /* * We are being explicitly informed that a new instance is starting, @@ -1873,7 +1872,7 @@ static int find_later_rq(struct task_struct *task) /* * We have to consider system topology and task affinity - * first, then we can look for a suitable cpu. + * first, then we can look for a suitable CPU. */ if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask)) return -1; @@ -1887,7 +1886,7 @@ static int find_later_rq(struct task_struct *task) * Now we check how well this matches with task's * affinity and system topology. * - * The last cpu where the task run is our first + * The last CPU where the task run is our first * guess, since it is most likely cache-hot there. */ if (cpumask_test_cpu(cpu, later_mask)) @@ -1917,9 +1916,9 @@ static int find_later_rq(struct task_struct *task) best_cpu = cpumask_first_and(later_mask, sched_domain_span(sd)); /* - * Last chance: if a cpu being in both later_mask + * Last chance: if a CPU being in both later_mask * and current sd span is valid, that becomes our - * choice. Of course, the latest possible cpu is + * choice. Of course, the latest possible CPU is * already under consideration through later_mask. */ if (best_cpu < nr_cpu_ids) { @@ -2075,7 +2074,7 @@ retry: if (task == next_task) { /* * The task is still there. We don't try - * again, some other cpu will pull it when ready. + * again, some other CPU will pull it when ready. */ goto out; } @@ -2308,7 +2307,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p) /* * Since this might be the only -deadline task on the rq, * this is the right place to try to pull some other one - * from an overloaded cpu, if any. + * from an overloaded CPU, if any. */ if (!task_on_rq_queued(p) || rq->dl.dl_nr_running) return; @@ -2634,17 +2633,17 @@ void __dl_clear_params(struct task_struct *p) { struct sched_dl_entity *dl_se = &p->dl; - dl_se->dl_runtime = 0; - dl_se->dl_deadline = 0; - dl_se->dl_period = 0; - dl_se->flags = 0; - dl_se->dl_bw = 0; - dl_se->dl_density = 0; + dl_se->dl_runtime = 0; + dl_se->dl_deadline = 0; + dl_se->dl_period = 0; + dl_se->flags = 0; + dl_se->dl_bw = 0; + dl_se->dl_density = 0; - dl_se->dl_throttled = 0; - dl_se->dl_yielded = 0; - dl_se->dl_non_contending = 0; - dl_se->dl_overrun = 0; + dl_se->dl_throttled = 0; + dl_se->dl_yielded = 0; + dl_se->dl_non_contending = 0; + dl_se->dl_overrun = 0; } bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr) @@ -2663,21 +2662,22 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr) #ifdef CONFIG_SMP int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed) { - unsigned int dest_cpu = cpumask_any_and(cpu_active_mask, - cs_cpus_allowed); + unsigned int dest_cpu; struct dl_bw *dl_b; bool overflow; int cpus, ret; unsigned long flags; + dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed); + rcu_read_lock_sched(); dl_b = dl_bw_of(dest_cpu); raw_spin_lock_irqsave(&dl_b->lock, flags); cpus = dl_bw_cpus(dest_cpu); overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw); - if (overflow) + if (overflow) { ret = -EBUSY; - else { + } else { /* * We reserve space for this task in the destination * root_domain, as we can't fail after this point. @@ -2689,6 +2689,7 @@ int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allo } raw_spin_unlock_irqrestore(&dl_b->lock, flags); rcu_read_unlock_sched(); + return ret; } @@ -2709,6 +2710,7 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, ret = 0; raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags); rcu_read_unlock_sched(); + return ret; } @@ -2726,6 +2728,7 @@ bool dl_cpu_busy(unsigned int cpu) overflow = __dl_overflow(dl_b, cpus, 0, 0); raw_spin_unlock_irqrestore(&dl_b->lock, flags); rcu_read_unlock_sched(); + return overflow; } #endif diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 1ca0130ed4f9..7c82a9b88510 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -9,7 +9,6 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - #include <linux/proc_fs.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> @@ -274,34 +273,19 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) if (table == NULL) return NULL; - set_table_entry(&table[0], "min_interval", &sd->min_interval, - sizeof(long), 0644, proc_doulongvec_minmax, false); - set_table_entry(&table[1], "max_interval", &sd->max_interval, - sizeof(long), 0644, proc_doulongvec_minmax, false); - set_table_entry(&table[2], "busy_idx", &sd->busy_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[3], "idle_idx", &sd->idle_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[5], "wake_idx", &sd->wake_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[7], "busy_factor", &sd->busy_factor, - sizeof(int), 0644, proc_dointvec_minmax, false); - set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, - sizeof(int), 0644, proc_dointvec_minmax, false); - set_table_entry(&table[9], "cache_nice_tries", - &sd->cache_nice_tries, - sizeof(int), 0644, proc_dointvec_minmax, false); - set_table_entry(&table[10], "flags", &sd->flags, - sizeof(int), 0644, proc_dointvec_minmax, false); - set_table_entry(&table[11], "max_newidle_lb_cost", - &sd->max_newidle_lb_cost, - sizeof(long), 0644, proc_doulongvec_minmax, false); - set_table_entry(&table[12], "name", sd->name, - CORENAME_MAX_SIZE, 0444, proc_dostring, false); + set_table_entry(&table[0] , "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax, false); + set_table_entry(&table[1] , "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax, false); + set_table_entry(&table[2] , "busy_idx", &sd->busy_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); + set_table_entry(&table[3] , "idle_idx", &sd->idle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); + set_table_entry(&table[4] , "newidle_idx", &sd->newidle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); + set_table_entry(&table[5] , "wake_idx", &sd->wake_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); + set_table_entry(&table[6] , "forkexec_idx", &sd->forkexec_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); + set_table_entry(&table[7] , "busy_factor", &sd->busy_factor, sizeof(int) , 0644, proc_dointvec_minmax, false); + set_table_entry(&table[8] , "imbalance_pct", &sd->imbalance_pct, sizeof(int) , 0644, proc_dointvec_minmax, false); + set_table_entry(&table[9] , "cache_nice_tries", &sd->cache_nice_tries, sizeof(int) , 0644, proc_dointvec_minmax, false); + set_table_entry(&table[10], "flags", &sd->flags, sizeof(int) , 0644, proc_dointvec_minmax, false); + set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false); + set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false); /* &table[13] is terminator */ return table; @@ -332,8 +316,8 @@ static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu) return table; } -static cpumask_var_t sd_sysctl_cpus; -static struct ctl_table_header *sd_sysctl_header; +static cpumask_var_t sd_sysctl_cpus; +static struct ctl_table_header *sd_sysctl_header; void register_sched_domain_sysctl(void) { @@ -413,14 +397,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group { struct sched_entity *se = tg->se[cpu]; -#define P(F) \ - SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F) -#define P_SCHEDSTAT(F) \ - SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F)) -#define PN(F) \ - SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) -#define PN_SCHEDSTAT(F) \ - SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F))) +#define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F) +#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F)) +#define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) +#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F))) if (!se) return; @@ -428,6 +408,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group PN(se->exec_start); PN(se->vruntime); PN(se->sum_exec_runtime); + if (schedstat_enabled()) { PN_SCHEDSTAT(se->statistics.wait_start); PN_SCHEDSTAT(se->statistics.sleep_start); @@ -440,6 +421,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group PN_SCHEDSTAT(se->statistics.wait_sum); P_SCHEDSTAT(se->statistics.wait_count); } + P(se->load.weight); P(se->runnable_weight); #ifdef CONFIG_SMP @@ -464,6 +446,7 @@ static char *task_group_path(struct task_group *tg) return group_path; cgroup_path(tg->css.cgroup, group_path, PATH_MAX); + return group_path; } #endif @@ -799,9 +782,9 @@ void sysrq_sched_debug_show(void) /* * This itererator needs some explanation. * It returns 1 for the header position. - * This means 2 is cpu 0. - * In a hotplugged system some cpus, including cpu 0, may be missing so we have - * to use cpumask_* to iterate over the cpus. + * This means 2 is CPU 0. + * In a hotplugged system some CPUs, including CPU 0, may be missing so we have + * to use cpumask_* to iterate over the CPUs. */ static void *sched_debug_start(struct seq_file *file, loff_t *offset) { @@ -821,6 +804,7 @@ static void *sched_debug_start(struct seq_file *file, loff_t *offset) if (n < nr_cpu_ids) return (void *)(unsigned long)(n + 2); + return NULL; } @@ -835,10 +819,10 @@ static void sched_debug_stop(struct seq_file *file, void *data) } static const struct seq_operations sched_debug_sops = { - .start = sched_debug_start, - .next = sched_debug_next, - .stop = sched_debug_stop, - .show = sched_debug_show, + .start = sched_debug_start, + .next = sched_debug_next, + .stop = sched_debug_stop, + .show = sched_debug_show, }; static int sched_debug_release(struct inode *inode, struct file *file) @@ -876,14 +860,10 @@ static int __init init_sched_debug_procfs(void) __initcall(init_sched_debug_procfs); -#define __P(F) \ - SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F) -#define P(F) \ - SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F) -#define __PN(F) \ - SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F)) -#define PN(F) \ - SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) +#define __P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F) +#define P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F) +#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F)) +#define PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) #ifdef CONFIG_NUMA_BALANCING diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e1febd252a84..1f877de96c9b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -20,7 +20,6 @@ * Adaptive scheduling granularity, math enhancements by Peter Zijlstra * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra */ - #include <linux/sched/mm.h> #include <linux/sched/topology.h> @@ -103,7 +102,7 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL; #ifdef CONFIG_SMP /* - * For asym packing, by default the lower numbered cpu has higher priority. + * For asym packing, by default the lower numbered CPU has higher priority. */ int __weak arch_asym_cpu_priority(int cpu) { @@ -1181,7 +1180,7 @@ pid_t task_numa_group_id(struct task_struct *p) } /* - * The averaged statistics, shared & private, memory & cpu, + * The averaged statistics, shared & private, memory & CPU, * occupy the first half of the array. The second half of the * array is for current counters, which are averaged into the * first set by task_numa_placement. @@ -1587,7 +1586,7 @@ static void task_numa_compare(struct task_numa_env *env, * be incurred if the tasks were swapped. */ if (cur) { - /* Skip this swap candidate if cannot move to the source cpu */ + /* Skip this swap candidate if cannot move to the source CPU: */ if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) goto unlock; @@ -1631,7 +1630,7 @@ static void task_numa_compare(struct task_numa_env *env, goto balance; } - /* Balance doesn't matter much if we're running a task per cpu */ + /* Balance doesn't matter much if we're running a task per CPU: */ if (imp > env->best_imp && src_rq->nr_running == 1 && dst_rq->nr_running == 1) goto assign; @@ -1676,7 +1675,7 @@ balance: */ if (!cur) { /* - * select_idle_siblings() uses an per-cpu cpumask that + * select_idle_siblings() uses an per-CPU cpumask that * can be used from IRQ context. */ local_irq_disable(); @@ -3362,7 +3361,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) } /* - * Called within set_task_rq() right before setting a task's cpu. The + * Called within set_task_rq() right before setting a task's CPU. The * caller only guarantees p->pi_lock is held; no other assumptions, * including the state of rq->lock, should be made. */ @@ -3541,7 +3540,7 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf /* * runnable_sum can't be lower than running_sum - * As running sum is scale with cpu capacity wehreas the runnable sum + * As running sum is scale with CPU capacity wehreas the runnable sum * is not we rescale running_sum 1st */ running_sum = se->avg.util_sum / @@ -4688,7 +4687,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) if (!se) add_nr_running(rq, task_delta); - /* determine whether we need to wake up potentially idle cpu */ + /* Determine whether we need to wake up potentially idle CPU: */ if (rq->curr == rq->idle && rq->cfs.nr_running) resched_curr(rq); } @@ -5053,7 +5052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) } /* - * Both these cpu hotplug callbacks race against unregister_fair_sched_group() + * Both these CPU hotplug callbacks race against unregister_fair_sched_group() * * The race is harmless, since modifying bandwidth settings of unhooked group * bits doesn't do much. @@ -5098,7 +5097,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) */ cfs_rq->runtime_remaining = 1; /* - * Offline rq is schedulable till cpu is completely disabled + * Offline rq is schedulable till CPU is completely disabled * in take_cpu_down(), so we prevent new cfs throttling here. */ cfs_rq->runtime_enabled = 0; @@ -5335,8 +5334,8 @@ DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); * * load' = (1 - 1/2^i) * load + (1/2^i) * cur_load * - * If a cpu misses updates for n ticks (as it was idle) and update gets - * called on the n+1-th tick when cpu may be busy, then we have: + * If a CPU misses updates for n ticks (as it was idle) and update gets + * called on the n+1-th tick when CPU may be busy, then we have: * * load_n = (1 - 1/2^i)^n * load_0 * load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load @@ -5480,7 +5479,7 @@ static unsigned long weighted_cpuload(struct rq *rq) #ifdef CONFIG_NO_HZ_COMMON /* * There is no sane way to deal with nohz on smp when using jiffies because the - * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading + * CPU doing the jiffies update might drift wrt the CPU doing the jiffy reading * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. * * Therefore we need to avoid the delta approach from the regular tick when @@ -5591,7 +5590,7 @@ void cpu_load_update_active(struct rq *this_rq) } /* - * Return a low guess at the load of a migration-source cpu weighted + * Return a low guess at the load of a migration-source CPU weighted * according to the scheduling class and "nice" value. * * We want to under-estimate the load of migration sources, to @@ -5609,7 +5608,7 @@ static unsigned long source_load(int cpu, int type) } /* - * Return a high guess at the load of a migration-target cpu weighted + * Return a high guess at the load of a migration-target CPU weighted * according to the scheduling class and "nice" value. */ static unsigned long target_load(int cpu, int type) @@ -5889,7 +5888,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, max_spare_cap = 0; for_each_cpu(i, sched_group_span(group)) { - /* Bias balancing toward cpus of our domain */ + /* Bias balancing toward CPUs of our domain */ if (local_group) load = source_load(i, load_idx); else @@ -5919,7 +5918,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, if (min_runnable_load > (runnable_load + imbalance)) { /* * The runnable load is significantly smaller - * so we can pick this new cpu + * so we can pick this new CPU: */ min_runnable_load = runnable_load; min_avg_load = avg_load; @@ -5928,7 +5927,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, (100*min_avg_load > imbalance_scale*avg_load)) { /* * The runnable loads are close so take the - * blocked load into account through avg_load. + * blocked load into account through avg_load: */ min_avg_load = avg_load; idlest = group; @@ -5989,7 +5988,7 @@ skip_spare: } /* - * find_idlest_group_cpu - find the idlest cpu among the cpus in group. + * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group. */ static int find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) @@ -6067,12 +6066,12 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p new_cpu = find_idlest_group_cpu(group, p, cpu); if (new_cpu == cpu) { - /* Now try balancing at a lower domain level of cpu */ + /* Now try balancing at a lower domain level of 'cpu': */ sd = sd->child; continue; } - /* Now try balancing at a lower domain level of new_cpu */ + /* Now try balancing at a lower domain level of 'new_cpu': */ cpu = new_cpu; weight = sd->span_weight; sd = NULL; @@ -6082,7 +6081,6 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p if (tmp->flags & sd_flag) sd = tmp; } - /* while loop will break here if sd == NULL */ } return new_cpu; @@ -6278,12 +6276,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; /* - * If the previous cpu is cache affine and idle, don't be stupid. + * If the previous CPU is cache affine and idle, don't be stupid: */ if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)) return prev; - /* Check a recently used CPU as a potential idle candidate */ + /* Check a recently used CPU as a potential idle candidate: */ recent_used_cpu = p->recent_used_cpu; if (recent_used_cpu != prev && recent_used_cpu != target && @@ -6292,7 +6290,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { /* * Replace recent_used_cpu with prev as it is a potential - * candidate for the next wake. + * candidate for the next wake: */ p->recent_used_cpu = prev; return recent_used_cpu; @@ -6357,7 +6355,7 @@ static inline unsigned long task_util(struct task_struct *p) } /* - * cpu_util_wake: Compute cpu utilization with any contributions from + * cpu_util_wake: Compute CPU utilization with any contributions from * the waking task p removed. */ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) @@ -6403,10 +6401,10 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE, * SD_BALANCE_FORK, or SD_BALANCE_EXEC. * - * Balances load by selecting the idlest cpu in the idlest group, or under - * certain conditions an idle sibling cpu if the domain has SD_WAKE_AFFINE set. + * Balances load by selecting the idlest CPU in the idlest group, or under + * certain conditions an idle sibling CPU if the domain has SD_WAKE_AFFINE set. * - * Returns the target cpu number. + * Returns the target CPU number. * * preempt must be disabled. */ @@ -6431,7 +6429,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f break; /* - * If both cpu and prev_cpu are part of this domain, + * If both 'cpu' and 'prev_cpu' are part of this domain, * cpu is a valid SD_WAKE_AFFINE target. */ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && @@ -6482,9 +6480,9 @@ pick_cpu: static void detach_entity_cfs_rq(struct sched_entity *se); /* - * Called immediately before a task is migrated to a new cpu; task_cpu(p) and + * Called immediately before a task is migrated to a new CPU; task_cpu(p) and * cfs_rq_of(p) references at time of call are still valid and identify the - * previous cpu. The caller guarantees p->pi_lock or task_rq(p)->lock is held. + * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held. */ static void migrate_task_rq_fair(struct task_struct *p) { @@ -6918,17 +6916,17 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * BASICS * * The purpose of load-balancing is to achieve the same basic fairness the - * per-cpu scheduler provides, namely provide a proportional amount of compute + * per-CPU scheduler provides, namely provide a proportional amount of compute * time to each task. This is expressed in the following equation: * * W_i,n/P_i == W_j,n/P_j for all i,j (1) * - * Where W_i,n is the n-th weight average for cpu i. The instantaneous weight + * Where W_i,n is the n-th weight average for CPU i. The instantaneous weight * W_i,0 is defined as: * * W_i,0 = \Sum_j w_i,j (2) * - * Where w_i,j is the weight of the j-th runnable task on cpu i. This weight + * Where w_i,j is the weight of the j-th runnable task on CPU i. This weight * is derived from the nice value as per sched_prio_to_weight[]. * * The weight average is an exponential decay average of the instantaneous @@ -6936,7 +6934,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * * W'_i,n = (2^n - 1) / 2^n * W_i,n + 1 / 2^n * W_i,0 (3) * - * C_i is the compute capacity of cpu i, typically it is the + * C_i is the compute capacity of CPU i, typically it is the * fraction of 'recent' time available for SCHED_OTHER task execution. But it * can also include other factors [XXX]. * @@ -6957,11 +6955,11 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * SCHED DOMAINS * * In order to solve the imbalance equation (4), and avoid the obvious O(n^2) - * for all i,j solution, we create a tree of cpus that follows the hardware + * for all i,j solution, we create a tree of CPUs that follows the hardware * topology where each level pairs two lower groups (or better). This results - * in O(log n) layers. Furthermore we reduce the number of cpus going up the + * in O(log n) layers. Furthermore we reduce the number of CPUs going up the * tree to only the first of the previous level and we decrease the frequency - * of load-balance at each level inv. proportional to the number of cpus in + * of load-balance at each level inv. proportional to the number of CPUs in * the groups. * * This yields: @@ -6970,7 +6968,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * \Sum { --- * --- * 2^i } = O(n) (5) * i = 0 2^i 2^i * `- size of each group - * | | `- number of cpus doing load-balance + * | | `- number of CPUs doing load-balance * | `- freq * `- sum over all levels * @@ -6978,7 +6976,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * this makes (5) the runtime complexity of the balancer. * * An important property here is that each CPU is still (indirectly) connected - * to every other cpu in at most O(log n) steps: + * to every other CPU in at most O(log n) steps: * * The adjacency matrix of the resulting graph is given by: * @@ -6990,7 +6988,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * * A^(log_2 n)_i,j != 0 for all i,j (7) * - * Showing there's indeed a path between every cpu in at most O(log n) steps. + * Showing there's indeed a path between every CPU in at most O(log n) steps. * The task movement gives a factor of O(m), giving a convergence complexity * of: * @@ -7000,7 +6998,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * WORK CONSERVING * * In order to avoid CPUs going idle while there's still work to do, new idle - * balancing is more aggressive and has the newly idle cpu iterate up the domain + * balancing is more aggressive and has the newly idle CPU iterate up the domain * tree itself instead of relying on other CPUs to bring it work. * * This adds some complexity to both (5) and (8) but it reduces the total idle @@ -7021,7 +7019,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * * s_k,i = \Sum_j w_i,j,k and S_k = \Sum_i s_k,i (10) * - * w_i,j,k is the weight of the j-th runnable task in the k-th cgroup on cpu i. + * w_i,j,k is the weight of the j-th runnable task in the k-th cgroup on CPU i. * * The big problem is S_k, its a global sum needed to compute a local (W_i) * property. @@ -7185,7 +7183,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) env->flags |= LBF_SOME_PINNED; /* - * Remember if this task can be migrated to any other cpu in + * Remember if this task can be migrated to any other CPU in * our sched_group. We may want to revisit it if we couldn't * meet load balance goals by pulling other tasks on src_cpu. * @@ -7195,7 +7193,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) if (env->idle == CPU_NEWLY_IDLE || (env->flags & LBF_DST_PINNED)) return 0; - /* Prevent to re-select dst_cpu via env's cpus */ + /* Prevent to re-select dst_cpu via env's CPUs: */ for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { env->flags |= LBF_DST_PINNED; @@ -7769,8 +7767,8 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) * Group imbalance indicates (and tries to solve) the problem where balancing * groups is inadequate due to ->cpus_allowed constraints. * - * Imagine a situation of two groups of 4 cpus each and 4 tasks each with a - * cpumask covering 1 cpu of the first group and 3 cpus of the second group. + * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a + * cpumask covering 1 CPU of the first group and 3 CPUs of the second group. * Something like: * * { 0 1 2 3 } { 4 5 6 7 } @@ -7778,7 +7776,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) * * If we were to balance group-wise we'd place two tasks in the first group and * two tasks in the second group. Clearly this is undesired as it will overload - * cpu 3 and leave one of the cpus in the second group unused. + * cpu 3 and leave one of the CPUs in the second group unused. * * The current solution to this issue is detecting the skew in the first group * by noticing the lower domain failed to reach balance and had difficulty @@ -7891,7 +7889,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, for_each_cpu_and(i, sched_group_span(group), env->cpus) { struct rq *rq = cpu_rq(i); - /* Bias balancing toward cpus of our domain */ + /* Bias balancing toward CPUs of our domain: */ if (local_group) load = target_load(i, load_idx); else @@ -7977,7 +7975,7 @@ asym_packing: if (!(env->sd->flags & SD_ASYM_PACKING)) return true; - /* No ASYM_PACKING if target cpu is already busy */ + /* No ASYM_PACKING if target CPU is already busy */ if (env->idle == CPU_NOT_IDLE) return true; /* @@ -7990,7 +7988,7 @@ asym_packing: if (!sds->busiest) return true; - /* Prefer to move from lowest priority cpu's work */ + /* Prefer to move from lowest priority CPU's work */ if (sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu)) return true; @@ -8243,7 +8241,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s if (busiest->group_type == group_imbalanced) { /* * In the group_imb case we cannot rely on group-wide averages - * to ensure cpu-load equilibrium, look at wider averages. XXX + * to ensure CPU-load equilibrium, look at wider averages. XXX */ busiest->load_per_task = min(busiest->load_per_task, sds->avg_load); @@ -8262,7 +8260,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s } /* - * If there aren't any idle cpus, avoid creating some. + * If there aren't any idle CPUs, avoid creating some. */ if (busiest->group_type == group_overloaded && local->group_type == group_overloaded) { @@ -8276,9 +8274,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s } /* - * We're trying to get all the cpus to the average_load, so we don't + * We're trying to get all the CPUs to the average_load, so we don't * want to push ourselves above the average load, nor do we wish to - * reduce the max loaded cpu below the average load. At the same time, + * reduce the max loaded CPU below the average load. At the same time, * we also don't want to reduce the group load below the group * capacity. Thus we look for the minimum possible imbalance. */ @@ -8372,9 +8370,9 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (env->idle == CPU_IDLE) { /* - * This cpu is idle. If the busiest group is not overloaded + * This CPU is idle. If the busiest group is not overloaded * and there is no imbalance between this and busiest group - * wrt idle cpus, it is balanced. The imbalance becomes + * wrt idle CPUs, it is balanced. The imbalance becomes * significant if the diff is greater than 1 otherwise we * might end up to just move the imbalance on another group */ @@ -8402,7 +8400,7 @@ out_balanced: } /* - * find_busiest_queue - find the busiest runqueue among the cpus in group. + * find_busiest_queue - find the busiest runqueue among the CPUs in the group. */ static struct rq *find_busiest_queue(struct lb_env *env, struct sched_group *group) @@ -8446,7 +8444,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, /* * When comparing with imbalance, use weighted_cpuload() - * which is not scaled with the cpu capacity. + * which is not scaled with the CPU capacity. */ if (rq->nr_running == 1 && wl > env->imbalance && @@ -8454,9 +8452,9 @@ static struct rq *find_busiest_queue(struct lb_env *env, continue; /* - * For the load comparisons with the other cpu's, consider - * the weighted_cpuload() scaled with the cpu capacity, so - * that the load can be moved away from the cpu that is + * For the load comparisons with the other CPU's, consider + * the weighted_cpuload() scaled with the CPU capacity, so + * that the load can be moved away from the CPU that is * potentially running at a lower capacity. * * Thus we're looking for max(wl_i / capacity_i), crosswise @@ -8527,13 +8525,13 @@ static int should_we_balance(struct lb_env *env) return 0; /* - * In the newly idle case, we will allow all the cpu's + * In the newly idle case, we will allow all the CPUs * to do the newly idle load balance. */ if (env->idle == CPU_NEWLY_IDLE) return 1; - /* Try to find first idle cpu */ + /* Try to find first idle CPU */ for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) { if (!idle_cpu(cpu)) continue; @@ -8546,7 +8544,7 @@ static int should_we_balance(struct lb_env *env) balance_cpu = group_balance_cpu(sg); /* - * First idle cpu or the first cpu(busiest) in this sched group + * First idle CPU or the first CPU(busiest) in this sched group * is eligible for doing load balancing at this and above domains. */ return balance_cpu == env->dst_cpu; @@ -8655,7 +8653,7 @@ more_balance: * Revisit (affine) tasks on src_cpu that couldn't be moved to * us and move them to an alternate dst_cpu in our sched_group * where they can run. The upper limit on how many times we - * iterate on same src_cpu is dependent on number of cpus in our + * iterate on same src_cpu is dependent on number of CPUs in our * sched_group. * * This changes load balance semantics a bit on who can move @@ -8672,7 +8670,7 @@ more_balance: */ if ((env.flags & LBF_DST_PINNED) && env.imbalance > 0) { - /* Prevent to re-select dst_cpu via env's cpus */ + /* Prevent to re-select dst_cpu via env's CPUs */ cpumask_clear_cpu(env.dst_cpu, env.cpus); env.dst_rq = cpu_rq(env.new_dst_cpu); @@ -8734,9 +8732,10 @@ more_balance: raw_spin_lock_irqsave(&busiest->lock, flags); - /* don't kick the active_load_balance_cpu_stop, - * if the curr task on busiest cpu can't be - * moved to this_cpu + /* + * Don't kick the active_load_balance_cpu_stop, + * if the curr task on busiest CPU can't be + * moved to this_cpu: */ if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { raw_spin_unlock_irqrestore(&busiest->lock, @@ -8962,7 +8961,7 @@ out: } /* - * active_load_balance_cpu_stop is run by cpu stopper. It pushes + * active_load_balance_cpu_stop is run by the CPU stopper. It pushes * running tasks off the busiest CPU onto idle CPUs. It requires at * least 1 task to be running on each physical CPU where possible, and * avoids physical / logical imbalances. @@ -8986,7 +8985,7 @@ static int active_load_balance_cpu_stop(void *data) if (!cpu_active(busiest_cpu) || !cpu_active(target_cpu)) goto out_unlock; - /* make sure the requested cpu hasn't gone down in the meantime */ + /* Make sure the requested CPU hasn't gone down in the meantime: */ if (unlikely(busiest_cpu != smp_processor_id() || !busiest_rq->active_balance)) goto out_unlock; @@ -8998,7 +8997,7 @@ static int active_load_balance_cpu_stop(void *data) /* * This condition is "impossible", if it occurs * we need to fix it. Originally reported by - * Bjorn Helgaas on a 128-cpu setup. + * Bjorn Helgaas on a 128-CPU setup. */ BUG_ON(busiest_rq == target_rq); @@ -9100,7 +9099,7 @@ static void nohz_balancer_kick(void) return; /* * Use smp_send_reschedule() instead of resched_cpu(). - * This way we generate a sched IPI on the target cpu which + * This way we generate a sched IPI on the target CPU which * is idle. And the softirq performing nohz idle load balance * will be run before returning from the IPI. */ @@ -9157,14 +9156,12 @@ unlock: } /* - * This routine will record that the cpu is going idle with tick stopped. + * This routine will record that the CPU is going idle with tick stopped. * This info will be used in performing idle load balancing in the future. */ void nohz_balance_enter_idle(int cpu) { - /* - * If this cpu is going down, then nothing needs to be done. - */ + /* If this CPU is going down, then nothing needs to be done: */ if (!cpu_active(cpu)) return; @@ -9175,9 +9172,7 @@ void nohz_balance_enter_idle(int cpu) if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) return; - /* - * If we're a completely isolated CPU, we don't play. - */ + /* If we're a completely isolated CPU, we don't play: */ if (on_null_domain(cpu_rq(cpu))) return; @@ -9286,7 +9281,7 @@ out: /* * next_balance will be updated only when there is a need. - * When the cpu is attached to null domain for ex, it will not be + * When the CPU is attached to null domain for ex, it will not be * updated. */ if (likely(update_next_balance)) { @@ -9310,7 +9305,7 @@ out: #ifdef CONFIG_NO_HZ_COMMON /* * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the - * rebalancing for all the cpus for whom scheduler ticks are stopped. + * rebalancing for all the CPUs for whom scheduler ticks are stopped. */ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { @@ -9330,8 +9325,8 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) continue; /* - * If this cpu gets work to do, stop the load balancing - * work being done for other cpus. Next load + * If this CPU gets work to do, stop the load balancing + * work being done for other CPUs. Next load * balancing owner will pick it up. */ if (need_resched()) @@ -9373,13 +9368,13 @@ end: /* * Current heuristic for kicking the idle load balancer in the presence - * of an idle cpu in the system. + * of an idle CPU in the system. * - This rq has more than one task. * - This rq has at least one CFS task and the capacity of the CPU is * significantly reduced because of RT tasks or IRQs. - * - At parent of LLC scheduler domain level, this cpu's scheduler group has - * multiple busy cpu. - * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler + * - At parent of LLC scheduler domain level, this CPU's scheduler group has + * multiple busy CPUs. + * - For SD_ASYM_PACKING, if the lower numbered CPU's in the scheduler * domain span are idle. */ static inline bool nohz_kick_needed(struct rq *rq) @@ -9469,10 +9464,10 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h) CPU_IDLE : CPU_NOT_IDLE; /* - * If this cpu has a pending nohz_balance_kick, then do the - * balancing on behalf of the other idle cpus whose ticks are + * If this CPU has a pending nohz_balance_kick, then do the + * balancing on behalf of the other idle CPUs whose ticks are * stopped. Do nohz_idle_balance *before* rebalance_domains to - * give the idle cpus a chance to load balance. Else we may + * give the idle CPUs a chance to load balance. Else we may * load balance only within the local sched_domain hierarchy * and abort nohz_idle_balance altogether if we pull some load. */ diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 7dae9eb8c042..343d25f85477 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -1,5 +1,5 @@ /* - * Generic entry point for the idle threads + * Generic entry points for the idle threads */ #include <linux/sched.h> #include <linux/sched/idle.h> @@ -332,8 +332,8 @@ void cpu_startup_entry(enum cpuhp_state state) { /* * This #ifdef needs to die, but it's too late in the cycle to - * make this generic (arm and sh have never invoked the canary - * init for the non boot cpus!). Will be fixed in 3.11 + * make this generic (ARM and SH have never invoked the canary + * init for the non boot CPUs!). Will be fixed in 3.11 */ #ifdef CONFIG_X86 /* diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 48b8a83f5185..ec73680922f8 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -14,7 +14,7 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags) { return task_cpu(p); /* IDLE tasks as never migrated */ } -#endif /* CONFIG_SMP */ +#endif /* * Idle tasks are unconditionally rescheduled: @@ -30,6 +30,7 @@ pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf put_prev_task(rq, prev); update_idle_core(rq); schedstat_inc(rq->sched_goidle); + return rq->idle; } diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 39f340dde1d7..aad5f48a07c6 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -6,13 +6,13 @@ * Copyright (C) 2017-2018 SUSE, Frederic Weisbecker * */ - #include <linux/sched/isolation.h> #include <linux/tick.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/static_key.h> #include <linux/ctype.h> + #include "sched.h" DEFINE_STATIC_KEY_FALSE(housekeeping_overriden); diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 89a989e4d758..a398e7e28a8a 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -32,29 +32,29 @@ * Due to a number of reasons the above turns in the mess below: * * - for_each_possible_cpu() is prohibitively expensive on machines with - * serious number of cpus, therefore we need to take a distributed approach + * serious number of CPUs, therefore we need to take a distributed approach * to calculating nr_active. * * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0 * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) } * * So assuming nr_active := 0 when we start out -- true per definition, we - * can simply take per-cpu deltas and fold those into a global accumulate + * can simply take per-CPU deltas and fold those into a global accumulate * to obtain the same result. See calc_load_fold_active(). * - * Furthermore, in order to avoid synchronizing all per-cpu delta folding + * Furthermore, in order to avoid synchronizing all per-CPU delta folding * across the machine, we assume 10 ticks is sufficient time for every - * cpu to have completed this task. + * CPU to have completed this task. * * This places an upper-bound on the IRQ-off latency of the machine. Then * again, being late doesn't loose the delta, just wrecks the sample. * - * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because - * this would add another cross-cpu cacheline miss and atomic operation - * to the wakeup path. Instead we increment on whatever cpu the task ran - * when it went into uninterruptible state and decrement on whatever cpu + * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-CPU because + * this would add another cross-CPU cacheline miss and atomic operation + * to the wakeup path. Instead we increment on whatever CPU the task ran + * when it went into uninterruptible state and decrement on whatever CPU * did the wakeup. This means that only the sum of nr_uninterruptible over - * all cpus yields the correct result. + * all CPUs yields the correct result. * * This covers the NO_HZ=n code, for extra head-aches, see the comment below. */ @@ -115,11 +115,11 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) * Handle NO_HZ for the global load-average. * * Since the above described distributed algorithm to compute the global - * load-average relies on per-cpu sampling from the tick, it is affected by + * load-average relies on per-CPU sampling from the tick, it is affected by * NO_HZ. * * The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon - * entering NO_HZ state such that we can include this as an 'extra' cpu delta + * entering NO_HZ state such that we can include this as an 'extra' CPU delta * when we read the global state. * * Obviously reality has to ruin such a delightfully simple scheme: @@ -146,9 +146,9 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) * busy state. * * This is solved by pushing the window forward, and thus skipping the - * sample, for this cpu (effectively using the NO_HZ-delta for this cpu which + * sample, for this CPU (effectively using the NO_HZ-delta for this CPU which * was in effect at the time the window opened). This also solves the issue - * of having to deal with a cpu having been in NO_HZ for multiple LOAD_FREQ + * of having to deal with a CPU having been in NO_HZ for multiple LOAD_FREQ * intervals. * * When making the ILB scale, we should try to pull this in as well. @@ -299,7 +299,7 @@ calc_load_n(unsigned long load, unsigned long exp, } /* - * NO_HZ can leave us missing all per-cpu ticks calling + * NO_HZ can leave us missing all per-CPU ticks calling * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into * calc_load_nohz per calc_load_nohz_start(), all we need to do is fold * in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary. @@ -363,7 +363,7 @@ void calc_global_load(unsigned long ticks) return; /* - * Fold the 'old' NO_HZ-delta to include all NO_HZ cpus. + * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs. */ delta = calc_load_nohz_fold(); if (delta) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 5d0762633639..2c6ae2413fa2 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -27,18 +27,18 @@ * except MEMBARRIER_CMD_QUERY. */ #ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE -#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ - (MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \ +#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ + (MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE) #else #define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK 0 #endif -#define MEMBARRIER_CMD_BITMASK \ - (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \ - | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \ - | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ - | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \ +#define MEMBARRIER_CMD_BITMASK \ + (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \ + | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \ + | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK) static void ipi_mb(void *info) @@ -85,6 +85,7 @@ static int membarrier_global_expedited(void) */ if (cpu == raw_smp_processor_id()) continue; + rcu_read_lock(); p = task_rcu_dereference(&cpu_rq(cpu)->curr); if (p && p->mm && (atomic_read(&p->mm->membarrier_state) & @@ -188,6 +189,7 @@ static int membarrier_private_expedited(int flags) * rq->curr modification in scheduler. */ smp_mb(); /* exit from system call is not a mb */ + return 0; } @@ -219,6 +221,7 @@ static int membarrier_register_global_expedited(void) } atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY, &mm->membarrier_state); + return 0; } @@ -253,6 +256,7 @@ static int membarrier_register_private_expedited(int flags) synchronize_sched(); } atomic_or(state, &mm->membarrier_state); + return 0; } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c80563b4f6b9..e40498872111 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1453,9 +1453,9 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) return; /* - * There appears to be other cpus that can accept - * current and none to run 'p', so lets reschedule - * to try and push current away: + * There appear to be other CPUs that can accept + * the current task but none can run 'p', so lets reschedule + * to try and push the current task away: */ requeue_task_rt(rq, p, 1); resched_curr(rq); @@ -1596,12 +1596,13 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) if (!task_running(rq, p) && cpumask_test_cpu(cpu, &p->cpus_allowed)) return 1; + return 0; } /* * Return the highest pushable rq's task, which is suitable to be executed - * on the cpu, NULL otherwise + * on the CPU, NULL otherwise */ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) { @@ -1639,11 +1640,11 @@ static int find_lowest_rq(struct task_struct *task) return -1; /* No targets found */ /* - * At this point we have built a mask of cpus representing the + * At this point we have built a mask of CPUs representing the * lowest priority tasks in the system. Now we want to elect * the best one based on our affinity and topology. * - * We prioritize the last cpu that the task executed on since + * We prioritize the last CPU that the task executed on since * it is most likely cache-hot in that location. */ if (cpumask_test_cpu(cpu, lowest_mask)) @@ -1651,7 +1652,7 @@ static int find_lowest_rq(struct task_struct *task) /* * Otherwise, we consult the sched_domains span maps to figure - * out which cpu is logically closest to our hot cache data. + * out which CPU is logically closest to our hot cache data. */ if (!cpumask_test_cpu(this_cpu, lowest_mask)) this_cpu = -1; /* Skip this_cpu opt if not among lowest */ @@ -1692,6 +1693,7 @@ static int find_lowest_rq(struct task_struct *task) cpu = cpumask_any(lowest_mask); if (cpu < nr_cpu_ids) return cpu; + return -1; } @@ -1827,7 +1829,7 @@ retry: * The task hasn't migrated, and is still the next * eligible task, but we failed to find a run-queue * to push it to. Do not retry in this case, since - * other cpus will pull from us when ready. + * other CPUs will pull from us when ready. */ goto out; } @@ -1919,7 +1921,7 @@ static int rto_next_cpu(struct root_domain *rd) * rt_next_cpu() will simply return the first CPU found in * the rto_mask. * - * If rto_next_cpu() is called with rto_cpu is a valid cpu, it + * If rto_next_cpu() is called with rto_cpu is a valid CPU, it * will return the next CPU found in the rto_mask. * * If there are no more CPUs left in the rto_mask, then a check is made @@ -1980,7 +1982,7 @@ static void tell_cpu_to_push(struct rq *rq) raw_spin_lock(&rq->rd->rto_lock); /* - * The rto_cpu is updated under the lock, if it has a valid cpu + * The rto_cpu is updated under the lock, if it has a valid CPU * then the IPI is still running and will continue due to the * update to loop_next, and nothing needs to be done here. * Otherwise it is finishing up and an ipi needs to be sent. @@ -2105,7 +2107,7 @@ static void pull_rt_task(struct rq *this_rq) /* * There's a chance that p is higher in priority - * than what's currently running on its cpu. + * than what's currently running on its CPU. * This is just that p is wakeing up and hasn't * had a chance to schedule. We only pull * p if it is lower in priority than the @@ -2693,6 +2695,7 @@ int sched_rr_handler(struct ctl_table *table, int write, msecs_to_jiffies(sysctl_sched_rr_timeslice); } mutex_unlock(&mutex); + return ret; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index dc6c8b5a24ad..bd1461ae06e4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ - +/* + * Scheduler internal types and methods: + */ #include <linux/sched.h> #include <linux/sched/autogroup.h> #include <linux/sched/sysctl.h> @@ -79,11 +81,11 @@ static inline void cpu_load_update_active(struct rq *this_rq) { } * and does not change the user-interface for setting shares/weights. * * We increase resolution only if we have enough bits to allow this increased - * resolution (i.e. 64bit). The costs for increasing resolution when 32bit are - * pretty high and the returns do not justify the increased costs. + * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit + * are pretty high and the returns do not justify the increased costs. * - * Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to - * increase coverage and consistency always enable it on 64bit platforms. + * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to + * increase coverage and consistency always enable it on 64-bit platforms. */ #ifdef CONFIG_64BIT # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) @@ -111,16 +113,12 @@ static inline void cpu_load_update_active(struct rq *this_rq) { } * 10 -> just above 1us * 9 -> just above 0.5us */ -#define DL_SCALE (10) - -/* - * These are the 'tuning knobs' of the scheduler: - */ +#define DL_SCALE 10 /* - * single value that denotes runtime == period, ie unlimited time. + * Single value that denotes runtime == period, ie unlimited time. */ -#define RUNTIME_INF ((u64)~0ULL) +#define RUNTIME_INF ((u64)~0ULL) static inline int idle_policy(int policy) { @@ -235,9 +233,9 @@ void __dl_clear_params(struct task_struct *p); * control. */ struct dl_bandwidth { - raw_spinlock_t dl_runtime_lock; - u64 dl_runtime; - u64 dl_period; + raw_spinlock_t dl_runtime_lock; + u64 dl_runtime; + u64 dl_period; }; static inline int dl_bandwidth_enabled(void) @@ -246,8 +244,9 @@ static inline int dl_bandwidth_enabled(void) } struct dl_bw { - raw_spinlock_t lock; - u64 bw, total_bw; + raw_spinlock_t lock; + u64 bw; + u64 total_bw; }; static inline void __dl_update(struct dl_bw *dl_b, s64 bw); @@ -273,20 +272,17 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; } -void dl_change_utilization(struct task_struct *p, u64 new_bw); +extern void dl_change_utilization(struct task_struct *p, u64 new_bw); extern void init_dl_bw(struct dl_bw *dl_b); -extern int sched_dl_global_validate(void); +extern int sched_dl_global_validate(void); extern void sched_dl_do_global(void); -extern int sched_dl_overflow(struct task_struct *p, int policy, - const struct sched_attr *attr); +extern int sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr); extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr); extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr); extern bool __checkparam_dl(const struct sched_attr *attr); extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); -extern int dl_task_can_attach(struct task_struct *p, - const struct cpumask *cs_cpus_allowed); -extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, - const struct cpumask *trial); +extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); +extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); extern bool dl_cpu_busy(unsigned int cpu); #ifdef CONFIG_CGROUP_SCHED @@ -300,32 +296,36 @@ extern struct list_head task_groups; struct cfs_bandwidth { #ifdef CONFIG_CFS_BANDWIDTH - raw_spinlock_t lock; - ktime_t period; - u64 quota, runtime; - s64 hierarchical_quota; - u64 runtime_expires; - - int idle, period_active; - struct hrtimer period_timer, slack_timer; - struct list_head throttled_cfs_rq; - - /* statistics */ - int nr_periods, nr_throttled; - u64 throttled_time; + raw_spinlock_t lock; + ktime_t period; + u64 quota; + u64 runtime; + s64 hierarchical_quota; + u64 runtime_expires; + + int idle; + int period_active; + struct hrtimer period_timer; + struct hrtimer slack_timer; + struct list_head throttled_cfs_rq; + + /* Statistics: */ + int nr_periods; + int nr_throttled; + u64 throttled_time; #endif }; -/* task group related information */ +/* Task group related information */ struct task_group { struct cgroup_subsys_state css; #ifdef CONFIG_FAIR_GROUP_SCHED - /* schedulable entities of this group on each cpu */ - struct sched_entity **se; - /* runqueue "owned" by this group on each cpu */ - struct cfs_rq **cfs_rq; - unsigned long shares; + /* schedulable entities of this group on each CPU */ + struct sched_entity **se; + /* runqueue "owned" by this group on each CPU */ + struct cfs_rq **cfs_rq; + unsigned long shares; #ifdef CONFIG_SMP /* @@ -333,29 +333,29 @@ struct task_group { * it in its own cacheline separated from the fields above which * will also be accessed at each tick. */ - atomic_long_t load_avg ____cacheline_aligned; + atomic_long_t load_avg ____cacheline_aligned; #endif #endif #ifdef CONFIG_RT_GROUP_SCHED - struct sched_rt_entity **rt_se; - struct rt_rq **rt_rq; + struct sched_rt_entity **rt_se; + struct rt_rq **rt_rq; - struct rt_bandwidth rt_bandwidth; + struct rt_bandwidth rt_bandwidth; #endif - struct rcu_head rcu; - struct list_head list; + struct rcu_head rcu; + struct list_head list; - struct task_group *parent; - struct list_head siblings; - struct list_head children; + struct task_group *parent; + struct list_head siblings; + struct list_head children; #ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; + struct autogroup *autogroup; #endif - struct cfs_bandwidth cfs_bandwidth; + struct cfs_bandwidth cfs_bandwidth; }; #ifdef CONFIG_FAIR_GROUP_SCHED @@ -369,8 +369,8 @@ struct task_group { * (The default weight is 1024 - so there's no practical * limitation from this.) */ -#define MIN_SHARES (1UL << 1) -#define MAX_SHARES (1UL << 18) +#define MIN_SHARES (1UL << 1) +#define MAX_SHARES (1UL << 18) #endif typedef int (*tg_visitor)(struct task_group *, void *); @@ -443,35 +443,39 @@ struct cfs_bandwidth { }; /* CFS-related fields in a runqueue */ struct cfs_rq { - struct load_weight load; - unsigned long runnable_weight; - unsigned int nr_running, h_nr_running; + struct load_weight load; + unsigned long runnable_weight; + unsigned int nr_running; + unsigned int h_nr_running; - u64 exec_clock; - u64 min_vruntime; + u64 exec_clock; + u64 min_vruntime; #ifndef CONFIG_64BIT - u64 min_vruntime_copy; + u64 min_vruntime_copy; #endif - struct rb_root_cached tasks_timeline; + struct rb_root_cached tasks_timeline; /* * 'curr' points to currently running entity on this cfs_rq. * It is set to NULL otherwise (i.e when none are currently running). */ - struct sched_entity *curr, *next, *last, *skip; + struct sched_entity *curr; + struct sched_entity *next; + struct sched_entity *last; + struct sched_entity *skip; #ifdef CONFIG_SCHED_DEBUG - unsigned int nr_spread_over; + unsigned int nr_spread_over; #endif #ifdef CONFIG_SMP /* * CFS load tracking */ - struct sched_avg avg; + struct sched_avg avg; #ifndef CONFIG_64BIT - u64 load_last_update_time_copy; + u64 load_last_update_time_copy; #endif struct { raw_spinlock_t lock ____cacheline_aligned; @@ -482,9 +486,9 @@ struct cfs_rq { } removed; #ifdef CONFIG_FAIR_GROUP_SCHED - unsigned long tg_load_avg_contrib; - long propagate; - long prop_runnable_sum; + unsigned long tg_load_avg_contrib; + long propagate; + long prop_runnable_sum; /* * h_load = weight * f(tg) @@ -492,36 +496,38 @@ struct cfs_rq { * Where f(tg) is the recursive weight fraction assigned to * this group. */ - unsigned long h_load; - u64 last_h_load_update; - struct sched_entity *h_load_next; + unsigned long h_load; + u64 last_h_load_update; + struct sched_entity *h_load_next; #endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_SMP */ #ifdef CONFIG_FAIR_GROUP_SCHED - struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ + struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */ /* * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in * a hierarchy). Non-leaf lrqs hold other higher schedulable entities * (like users, containers etc.) * - * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This - * list is used during load balance. + * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU. + * This list is used during load balance. */ - int on_list; - struct list_head leaf_cfs_rq_list; - struct task_group *tg; /* group that "owns" this runqueue */ + int on_list; + struct list_head leaf_cfs_rq_list; + struct task_group *tg; /* group that "owns" this runqueue */ #ifdef CONFIG_CFS_BANDWIDTH - int runtime_enabled; - u64 runtime_expires; - s64 runtime_remaining; - - u64 throttled_clock, throttled_clock_task; - u64 throttled_clock_task_time; - int throttled, throttle_count; - struct list_head throttled_list; + int runtime_enabled; + u64 runtime_expires; + s64 runtime_remaining; + + u64 throttled_clock; + u64 throttled_clock_task; + u64 throttled_clock_task_time; + int throttled; + int throttle_count; + struct list_head throttled_list; #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ }; @@ -538,45 +544,45 @@ static inline int rt_bandwidth_enabled(void) /* Real-Time classes' related field in a runqueue: */ struct rt_rq { - struct rt_prio_array active; - unsigned int rt_nr_running; - unsigned int rr_nr_running; + struct rt_prio_array active; + unsigned int rt_nr_running; + unsigned int rr_nr_running; #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED struct { - int curr; /* highest queued rt task prio */ + int curr; /* highest queued rt task prio */ #ifdef CONFIG_SMP - int next; /* next highest */ + int next; /* next highest */ #endif } highest_prio; #endif #ifdef CONFIG_SMP - unsigned long rt_nr_migratory; - unsigned long rt_nr_total; - int overloaded; - struct plist_head pushable_tasks; + unsigned long rt_nr_migratory; + unsigned long rt_nr_total; + int overloaded; + struct plist_head pushable_tasks; #endif /* CONFIG_SMP */ - int rt_queued; + int rt_queued; - int rt_throttled; - u64 rt_time; - u64 rt_runtime; + int rt_throttled; + u64 rt_time; + u64 rt_runtime; /* Nests inside the rq lock: */ - raw_spinlock_t rt_runtime_lock; + raw_spinlock_t rt_runtime_lock; #ifdef CONFIG_RT_GROUP_SCHED - unsigned long rt_nr_boosted; + unsigned long rt_nr_boosted; - struct rq *rq; - struct task_group *tg; + struct rq *rq; + struct task_group *tg; #endif }; /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */ - struct rb_root_cached root; + struct rb_root_cached root; - unsigned long dl_nr_running; + unsigned long dl_nr_running; #ifdef CONFIG_SMP /* @@ -586,28 +592,28 @@ struct dl_rq { * should migrate somewhere else. */ struct { - u64 curr; - u64 next; + u64 curr; + u64 next; } earliest_dl; - unsigned long dl_nr_migratory; - int overloaded; + unsigned long dl_nr_migratory; + int overloaded; /* * Tasks on this rq that can be pushed away. They are kept in * an rb-tree, ordered by tasks' deadlines, with caching * of the leftmost (earliest deadline) element. */ - struct rb_root_cached pushable_dl_tasks_root; + struct rb_root_cached pushable_dl_tasks_root; #else - struct dl_bw dl_bw; + struct dl_bw dl_bw; #endif /* * "Active utilization" for this runqueue: increased when a * task wakes up (becomes TASK_RUNNING) and decreased when a * task blocks */ - u64 running_bw; + u64 running_bw; /* * Utilization of the tasks "assigned" to this runqueue (including @@ -618,14 +624,14 @@ struct dl_rq { * This is needed to compute the "inactive utilization" for the * runqueue (inactive utilization = this_bw - running_bw). */ - u64 this_bw; - u64 extra_bw; + u64 this_bw; + u64 extra_bw; /* * Inverse of the fraction of CPU utilization that can be reclaimed * by the GRUB algorithm. */ - u64 bw_ratio; + u64 bw_ratio; }; #ifdef CONFIG_SMP @@ -638,51 +644,51 @@ static inline bool sched_asym_prefer(int a, int b) /* * We add the notion of a root-domain which will be used to define per-domain * variables. Each exclusive cpuset essentially defines an island domain by - * fully partitioning the member cpus from any other cpuset. Whenever a new + * fully partitioning the member CPUs from any other cpuset. Whenever a new * exclusive cpuset is created, we also create and attach a new root-domain * object. * */ struct root_domain { - atomic_t refcount; - atomic_t rto_count; - struct rcu_head rcu; - cpumask_var_t span; - cpumask_var_t online; + atomic_t refcount; + atomic_t rto_count; + struct rcu_head rcu; + cpumask_var_t span; + cpumask_var_t online; /* Indicate more than one runnable task for any CPU */ - bool overload; + bool overload; /* * The bit corresponding to a CPU gets set here if such CPU has more * than one runnable -deadline task (as it is below for RT tasks). */ - cpumask_var_t dlo_mask; - atomic_t dlo_count; - struct dl_bw dl_bw; - struct cpudl cpudl; + cpumask_var_t dlo_mask; + atomic_t dlo_count; + struct dl_bw dl_bw; + struct cpudl cpudl; #ifdef HAVE_RT_PUSH_IPI /* * For IPI pull requests, loop across the rto_mask. */ - struct irq_work rto_push_work; - raw_spinlock_t rto_lock; + struct irq_work rto_push_work; + raw_spinlock_t rto_lock; /* These are only updated and read within rto_lock */ - int rto_loop; - int rto_cpu; + int rto_loop; + int rto_cpu; /* These atomics are updated outside of a lock */ - atomic_t rto_loop_next; - atomic_t rto_loop_start; + atomic_t rto_loop_next; + atomic_t rto_loop_start; #endif /* * The "RT overload" flag: it gets set if a CPU has more than * one runnable RT task. */ - cpumask_var_t rto_mask; - struct cpupri cpupri; + cpumask_var_t rto_mask; + struct cpupri cpupri; - unsigned long max_cpu_capacity; + unsigned long max_cpu_capacity; }; extern struct root_domain def_root_domain; @@ -708,39 +714,39 @@ extern void rto_push_irq_work_func(struct irq_work *work); */ struct rq { /* runqueue lock: */ - raw_spinlock_t lock; + raw_spinlock_t lock; /* * nr_running and cpu_load should be in the same cacheline because * remote CPUs use both these fields when doing load calculation. */ - unsigned int nr_running; + unsigned int nr_running; #ifdef CONFIG_NUMA_BALANCING - unsigned int nr_numa_running; - unsigned int nr_preferred_running; + unsigned int nr_numa_running; + unsigned int nr_preferred_running; #endif #define CPU_LOAD_IDX_MAX 5 - unsigned long cpu_load[CPU_LOAD_IDX_MAX]; + unsigned long cpu_load[CPU_LOAD_IDX_MAX]; #ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_SMP - unsigned long last_load_update_tick; + unsigned long last_load_update_tick; #endif /* CONFIG_SMP */ - unsigned long nohz_flags; + unsigned long nohz_flags; #endif /* CONFIG_NO_HZ_COMMON */ - /* capture load from *all* tasks on this cpu: */ - struct load_weight load; - unsigned long nr_load_updates; - u64 nr_switches; + /* capture load from *all* tasks on this CPU: */ + struct load_weight load; + unsigned long nr_load_updates; + u64 nr_switches; - struct cfs_rq cfs; - struct rt_rq rt; - struct dl_rq dl; + struct cfs_rq cfs; + struct rt_rq rt; + struct dl_rq dl; #ifdef CONFIG_FAIR_GROUP_SCHED - /* list of leaf cfs_rq on this cpu: */ - struct list_head leaf_cfs_rq_list; - struct list_head *tmp_alone_branch; + /* list of leaf cfs_rq on this CPU: */ + struct list_head leaf_cfs_rq_list; + struct list_head *tmp_alone_branch; #endif /* CONFIG_FAIR_GROUP_SCHED */ /* @@ -749,94 +755,98 @@ struct rq { * one CPU and if it got migrated afterwards it may decrease * it on another CPU. Always updated under the runqueue lock: */ - unsigned long nr_uninterruptible; + unsigned long nr_uninterruptible; - struct task_struct *curr, *idle, *stop; - unsigned long next_balance; - struct mm_struct *prev_mm; + struct task_struct *curr; + struct task_struct *idle; + struct task_struct *stop; + unsigned long next_balance; + struct mm_struct *prev_mm; - unsigned int clock_update_flags; - u64 clock; - u64 clock_task; + unsigned int clock_update_flags; + u64 clock; + u64 clock_task; - atomic_t nr_iowait; + atomic_t nr_iowait; #ifdef CONFIG_SMP - struct root_domain *rd; - struct sched_domain *sd; + struct root_domain *rd; + struct sched_domain *sd; + + unsigned long cpu_capacity; + unsigned long cpu_capacity_orig; - unsigned long cpu_capacity; - unsigned long cpu_capacity_orig; + struct callback_head *balance_callback; - struct callback_head *balance_callback; + unsigned char idle_balance; - unsigned char idle_balance; /* For active balancing */ - int active_balance; - int push_cpu; - struct cpu_stop_work active_balance_work; - /* cpu of this runqueue: */ - int cpu; - int online; + int active_balance; + int push_cpu; + struct cpu_stop_work active_balance_work; + + /* CPU of this runqueue: */ + int cpu; + int online; struct list_head cfs_tasks; - u64 rt_avg; - u64 age_stamp; - u64 idle_stamp; - u64 avg_idle; + u64 rt_avg; + u64 age_stamp; + u64 idle_stamp; + u64 avg_idle; /* This is used to determine avg_idle's max value */ - u64 max_idle_balance_cost; + u64 max_idle_balance_cost; #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING - u64 prev_irq_time; + u64 prev_irq_time; #endif #ifdef CONFIG_PARAVIRT - u64 prev_steal_time; + u64 prev_steal_time; #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING - u64 prev_steal_time_rq; + u64 prev_steal_time_rq; #endif /* calc_load related fields */ - unsigned long calc_load_update; - long calc_load_active; + unsigned long calc_load_update; + long calc_load_active; #ifdef CONFIG_SCHED_HRTICK #ifdef CONFIG_SMP - int hrtick_csd_pending; - call_single_data_t hrtick_csd; + int hrtick_csd_pending; + call_single_data_t hrtick_csd; #endif - struct hrtimer hrtick_timer; + struct hrtimer hrtick_timer; #endif #ifdef CONFIG_SCHEDSTATS /* latency stats */ - struct sched_info rq_sched_info; - unsigned long long rq_cpu_time; + struct sched_info rq_sched_info; + unsigned long long rq_cpu_time; /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ /* sys_sched_yield() stats */ - unsigned int yld_count; + unsigned int yld_count; /* schedule() stats */ - unsigned int sched_count; - unsigned int sched_goidle; + unsigned int sched_count; + unsigned int sched_goidle; /* try_to_wake_up() stats */ - unsigned int ttwu_count; - unsigned int ttwu_local; + unsigned int ttwu_count; + unsigned int ttwu_local; #endif #ifdef CONFIG_SMP - struct llist_head wake_list; + struct llist_head wake_list; #endif #ifdef CONFIG_CPU_IDLE /* Must be inspected within a rcu lock section */ - struct cpuidle_state *idle_state; + struct cpuidle_state *idle_state; #endif }; @@ -902,9 +912,9 @@ static inline u64 __rq_clock_broken(struct rq *rq) * one position though, because the next rq_unpin_lock() will shift it * back. */ -#define RQCF_REQ_SKIP 0x01 -#define RQCF_ACT_SKIP 0x02 -#define RQCF_UPDATED 0x04 +#define RQCF_REQ_SKIP 0x01 +#define RQCF_ACT_SKIP 0x02 +#define RQCF_UPDATED 0x04 static inline void assert_clock_updated(struct rq *rq) { @@ -1057,12 +1067,12 @@ extern void sched_ttwu_pending(void); /** * highest_flag_domain - Return highest sched_domain containing flag. - * @cpu: The cpu whose highest level of sched domain is to + * @cpu: The CPU whose highest level of sched domain is to * be returned. * @flag: The flag to check for the highest sched_domain - * for the given cpu. + * for the given CPU. * - * Returns the highest sched_domain of a cpu which contains the given flag. + * Returns the highest sched_domain of a CPU which contains the given flag. */ static inline struct sched_domain *highest_flag_domain(int cpu, int flag) { @@ -1097,30 +1107,30 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa); DECLARE_PER_CPU(struct sched_domain *, sd_asym); struct sched_group_capacity { - atomic_t ref; + atomic_t ref; /* * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity * for a single CPU. */ - unsigned long capacity; - unsigned long min_capacity; /* Min per-CPU capacity in group */ - unsigned long next_update; - int imbalance; /* XXX unrelated to capacity but shared group state */ + unsigned long capacity; + unsigned long min_capacity; /* Min per-CPU capacity in group */ + unsigned long next_update; + int imbalance; /* XXX unrelated to capacity but shared group state */ #ifdef CONFIG_SCHED_DEBUG - int id; + int id; #endif - unsigned long cpumask[0]; /* balance mask */ + unsigned long cpumask[0]; /* Balance mask */ }; struct sched_group { - struct sched_group *next; /* Must be a circular list */ - atomic_t ref; + struct sched_group *next; /* Must be a circular list */ + atomic_t ref; - unsigned int group_weight; + unsigned int group_weight; struct sched_group_capacity *sgc; - int asym_prefer_cpu; /* cpu of highest priority in group */ + int asym_prefer_cpu; /* CPU of highest priority in group */ /* * The CPUs this group covers. @@ -1129,7 +1139,7 @@ struct sched_group { * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) */ - unsigned long cpumask[0]; + unsigned long cpumask[0]; }; static inline struct cpumask *sched_group_span(struct sched_group *sg) @@ -1146,8 +1156,8 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg) } /** - * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. - * @group: The group whose first cpu is to be returned. + * group_first_cpu - Returns the first CPU in the cpumask of a sched_group. + * @group: The group whose first CPU is to be returned. */ static inline unsigned int group_first_cpu(struct sched_group *group) { @@ -1357,9 +1367,9 @@ static inline int task_on_rq_migrating(struct task_struct *p) /* * wake flags */ -#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -#define WF_FORK 0x02 /* child wakeup after fork */ -#define WF_MIGRATED 0x4 /* internal use, task got migrated */ +#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ +#define WF_FORK 0x02 /* Child wakeup after fork */ +#define WF_MIGRATED 0x4 /* Internal use, task got migrated */ /* * To aid in avoiding the subversion of "niceness" due to uneven distribution @@ -1370,11 +1380,11 @@ static inline int task_on_rq_migrating(struct task_struct *p) * slice expiry etc. */ -#define WEIGHT_IDLEPRIO 3 -#define WMULT_IDLEPRIO 1431655765 +#define WEIGHT_IDLEPRIO 3 +#define WMULT_IDLEPRIO 1431655765 -extern const int sched_prio_to_weight[40]; -extern const u32 sched_prio_to_wmult[40]; +extern const int sched_prio_to_weight[40]; +extern const u32 sched_prio_to_wmult[40]; /* * {de,en}queue flags: @@ -1396,9 +1406,9 @@ extern const u32 sched_prio_to_wmult[40]; */ #define DEQUEUE_SLEEP 0x01 -#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */ -#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */ -#define DEQUEUE_NOCLOCK 0x08 /* matches ENQUEUE_NOCLOCK */ +#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */ +#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */ +#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */ #define ENQUEUE_WAKEUP 0x01 #define ENQUEUE_RESTORE 0x02 @@ -1420,10 +1430,10 @@ struct sched_class { void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); - void (*yield_task) (struct rq *rq); - bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); + void (*yield_task) (struct rq *rq); + bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt); - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); + void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags); /* * It is the responsibility of the pick_next_task() method that will @@ -1433,16 +1443,16 @@ struct sched_class { * May return RETRY_TASK when it finds a higher prio class has runnable * tasks. */ - struct task_struct * (*pick_next_task) (struct rq *rq, - struct task_struct *prev, - struct rq_flags *rf); - void (*put_prev_task) (struct rq *rq, struct task_struct *p); + struct task_struct * (*pick_next_task)(struct rq *rq, + struct task_struct *prev, + struct rq_flags *rf); + void (*put_prev_task)(struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); void (*migrate_task_rq)(struct task_struct *p); - void (*task_woken) (struct rq *this_rq, struct task_struct *task); + void (*task_woken)(struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, const struct cpumask *newmask); @@ -1451,31 +1461,31 @@ struct sched_class { void (*rq_offline)(struct rq *rq); #endif - void (*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); - void (*task_fork) (struct task_struct *p); - void (*task_dead) (struct task_struct *p); + void (*set_curr_task)(struct rq *rq); + void (*task_tick)(struct rq *rq, struct task_struct *p, int queued); + void (*task_fork)(struct task_struct *p); + void (*task_dead)(struct task_struct *p); /* * The switched_from() call is allowed to drop rq->lock, therefore we * cannot assume the switched_from/switched_to pair is serliazed by * rq->lock. They are however serialized by p->pi_lock. */ - void (*switched_from) (struct rq *this_rq, struct task_struct *task); - void (*switched_to) (struct rq *this_rq, struct task_struct *task); + void (*switched_from)(struct rq *this_rq, struct task_struct *task); + void (*switched_to) (struct rq *this_rq, struct task_struct *task); void (*prio_changed) (struct rq *this_rq, struct task_struct *task, - int oldprio); + int oldprio); - unsigned int (*get_rr_interval) (struct rq *rq, - struct task_struct *task); + unsigned int (*get_rr_interval)(struct rq *rq, + struct task_struct *task); - void (*update_curr) (struct rq *rq); + void (*update_curr)(struct rq *rq); -#define TASK_SET_GROUP 0 -#define TASK_MOVE_GROUP 1 +#define TASK_SET_GROUP 0 +#define TASK_MOVE_GROUP 1 #ifdef CONFIG_FAIR_GROUP_SCHED - void (*task_change_group) (struct task_struct *p, int type); + void (*task_change_group)(struct task_struct *p, int type); #endif }; @@ -1524,6 +1534,7 @@ static inline void idle_set_state(struct rq *rq, static inline struct cpuidle_state *idle_get_state(struct rq *rq) { SCHED_WARN_ON(!rcu_read_lock_held()); + return rq->idle_state; } #else @@ -1562,9 +1573,9 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se); extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se); extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq); -#define BW_SHIFT 20 -#define BW_UNIT (1 << BW_SHIFT) -#define RATIO_SHIFT 8 +#define BW_SHIFT 20 +#define BW_UNIT (1 << BW_SHIFT) +#define RATIO_SHIFT 8 unsigned long to_ratio(u64 period, u64 runtime); extern void init_entity_runnable_average(struct sched_entity *se); @@ -1814,8 +1825,8 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) /* * Unfair double_lock_balance: Optimizes throughput at the expense of * latency by eliminating extra atomic operations when the locks are - * already in proper order on entry. This favors lower cpu-ids and will - * grant the double lock to lower cpus over higher ids under contention, + * already in proper order on entry. This favors lower CPU-ids and will + * grant the double lock to lower CPUs over higher ids under contention, * regardless of entry order into the function. */ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) @@ -1847,7 +1858,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) { if (unlikely(!irqs_disabled())) { - /* printk() doesn't work good under rq->lock */ + /* printk() doesn't work well under rq->lock */ raw_spin_unlock(&this_rq->lock); BUG_ON(1); } @@ -2106,15 +2117,14 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ #ifdef arch_scale_freq_capacity -#ifndef arch_scale_freq_invariant -#define arch_scale_freq_invariant() (true) -#endif -#else /* arch_scale_freq_capacity */ -#define arch_scale_freq_invariant() (false) +# ifndef arch_scale_freq_invariant +# define arch_scale_freq_invariant() true +# endif +#else +# define arch_scale_freq_invariant() false #endif #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL - static inline unsigned long cpu_util_dl(struct rq *rq) { return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; @@ -2124,5 +2134,4 @@ static inline unsigned long cpu_util_cfs(struct rq *rq) { return rq->cfs.avg.util_avg; } - #endif diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 940b1fa1d2ce..968c1fe3099a 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -78,8 +78,8 @@ static int show_schedstat(struct seq_file *seq, void *v) * This itererator needs some explanation. * It returns 1 for the header position. * This means 2 is cpu 0. - * In a hotplugged system some cpus, including cpu 0, may be missing so we have - * to use cpumask_* to iterate over the cpus. + * In a hotplugged system some CPUs, including cpu 0, may be missing so we have + * to use cpumask_* to iterate over the CPUs. */ static void *schedstat_start(struct seq_file *file, loff_t *offset) { @@ -99,12 +99,14 @@ static void *schedstat_start(struct seq_file *file, loff_t *offset) if (n < nr_cpu_ids) return (void *)(unsigned long)(n + 2); + return NULL; } static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset) { (*offset)++; + return schedstat_start(file, offset); } @@ -134,6 +136,7 @@ static const struct file_operations proc_schedstat_operations = { static int __init proc_schedstat_init(void) { proc_create("schedstat", 0, NULL, &proc_schedstat_operations); + return 0; } subsys_initcall(proc_schedstat_init); diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 8e7b58de61e7..8aea199a39b4 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -30,35 +30,29 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) if (rq) rq->rq_sched_info.run_delay += delta; } -#define schedstat_enabled() static_branch_unlikely(&sched_schedstats) +#define schedstat_enabled() static_branch_unlikely(&sched_schedstats) #define __schedstat_inc(var) do { var++; } while (0) -#define schedstat_inc(var) do { if (schedstat_enabled()) { var++; } } while (0) +#define schedstat_inc(var) do { if (schedstat_enabled()) { var++; } } while (0) #define __schedstat_add(var, amt) do { var += (amt); } while (0) -#define schedstat_add(var, amt) do { if (schedstat_enabled()) { var += (amt); } } while (0) -#define __schedstat_set(var, val) do { var = (val); } while (0) -#define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0) -#define schedstat_val(var) (var) -#define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0) - -#else /* !CONFIG_SCHEDSTATS */ -static inline void -rq_sched_info_arrive(struct rq *rq, unsigned long long delta) -{} -static inline void -rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) -{} -static inline void -rq_sched_info_depart(struct rq *rq, unsigned long long delta) -{} -#define schedstat_enabled() 0 -#define __schedstat_inc(var) do { } while (0) -#define schedstat_inc(var) do { } while (0) -#define __schedstat_add(var, amt) do { } while (0) -#define schedstat_add(var, amt) do { } while (0) -#define __schedstat_set(var, val) do { } while (0) -#define schedstat_set(var, val) do { } while (0) -#define schedstat_val(var) 0 -#define schedstat_val_or_zero(var) 0 +#define schedstat_add(var, amt) do { if (schedstat_enabled()) { var += (amt); } } while (0) +#define __schedstat_set(var, val) do { var = (val); } while (0) +#define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0) +#define schedstat_val(var) (var) +#define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0) + +#else /* !CONFIG_SCHEDSTATS: */ +static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { } +static inline void rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) { } +static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { } +# define schedstat_enabled() 0 +# define __schedstat_inc(var) do { } while (0) +# define schedstat_inc(var) do { } while (0) +# define __schedstat_add(var, amt) do { } while (0) +# define schedstat_add(var, amt) do { } while (0) +# define __schedstat_set(var, val) do { } while (0) +# define schedstat_set(var, val) do { } while (0) +# define schedstat_val(var) 0 +# define schedstat_val_or_zero(var) 0 #endif /* CONFIG_SCHEDSTATS */ #ifdef CONFIG_SCHED_INFO @@ -69,9 +63,9 @@ static inline void sched_info_reset_dequeued(struct task_struct *t) /* * We are interested in knowing how long it was from the *first* time a - * task was queued to the time that it finally hit a cpu, we call this routine - * from dequeue_task() to account for possible rq->clock skew across cpus. The - * delta taken on each cpu would annul the skew. + * task was queued to the time that it finally hit a CPU, we call this routine + * from dequeue_task() to account for possible rq->clock skew across CPUs. The + * delta taken on each CPU would annul the skew. */ static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t) { @@ -87,7 +81,7 @@ static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t) } /* - * Called when a task finally hits the cpu. We can now calculate how + * Called when a task finally hits the CPU. We can now calculate how * long it was waiting to run. We also note when it began so that we * can keep stats on how long its timeslice is. */ @@ -112,9 +106,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) */ static inline void sched_info_queued(struct rq *rq, struct task_struct *t) { - if (unlikely(sched_info_on())) + if (unlikely(sched_info_on())) { if (!t->sched_info.last_queued) t->sched_info.last_queued = rq_clock(rq); + } } /* @@ -127,8 +122,7 @@ static inline void sched_info_queued(struct rq *rq, struct task_struct *t) */ static inline void sched_info_depart(struct rq *rq, struct task_struct *t) { - unsigned long long delta = rq_clock(rq) - - t->sched_info.last_arrival; + unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival; rq_sched_info_depart(rq, delta); @@ -142,11 +136,10 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t) * the idle task.) We are only called when prev != next. */ static inline void -__sched_info_switch(struct rq *rq, - struct task_struct *prev, struct task_struct *next) +__sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { /* - * prev now departs the cpu. It's not interesting to record + * prev now departs the CPU. It's not interesting to record * stats about how efficient we were at scheduling the idle * process, however. */ @@ -156,18 +149,19 @@ __sched_info_switch(struct rq *rq, if (next != rq->idle) sched_info_arrive(rq, next); } + static inline void -sched_info_switch(struct rq *rq, - struct task_struct *prev, struct task_struct *next) +sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { if (unlikely(sched_info_on())) __sched_info_switch(rq, prev, next); } -#else -#define sched_info_queued(rq, t) do { } while (0) -#define sched_info_reset_dequeued(t) do { } while (0) -#define sched_info_dequeued(rq, t) do { } while (0) -#define sched_info_depart(rq, t) do { } while (0) -#define sched_info_arrive(rq, next) do { } while (0) -#define sched_info_switch(rq, t, next) do { } while (0) + +#else /* !CONFIG_SCHED_INFO: */ +# define sched_info_queued(rq, t) do { } while (0) +# define sched_info_reset_dequeued(t) do { } while (0) +# define sched_info_dequeued(rq, t) do { } while (0) +# define sched_info_depart(rq, t) do { } while (0) +# define sched_info_arrive(rq, next) do { } while (0) +# define sched_info_switch(rq, t, next) do { } while (0) #endif /* CONFIG_SCHED_INFO */ diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index ea8d2b6a1239..c183b790ca54 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include "sched.h" - /* * stop-task scheduling class. * @@ -9,6 +7,7 @@ * * See kernel/stop_machine.c */ +#include "sched.h" #ifdef CONFIG_SMP static int diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 9ff1555341ed..b88ab4e0207f 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * <linux/swait.h> (simple wait queues ) implementation: + */ #include <linux/sched/signal.h> #include <linux/swait.h> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 519b024f4e94..219eee70e457 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -41,8 +41,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, if (!(sd->flags & SD_LOAD_BALANCE)) { printk("does not load-balance\n"); if (sd->parent) - printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" - " has parent"); + printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent"); return -1; } @@ -50,12 +49,10 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpumask_pr_args(sched_domain_span(sd)), sd->name); if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { - printk(KERN_ERR "ERROR: domain->span does not contain " - "CPU%d\n", cpu); + printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); } if (!cpumask_test_cpu(cpu, sched_group_span(group))) { - printk(KERN_ERR "ERROR: domain->groups does not contain" - " CPU%d\n", cpu); + printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); } printk(KERN_DEBUG "%*s groups:", level + 1, ""); @@ -115,8 +112,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, if (sd->parent && !cpumask_subset(groupmask, sched_domain_span(sd->parent))) - printk(KERN_ERR "ERROR: parent span is not a superset " - "of domain->span\n"); + printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); return 0; } @@ -595,7 +591,7 @@ int group_balance_cpu(struct sched_group *sg) * are not. * * This leads to a few particularly weird cases where the sched_domain's are - * not of the same number for each cpu. Consider: + * not of the same number for each CPU. Consider: * * NUMA-2 0-3 0-3 * groups: {0-2},{1-3} {1-3},{0-2} @@ -780,7 +776,7 @@ fail: * ^ ^ ^ ^ * `-' `-' * - * The sched_domains are per-cpu and have a two way link (parent & child) and + * The sched_domains are per-CPU and have a two way link (parent & child) and * denote the ever growing mask of CPUs belonging to that level of topology. * * Each sched_domain has a circular (double) linked list of sched_group's, each @@ -1021,6 +1017,7 @@ __visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map) d->rd = alloc_rootdomain(); if (!d->rd) return sa_sd; + return sa_rootdomain; } @@ -1047,12 +1044,14 @@ static void claim_allocations(int cpu, struct sched_domain *sd) } #ifdef CONFIG_NUMA -static int sched_domains_numa_levels; enum numa_topology_type sched_numa_topology_type; -static int *sched_domains_numa_distance; -int sched_max_numa_distance; -static struct cpumask ***sched_domains_numa_masks; -static int sched_domains_curr_level; + +static int sched_domains_numa_levels; +static int sched_domains_curr_level; + +int sched_max_numa_distance; +static int *sched_domains_numa_distance; +static struct cpumask ***sched_domains_numa_masks; #endif /* @@ -1074,11 +1073,11 @@ static int sched_domains_curr_level; * SD_ASYM_PACKING - describes SMT quirks */ #define TOPOLOGY_SD_FLAGS \ - (SD_SHARE_CPUCAPACITY | \ + (SD_SHARE_CPUCAPACITY | \ SD_SHARE_PKG_RESOURCES | \ - SD_NUMA | \ - SD_ASYM_PACKING | \ - SD_ASYM_CPUCAPACITY | \ + SD_NUMA | \ + SD_ASYM_PACKING | \ + SD_ASYM_CPUCAPACITY | \ SD_SHARE_POWERDOMAIN) static struct sched_domain * @@ -1628,7 +1627,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve pr_err(" the %s domain not a subset of the %s domain\n", child->name, sd->name); #endif - /* Fixup, ensure @sd has at least @child cpus. */ + /* Fixup, ensure @sd has at least @child CPUs. */ cpumask_or(sched_domain_span(sd), sched_domain_span(sd), sched_domain_span(child)); @@ -1720,6 +1719,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att ret = 0; error: __free_domain_allocs(&d, alloc_state, cpu_map); + return ret; } @@ -1824,6 +1824,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, return 1; tmp = SD_ATTR_INIT; + return !memcmp(cur ? (cur + idx_cur) : &tmp, new ? (new + idx_new) : &tmp, sizeof(struct sched_domain_attr)); @@ -1929,4 +1930,3 @@ match2: mutex_unlock(&sched_domains_mutex); } - diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 929ecb7d6b78..7b2a142ae629 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -107,6 +107,7 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, break; } } + return nr_exclusive; } @@ -317,6 +318,7 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait) spin_unlock(&wq->lock); schedule(); spin_lock(&wq->lock); + return 0; } EXPORT_SYMBOL(do_wait_intr); @@ -333,6 +335,7 @@ int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait) spin_unlock_irq(&wq->lock); schedule(); spin_lock_irq(&wq->lock); + return 0; } EXPORT_SYMBOL(do_wait_intr_irq); @@ -378,6 +381,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i if (ret) list_del_init(&wq_entry->entry); + return ret; } EXPORT_SYMBOL(autoremove_wake_function); diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index 84cb3acd9260..5293c59163a6 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -29,8 +29,8 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync wait_bit->key.bit_nr != key->bit_nr || test_bit(key->bit_nr, key->flags)) return 0; - else - return autoremove_wake_function(wq_entry, mode, sync, key); + + return autoremove_wake_function(wq_entry, mode, sync, key); } EXPORT_SYMBOL(wake_bit_function); @@ -50,7 +50,9 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_ if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + finish_wait(wq_head, &wbq_entry->wq_entry); + return ret; } EXPORT_SYMBOL(__wait_on_bit); @@ -73,6 +75,7 @@ int __sched out_of_line_wait_on_bit_timeout( DEFINE_WAIT_BIT(wq_entry, word, bit); wq_entry.key.timeout = jiffies + timeout; + return __wait_on_bit(wq_head, &wq_entry, action, mode); } EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); @@ -120,6 +123,7 @@ EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit) { struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); + if (waitqueue_active(wq_head)) __wake_up(wq_head, TASK_NORMAL, 1, &key); } @@ -157,6 +161,7 @@ static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p) { if (BITS_PER_LONG == 64) { unsigned long q = (unsigned long)p; + return bit_waitqueue((void *)(q & ~1), q & 1); } return bit_waitqueue(p, 0); @@ -173,6 +178,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo wait_bit->key.bit_nr != key->bit_nr || atomic_read(val) != 0) return 0; + return autoremove_wake_function(wq_entry, mode, sync, key); } @@ -196,6 +202,7 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en ret = (*action)(val, mode); } while (!ret && atomic_read(val) != 0); finish_wait(wq_head, &wbq_entry->wq_entry); + return ret; } @@ -226,6 +233,7 @@ __sched int atomic_t_wait(atomic_t *counter, unsigned int mode) schedule(); if (signal_pending_state(mode, current)) return -EINTR; + return 0; } EXPORT_SYMBOL(atomic_t_wait); @@ -250,6 +258,7 @@ __sched int bit_wait(struct wait_bit_key *word, int mode) schedule(); if (signal_pending_state(mode, current)) return -EINTR; + return 0; } EXPORT_SYMBOL(bit_wait); @@ -259,6 +268,7 @@ __sched int bit_wait_io(struct wait_bit_key *word, int mode) io_schedule(); if (signal_pending_state(mode, current)) return -EINTR; + return 0; } EXPORT_SYMBOL(bit_wait_io); @@ -266,11 +276,13 @@ EXPORT_SYMBOL(bit_wait_io); __sched int bit_wait_timeout(struct wait_bit_key *word, int mode) { unsigned long now = READ_ONCE(jiffies); + if (time_after_eq(now, word->timeout)) return -EAGAIN; schedule_timeout(word->timeout - now); if (signal_pending_state(mode, current)) return -EINTR; + return 0; } EXPORT_SYMBOL_GPL(bit_wait_timeout); @@ -278,11 +290,13 @@ EXPORT_SYMBOL_GPL(bit_wait_timeout); __sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode) { unsigned long now = READ_ONCE(jiffies); + if (time_after_eq(now, word->timeout)) return -EAGAIN; io_schedule_timeout(word->timeout - now); if (signal_pending_state(mode, current)) return -EINTR; + return 0; } EXPORT_SYMBOL_GPL(bit_wait_io_timeout); |