diff options
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/energy_model.h | 4 | ||||
| -rw-r--r-- | include/linux/sched.h | 91 | ||||
| -rw-r--r-- | include/linux/sched/topology.h | 26 | ||||
| -rw-r--r-- | include/linux/wait_bit.h | 4 |
4 files changed, 85 insertions, 40 deletions
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index e7497f804644..c909a8ba22e8 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, struct em_perf_state *ps; int i; - WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n"); + lockdep_assert(rcu_read_lock_any_held()); if (!sum_util) return 0; @@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * Find the lowest performance state of the Energy Model above the * requested performance. */ - em_table = rcu_dereference(pd->em_table); + em_table = rcu_dereference_all(pd->em_table); i = em_pd_get_efficient_state(em_table->state, pd, max_util); ps = &em_table->state[i]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 54820a427014..004e6d56a499 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1239,6 +1239,7 @@ struct task_struct { #endif struct mutex *blocked_on; /* lock we're blocked on */ + raw_spinlock_t blocked_lock; #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER /* @@ -2180,61 +2181,85 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock); }) #ifndef CONFIG_PREEMPT_RT + +/* + * With proxy exec, if a task has been proxy-migrated, it may be a donor + * on a cpu that it can't actually run on. Thus we need a special state + * to denote that the task is being woken, but that it needs to be + * evaluated for return-migration before it is run. So if the task is + * blocked_on PROXY_WAKING, return migrate it before running it. + */ +#define PROXY_WAKING ((struct mutex *)(-1L)) + static inline struct mutex *__get_task_blocked_on(struct task_struct *p) { - struct mutex *m = p->blocked_on; - - if (m) - lockdep_assert_held_once(&m->wait_lock); - return m; + lockdep_assert_held_once(&p->blocked_lock); + return p->blocked_on == PROXY_WAKING ? 
NULL : p->blocked_on; } static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) { - struct mutex *blocked_on = READ_ONCE(p->blocked_on); - WARN_ON_ONCE(!m); /* The task should only be setting itself as blocked */ WARN_ON_ONCE(p != current); - /* Currently we serialize blocked_on under the mutex::wait_lock */ - lockdep_assert_held_once(&m->wait_lock); + /* Currently we serialize blocked_on under the task::blocked_lock */ + lockdep_assert_held_once(&p->blocked_lock); /* * Check ensure we don't overwrite existing mutex value * with a different mutex. Note, setting it to the same * lock repeatedly is ok. */ - WARN_ON_ONCE(blocked_on && blocked_on != m); - WRITE_ONCE(p->blocked_on, m); + WARN_ON_ONCE(p->blocked_on && p->blocked_on != m); + p->blocked_on = m; } -static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) +static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) { - guard(raw_spinlock_irqsave)(&m->wait_lock); - __set_task_blocked_on(p, m); + /* Currently we serialize blocked_on under the task::blocked_lock */ + lockdep_assert_held_once(&p->blocked_lock); + /* + * There may be cases where we re-clear already cleared + * blocked_on relationships, but make sure we are not + * clearing the relationship with a different lock. 
+ */ + WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m && p->blocked_on != PROXY_WAKING); + p->blocked_on = NULL; } -static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) +static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) { - if (m) { - struct mutex *blocked_on = READ_ONCE(p->blocked_on); + guard(raw_spinlock_irqsave)(&p->blocked_lock); + __clear_task_blocked_on(p, m); +} - /* Currently we serialize blocked_on under the mutex::wait_lock */ - lockdep_assert_held_once(&m->wait_lock); - /* - * There may be cases where we re-clear already cleared - * blocked_on relationships, but make sure we are not - * clearing the relationship with a different lock. - */ - WARN_ON_ONCE(blocked_on && blocked_on != m); +static inline void __set_task_blocked_on_waking(struct task_struct *p, struct mutex *m) +{ + /* Currently we serialize blocked_on under the task::blocked_lock */ + lockdep_assert_held_once(&p->blocked_lock); + + if (!sched_proxy_exec()) { + __clear_task_blocked_on(p, m); + return; } - WRITE_ONCE(p->blocked_on, NULL); + + /* Don't set PROXY_WAKING if blocked_on was already cleared */ + if (!p->blocked_on) + return; + /* + * There may be cases where we set PROXY_WAKING on tasks that were + * already set to waking, but make sure we are not changing + * the relationship with a different lock. 
+ */ + WARN_ON_ONCE(m && p->blocked_on != m && p->blocked_on != PROXY_WAKING); + p->blocked_on = PROXY_WAKING; } -static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) +static inline void set_task_blocked_on_waking(struct task_struct *p, struct mutex *m) { - guard(raw_spinlock_irqsave)(&m->wait_lock); - __clear_task_blocked_on(p, m); + guard(raw_spinlock_irqsave)(&p->blocked_lock); + __set_task_blocked_on_waking(p, m); } + #else static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) { @@ -2243,6 +2268,14 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mute static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) { } + +static inline void __set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m) +{ +} + +static inline void set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m) +{ +} #endif /* !CONFIG_PREEMPT_RT */ static __always_inline bool need_resched(void) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 45c0022b91ce..36553e14866d 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -95,6 +95,7 @@ struct sched_domain { unsigned int newidle_call; unsigned int newidle_success; unsigned int newidle_ratio; + u64 newidle_stamp; u64 max_newidle_lb_cost; unsigned long last_decay_max_lb_cost; @@ -141,18 +142,30 @@ struct sched_domain { unsigned int span_weight; /* - * Span of all CPUs in this domain. + * See sched_domain_span(), on why flex arrays are broken. * - * NOTE: this field is variable length. 
(Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ unsigned long span[]; + */ }; static inline struct cpumask *sched_domain_span(struct sched_domain *sd) { - return to_cpumask(sd->span); + /* + * Turns out that C flexible arrays are fundamentally broken since it + * is allowed for offsetof(*sd, span) < sizeof(*sd), this means that + * structure initialization *sd = { ... }; which writes every byte + * inside sizeof(*type), will over-write the start of the flexible + * array. + * + * Luckily, the way we allocate sched_domain is by: + * + * sizeof(*sd) + cpumask_size() + * + * this means that we have sufficient space for the whole flex array + * *outside* of sizeof(*sd). So use that, and avoid using sd->span. + */ + unsigned long *bitmap = (void *)sd + sizeof(*sd); + return to_cpumask(bitmap); } extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], @@ -171,7 +184,6 @@ typedef int (*sched_domain_flags_f)(void); struct sd_data { struct sched_domain *__percpu *sd; - struct sched_domain_shared *__percpu *sds; struct sched_group *__percpu *sg; struct sched_group_capacity *__percpu *sgc; }; diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 9e29d79fc790..ace7379d627d 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -406,7 +406,7 @@ do { \ schedule()) /** - * wait_var_event_killable - wait for a variable to be updated and notified + * wait_var_event_interruptible - wait for a variable to be updated and notified * @var: the address of variable being waited on * @condition: the condition to wait for * @@ -492,7 +492,7 @@ do { \ * wait_var_event_mutex - wait for a variable to be updated under a mutex * @var: the address of the variable being waited on * @condition: condition to wait for - * @mutex: the mutex which protects updates to the variable + * @lock: the mutex which protects updates to the variable * * Wait for
a condition which can only be reliably tested while holding * a mutex. The variables assessed in the condition will normally be |
