summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/energy_model.h4
-rw-r--r--include/linux/sched.h91
-rw-r--r--include/linux/sched/topology.h26
-rw-r--r--include/linux/wait_bit.h4
4 files changed, 85 insertions, 40 deletions
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index e7497f804644..c909a8ba22e8 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
struct em_perf_state *ps;
int i;
- WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
+ lockdep_assert(rcu_read_lock_any_held());
if (!sum_util)
return 0;
@@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* Find the lowest performance state of the Energy Model above the
* requested performance.
*/
- em_table = rcu_dereference(pd->em_table);
+ em_table = rcu_dereference_all(pd->em_table);
i = em_pd_get_efficient_state(em_table->state, pd, max_util);
ps = &em_table->state[i];
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 54820a427014..004e6d56a499 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1239,6 +1239,7 @@ struct task_struct {
#endif
struct mutex *blocked_on; /* lock we're blocked on */
+ raw_spinlock_t blocked_lock;
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
/*
@@ -2180,61 +2181,85 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock);
})
#ifndef CONFIG_PREEMPT_RT
+
+/*
+ * With proxy exec, if a task has been proxy-migrated, it may be a donor
+ * on a cpu that it can't actually run on. Thus we need a special state
+ * to denote that the task is being woken, but that it needs to be
+ * evaluated for return-migration before it is run. So if the task is
+ * blocked_on PROXY_WAKING, return migrate it before running it.
+ */
+#define PROXY_WAKING ((struct mutex *)(-1L))
+
static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
{
- struct mutex *m = p->blocked_on;
-
- if (m)
- lockdep_assert_held_once(&m->wait_lock);
- return m;
+ lockdep_assert_held_once(&p->blocked_lock);
+ return p->blocked_on == PROXY_WAKING ? NULL : p->blocked_on;
}
static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
{
- struct mutex *blocked_on = READ_ONCE(p->blocked_on);
-
WARN_ON_ONCE(!m);
/* The task should only be setting itself as blocked */
WARN_ON_ONCE(p != current);
- /* Currently we serialize blocked_on under the mutex::wait_lock */
- lockdep_assert_held_once(&m->wait_lock);
+ /* Currently we serialize blocked_on under the task::blocked_lock */
+ lockdep_assert_held_once(&p->blocked_lock);
/*
* Check ensure we don't overwrite existing mutex value
* with a different mutex. Note, setting it to the same
* lock repeatedly is ok.
*/
- WARN_ON_ONCE(blocked_on && blocked_on != m);
- WRITE_ONCE(p->blocked_on, m);
+ WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
+ p->blocked_on = m;
}
-static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
+static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
{
- guard(raw_spinlock_irqsave)(&m->wait_lock);
- __set_task_blocked_on(p, m);
+ /* Currently we serialize blocked_on under the task::blocked_lock */
+ lockdep_assert_held_once(&p->blocked_lock);
+ /*
+ * There may be cases where we re-clear already cleared
+ * blocked_on relationships, but make sure we are not
+ * clearing the relationship with a different lock.
+ */
+ WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m && p->blocked_on != PROXY_WAKING);
+ p->blocked_on = NULL;
}
-static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
+static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
{
- if (m) {
- struct mutex *blocked_on = READ_ONCE(p->blocked_on);
+ guard(raw_spinlock_irqsave)(&p->blocked_lock);
+ __clear_task_blocked_on(p, m);
+}
- /* Currently we serialize blocked_on under the mutex::wait_lock */
- lockdep_assert_held_once(&m->wait_lock);
- /*
- * There may be cases where we re-clear already cleared
- * blocked_on relationships, but make sure we are not
- * clearing the relationship with a different lock.
- */
- WARN_ON_ONCE(blocked_on && blocked_on != m);
+static inline void __set_task_blocked_on_waking(struct task_struct *p, struct mutex *m)
+{
+ /* Currently we serialize blocked_on under the task::blocked_lock */
+ lockdep_assert_held_once(&p->blocked_lock);
+
+ if (!sched_proxy_exec()) {
+ __clear_task_blocked_on(p, m);
+ return;
}
- WRITE_ONCE(p->blocked_on, NULL);
+
+ /* Don't set PROXY_WAKING if blocked_on was already cleared */
+ if (!p->blocked_on)
+ return;
+ /*
+ * There may be cases where we set PROXY_WAKING on tasks that were
+ * already set to waking, but make sure we are not changing
+ * the relationship with a different lock.
+ */
+ WARN_ON_ONCE(m && p->blocked_on != m && p->blocked_on != PROXY_WAKING);
+ p->blocked_on = PROXY_WAKING;
}
-static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
+static inline void set_task_blocked_on_waking(struct task_struct *p, struct mutex *m)
{
- guard(raw_spinlock_irqsave)(&m->wait_lock);
- __clear_task_blocked_on(p, m);
+ guard(raw_spinlock_irqsave)(&p->blocked_lock);
+ __set_task_blocked_on_waking(p, m);
}
+
#else
static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
@@ -2243,6 +2268,14 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mute
static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
}
+
+static inline void __set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m)
+{
+}
+
+static inline void set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m)
+{
+}
#endif /* !CONFIG_PREEMPT_RT */
static __always_inline bool need_resched(void)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 45c0022b91ce..36553e14866d 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -95,6 +95,7 @@ struct sched_domain {
unsigned int newidle_call;
unsigned int newidle_success;
unsigned int newidle_ratio;
+ u64 newidle_stamp;
u64 max_newidle_lb_cost;
unsigned long last_decay_max_lb_cost;
@@ -141,18 +142,30 @@ struct sched_domain {
unsigned int span_weight;
/*
- * Span of all CPUs in this domain.
+ * See sched_domain_span(), on why flex arrays are broken.
*
- * NOTE: this field is variable length. (Allocated dynamically
- * by attaching extra space to the end of the structure,
- * depending on how many CPUs the kernel has booted up with)
- */
unsigned long span[];
+ */
};
static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
{
- return to_cpumask(sd->span);
+ /*
+ * Turns out that C flexible arrays are fundamentally broken since it
+ * is allowed for offsetof(*sd, span) < sizeof(*sd), this means that
+ * structure initialzation *sd = { ... }; which writes every byte
+ * inside sizeof(*type), will over-write the start of the flexible
+ * array.
+ *
+ * Luckily, the way we allocate sched_domain is by:
+ *
+ * sizeof(*sd) + cpumask_size()
+ *
+ * this means that we have sufficient space for the whole flex array
+ * *outside* of sizeof(*sd). So use that, and avoid using sd->span.
+ */
+ unsigned long *bitmap = (void *)sd + sizeof(*sd);
+ return to_cpumask(bitmap);
}
extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
@@ -171,7 +184,6 @@ typedef int (*sched_domain_flags_f)(void);
struct sd_data {
struct sched_domain *__percpu *sd;
- struct sched_domain_shared *__percpu *sds;
struct sched_group *__percpu *sg;
struct sched_group_capacity *__percpu *sgc;
};
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 9e29d79fc790..ace7379d627d 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -406,7 +406,7 @@ do { \
schedule())
/**
- * wait_var_event_killable - wait for a variable to be updated and notified
+ * wait_var_event_interruptible - wait for a variable to be updated and notified
* @var: the address of variable being waited on
* @condition: the condition to wait for
*
@@ -492,7 +492,7 @@ do { \
* wait_var_event_mutex - wait for a variable to be updated under a mutex
* @var: the address of the variable being waited on
* @condition: condition to wait for
- * @mutex: the mutex which protects updates to the variable
+ * @lock: the mutex which protects updates to the variable
*
* Wait for a condition which can only be reliably tested while holding
* a mutex. The variables assessed in the condition will normal be