-rw-r--r--  include/linux/sched.h | 66
-rw-r--r--  kernel/sched/core.c   | 33
2 files changed, 99 insertions(+), 0 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4c72cf6aaabf..02714b9a3ff9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -143,9 +143,22 @@ struct task_group;
 		current->task_state_change = _THIS_IP_;			\
 	} while (0)
 
+# define debug_rtlock_wait_set_state()					\
+	do {								 \
+		current->saved_state_change = current->task_state_change;\
+		current->task_state_change = _THIS_IP_;		 \
+	} while (0)
+
+# define debug_rtlock_wait_restore_state()				\
+	do {								 \
+		current->task_state_change = current->saved_state_change;\
+	} while (0)
+
 #else
 # define debug_normal_state_change(cond)	do { } while (0)
 # define debug_special_state_change(cond)	do { } while (0)
+# define debug_rtlock_wait_set_state()		do { } while (0)
+# define debug_rtlock_wait_restore_state()	do { } while (0)
 #endif
 
 /*
@@ -213,6 +226,51 @@ struct task_group;
 		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
 	} while (0)
 
+/*
+ * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
+ *
+ * RT's spin/rwlock substitutions are state preserving. The state of the
+ * task when blocking on the lock is saved in task_struct::saved_state and
+ * restored after the lock has been acquired. These operations are
+ * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
+ * lock related wakeups while the task is blocked on the lock are
+ * redirected to operate on task_struct::saved_state to ensure that these
+ * are not dropped. On restore task_struct::saved_state is set to
+ * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
+ *
+ * The lock operation looks like this:
+ *
+ *	current_save_and_set_rtlock_wait_state();
+ *	for (;;) {
+ *		if (try_lock())
+ *			break;
+ *		raw_spin_unlock_irq(&lock->wait_lock);
+ *		schedule_rtlock();
+ *		raw_spin_lock_irq(&lock->wait_lock);
+ *		set_current_state(TASK_RTLOCK_WAIT);
+ *	}
+ *	current_restore_rtlock_saved_state();
+ */
+#define current_save_and_set_rtlock_wait_state()			\
+	do {								\
+		lockdep_assert_irqs_disabled();				\
+		raw_spin_lock(&current->pi_lock);			\
+		current->saved_state = current->__state;		\
+		debug_rtlock_wait_set_state();				\
+		WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT);		\
+		raw_spin_unlock(&current->pi_lock);			\
+	} while (0);
+
+#define current_restore_rtlock_saved_state()				\
+	do {								\
+		lockdep_assert_irqs_disabled();				\
+		raw_spin_lock(&current->pi_lock);			\
+		debug_rtlock_wait_restore_state();			\
+		WRITE_ONCE(current->__state, current->saved_state);	\
+		current->saved_state = TASK_RUNNING;			\
+		raw_spin_unlock(&current->pi_lock);			\
+	} while (0);
+
 #define get_current_state()	READ_ONCE(current->__state)
 
 /* Task command name length: */
@@ -668,6 +726,11 @@ struct task_struct {
 #endif
 	unsigned int			__state;
 
+#ifdef CONFIG_PREEMPT_RT
+	/* saved state for "spinlock sleepers" */
+	unsigned int			saved_state;
+#endif
+
 	/*
 	 * This begins the randomizable portion of task_struct. Only
 	 * scheduling-critical items should be added above here.
@@ -1357,6 +1420,9 @@ struct task_struct {
 	struct kmap_ctrl		kmap_ctrl;
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long			task_state_change;
+# ifdef CONFIG_PREEMPT_RT
+	unsigned long			saved_state_change;
+# endif
 #endif
 	int				pagefault_disabled;
 #ifdef CONFIG_MMU
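Before the waker-side changes, a minimal sketch of how the two new macros are meant to be used, assembled from the pseudo-code in the comment above. Here struct rtlock and try_lock() are hypothetical placeholders; schedule_rtlock(), lock->wait_lock and the two state macros follow the pattern the patch documents. This is an illustration only, not the actual PREEMPT_RT lock implementation.

/*
 * Sketch only: a "sleeping" spinlock slowpath built from the pattern
 * documented above. 'struct rtlock' and try_lock() are placeholders.
 */
static void rtlock_slowlock(struct rtlock *lock)
{
	/* wait_lock is held with interrupts disabled throughout */
	raw_spin_lock_irq(&lock->wait_lock);

	/*
	 * Save current->__state under pi_lock and switch to
	 * TASK_RTLOCK_WAIT, so regular wakeups are redirected to
	 * saved_state instead of being consumed while blocked here.
	 */
	current_save_and_set_rtlock_wait_state();

	for (;;) {
		if (try_lock(lock))
			break;

		raw_spin_unlock_irq(&lock->wait_lock);
		schedule_rtlock();
		raw_spin_lock_irq(&lock->wait_lock);

		/* The lock wakeup set TASK_RUNNING; re-arm the wait state */
		set_current_state(TASK_RTLOCK_WAIT);
	}

	/*
	 * Move saved_state back into __state. If a regular wakeup was
	 * redirected in the meantime, saved_state is TASK_RUNNING and
	 * that wakeup is not lost.
	 */
	current_restore_rtlock_saved_state();

	raw_spin_unlock_irq(&lock->wait_lock);
}

The re-arming with set_current_state(TASK_RTLOCK_WAIT) is needed because the lock wakeup targets TASK_RTLOCK_WAIT and puts __state back to TASK_RUNNING; each loop iteration must therefore restore the wait state before the next try_lock() attempt.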
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 961991e06337..e407c6ac4a26 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3566,14 +3566,47 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  *
  * The caller holds p::pi_lock if p != current or has preemption
  * disabled when p == current.
+ *
+ * The rules of PREEMPT_RT saved_state:
+ *
+ *   The related locking code always holds p::pi_lock when updating
+ *   p::saved_state, which means the code is fully serialized in both cases.
+ *
+ *   The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other
+ *   bits set. This allows to distinguish all wakeup scenarios.
  */
 static __always_inline
 bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
 {
+	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
+		WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
+			     state != TASK_RTLOCK_WAIT);
+	}
+
 	if (READ_ONCE(p->__state) & state) {
 		*success = 1;
 		return true;
 	}
+
+#ifdef CONFIG_PREEMPT_RT
+	/*
+	 * Saved state preserves the task state across blocking on
+	 * an RT lock. If the state matches, set p::saved_state to
+	 * TASK_RUNNING, but do not wake the task because it waits
+	 * for a lock wakeup. Also indicate success because from
+	 * the regular waker's point of view this has succeeded.
+	 *
+	 * After acquiring the lock the task will restore p::__state
+	 * from p::saved_state which ensures that the regular
+	 * wakeup is not lost. The restore will also set
+	 * p::saved_state to TASK_RUNNING so any further tests will
+	 * not result in false positives vs. @success
+	 */
+	if (p->saved_state & state) {
+		p->saved_state = TASK_RUNNING;
+		*success = 1;
+	}
+#endif
 	return false;
 }
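To check the matching rules end to end, a small userspace model of the saved_state redirection follows. The struct and state constants are simplified stand-ins chosen for this illustration, not the kernel's definitions, and state_match() only mirrors the logic of ttwu_state_match() above.

#include <assert.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's task state bits */
#define TASK_RUNNING		0x0000
#define TASK_UNINTERRUPTIBLE	0x0002
#define TASK_RTLOCK_WAIT	0x1000

struct task {
	unsigned int __state;
	unsigned int saved_state;
};

/* Models the PREEMPT_RT branch of ttwu_state_match() above */
static int state_match(struct task *p, unsigned int state, int *success)
{
	if (p->__state & state) {
		*success = 1;
		return 1;	/* proceed with a real wakeup */
	}
	if (p->saved_state & state) {
		/* Remember the wakeup, but leave the task blocked */
		p->saved_state = TASK_RUNNING;
		*success = 1;
	}
	return 0;
}

int main(void)
{
	/* A task that slept in D state, then blocked on an RT spinlock */
	struct task p = {
		.__state	= TASK_RTLOCK_WAIT,
		.saved_state	= TASK_UNINTERRUPTIBLE,
	};
	int success = 0;

	/* Regular waker: no wakeup now, but success is reported ... */
	assert(!state_match(&p, TASK_UNINTERRUPTIBLE, &success));
	assert(success && p.saved_state == TASK_RUNNING);

	/* ... and the restore step makes the task runnable, not lost */
	p.__state = p.saved_state;	/* current_restore_rtlock_saved_state() */
	p.saved_state = TASK_RUNNING;
	assert(p.__state == TASK_RUNNING);

	/* A second waker matches nothing: no false positive vs. success */
	success = 0;
	assert(!state_match(&p, TASK_UNINTERRUPTIBLE, &success) && !success);

	printf("saved_state redirection behaves as documented\n");
	return 0;
}

The WARN_ON_ONCE() added under CONFIG_DEBUG_PREEMPT enforces the invariant this model relies on, as stated in the comment: lock wakeups use TASK_RTLOCK_WAIT and no other bits, so lock wakeups and regular wakeups stay distinguishable.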