diff options
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r-- | include/linux/sched.h | 119 |
1 files changed, 94 insertions, 25 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 7c6a77da8b10..af7179f8572c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -95,7 +95,9 @@ struct task_group; #define TASK_WAKING 0x0200 #define TASK_NOLOAD 0x0400 #define TASK_NEW 0x0800 -#define TASK_STATE_MAX 0x1000 +/* RT specific auxiliary flag to mark RT lock waiters */ +#define TASK_RTLOCK_WAIT 0x1000 +#define TASK_STATE_MAX 0x2000 /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) @@ -121,8 +123,6 @@ struct task_group; #define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#ifdef CONFIG_DEBUG_ATOMIC_SLEEP - /* * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). @@ -130,30 +130,37 @@ struct task_group; #define is_special_task_state(state) \ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) -#define __set_current_state(state_value) \ - do { \ - WARN_ON_ONCE(is_special_task_state(state_value));\ - current->task_state_change = _THIS_IP_; \ - WRITE_ONCE(current->__state, (state_value)); \ - } while (0) - -#define set_current_state(state_value) \ - do { \ - WARN_ON_ONCE(is_special_task_state(state_value));\ - current->task_state_change = _THIS_IP_; \ - smp_store_mb(current->__state, (state_value)); \ +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP +# define debug_normal_state_change(state_value) \ + do { \ + WARN_ON_ONCE(is_special_task_state(state_value)); \ + current->task_state_change = _THIS_IP_; \ } while (0) -#define set_special_state(state_value) \ +# define debug_special_state_change(state_value) \ do { \ - unsigned long flags; /* may shadow */ \ WARN_ON_ONCE(!is_special_task_state(state_value)); \ - raw_spin_lock_irqsave(&current->pi_lock, flags); \ current->task_state_change = _THIS_IP_; \ - WRITE_ONCE(current->__state, (state_value)); \ - raw_spin_unlock_irqrestore(&current->pi_lock,
flags); \ } while (0) + +# define debug_rtlock_wait_set_state() \ + do { \ + current->saved_state_change = current->task_state_change;\ + current->task_state_change = _THIS_IP_; \ + } while (0) + +# define debug_rtlock_wait_restore_state() \ + do { \ + current->task_state_change = current->saved_state_change;\ + } while (0) + #else +# define debug_normal_state_change(cond) do { } while (0) +# define debug_special_state_change(cond) do { } while (0) +# define debug_rtlock_wait_set_state() do { } while (0) +# define debug_rtlock_wait_restore_state() do { } while (0) +#endif + /* * set_current_state() includes a barrier so that the write of current->state * is correctly serialised wrt the caller's subsequent test of whether to @@ -192,26 +199,77 @@ struct task_group; * Also see the comments of try_to_wake_up(). */ #define __set_current_state(state_value) \ - WRITE_ONCE(current->__state, (state_value)) + do { \ + debug_normal_state_change((state_value)); \ + WRITE_ONCE(current->__state, (state_value)); \ + } while (0) #define set_current_state(state_value) \ - smp_store_mb(current->__state, (state_value)) + do { \ + debug_normal_state_change((state_value)); \ + smp_store_mb(current->__state, (state_value)); \ + } while (0) /* * set_special_state() should be used for those states when the blocking task * can not use the regular condition based wait-loop. In that case we must - * serialize against wakeups such that any possible in-flight TASK_RUNNING stores - * will not collide with our state change. + * serialize against wakeups such that any possible in-flight TASK_RUNNING + * stores will not collide with our state change. 
*/ #define set_special_state(state_value) \ do { \ unsigned long flags; /* may shadow */ \ + \ raw_spin_lock_irqsave(&current->pi_lock, flags); \ + debug_special_state_change((state_value)); \ WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \ } while (0) -#endif +/* + * PREEMPT_RT specific variants for "sleeping" spin/rwlocks + * + * RT's spin/rwlock substitutions are state preserving. The state of the + * task when blocking on the lock is saved in task_struct::saved_state and + * restored after the lock has been acquired. These operations are + * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT + * lock related wakeups while the task is blocked on the lock are + * redirected to operate on task_struct::saved_state to ensure that these + * are not dropped. On restore task_struct::saved_state is set to + * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail. + * + * The lock operation looks like this: + * + * current_save_and_set_rtlock_wait_state(); + * for (;;) { + * if (try_lock()) + * break; + * raw_spin_unlock_irq(&lock->wait_lock); + * schedule_rtlock(); + * raw_spin_lock_irq(&lock->wait_lock); + * set_current_state(TASK_RTLOCK_WAIT); + * } + * current_restore_rtlock_saved_state(); + */ +#define current_save_and_set_rtlock_wait_state() \ + do { \ + lockdep_assert_irqs_disabled(); \ + raw_spin_lock(&current->pi_lock); \ + current->saved_state = current->__state; \ + debug_rtlock_wait_set_state(); \ + WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ + raw_spin_unlock(&current->pi_lock); \ + } while (0) + +#define current_restore_rtlock_saved_state() \ + do { \ + lockdep_assert_irqs_disabled(); \ + raw_spin_lock(&current->pi_lock); \ + debug_rtlock_wait_restore_state(); \ + WRITE_ONCE(current->__state, current->saved_state); \ + current->saved_state = TASK_RUNNING; \ + raw_spin_unlock(&current->pi_lock); \ + } while (0) #define get_current_state() READ_ONCE(current->__state) @@ -230,6 +288,9 @@ extern
long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); asmlinkage void preempt_schedule_irq(void); +#ifdef CONFIG_PREEMPT_RT + extern void schedule_rtlock(void); +#endif extern int __must_check io_schedule_prepare(void); extern void io_schedule_finish(int token); @@ -668,6 +729,11 @@ struct task_struct { #endif unsigned int __state; +#ifdef CONFIG_PREEMPT_RT + /* saved state for "spinlock sleepers" */ + unsigned int saved_state; +#endif + /* * This begins the randomizable portion of task_struct. Only * scheduling-critical items should be added above here. @@ -1362,6 +1428,9 @@ struct task_struct { struct kmap_ctrl kmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; +# ifdef CONFIG_PREEMPT_RT + unsigned long saved_state_change; +# endif #endif int pagefault_disabled; #ifdef CONFIG_MMU |