Diffstat (limited to 'include/linux/sched.h')
 include/linux/sched.h | 119 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 94 insertions(+), 25 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7c6a77da8b10..af7179f8572c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -95,7 +95,9 @@ struct task_group;
#define TASK_WAKING 0x0200
#define TASK_NOLOAD 0x0400
#define TASK_NEW 0x0800
-#define TASK_STATE_MAX 0x1000
+/* RT specific auxiliary flag to mark RT lock waiters */
+#define TASK_RTLOCK_WAIT 0x1000
+#define TASK_STATE_MAX 0x2000
/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
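The new bit follows the one-flag-per-state convention of the definitions above, so it can be tested with a plain mask just like the task_is_*() helpers shown in the next hunk. For illustration only (this helper is not part of the patch):

	#define task_is_rtlock_waiter(task) \
		((READ_ONCE((task)->__state) & TASK_RTLOCK_WAIT) != 0)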
@@ -121,8 +123,6 @@ struct task_group;
#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
-#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-
/*
* Special states are those that do not use the normal wait-loop pattern. See
* the comment with set_special_state().
@@ -130,30 +130,37 @@ struct task_group;
#define is_special_task_state(state) \
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
-#define __set_current_state(state_value) \
- do { \
- WARN_ON_ONCE(is_special_task_state(state_value));\
- current->task_state_change = _THIS_IP_; \
- WRITE_ONCE(current->__state, (state_value)); \
- } while (0)
-
-#define set_current_state(state_value) \
- do { \
- WARN_ON_ONCE(is_special_task_state(state_value));\
- current->task_state_change = _THIS_IP_; \
- smp_store_mb(current->__state, (state_value)); \
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+# define debug_normal_state_change(state_value) \
+ do { \
+ WARN_ON_ONCE(is_special_task_state(state_value)); \
+ current->task_state_change = _THIS_IP_; \
} while (0)
-#define set_special_state(state_value) \
+# define debug_special_state_change(state_value) \
do { \
- unsigned long flags; /* may shadow */ \
WARN_ON_ONCE(!is_special_task_state(state_value)); \
- raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->task_state_change = _THIS_IP_; \
- WRITE_ONCE(current->__state, (state_value)); \
- raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
+
+# define debug_rtlock_wait_set_state() \
+ do { \
+ current->saved_state_change = current->task_state_change;\
+ current->task_state_change = _THIS_IP_; \
+ } while (0)
+
+# define debug_rtlock_wait_restore_state() \
+ do { \
+ current->task_state_change = current->saved_state_change;\
+ } while (0)
+
#else
+# define debug_normal_state_change(cond) do { } while (0)
+# define debug_special_state_change(cond) do { } while (0)
+# define debug_rtlock_wait_set_state() do { } while (0)
+# define debug_rtlock_wait_restore_state() do { } while (0)
+#endif
+
/*
* set_current_state() includes a barrier so that the write of current->state
* is correctly serialised wrt the caller's subsequent test of whether to
@@ -192,26 +199,77 @@ struct task_group;
* Also see the comments of try_to_wake_up().
*/
#define __set_current_state(state_value) \
- WRITE_ONCE(current->__state, (state_value))
+ do { \
+ debug_normal_state_change((state_value)); \
+ WRITE_ONCE(current->__state, (state_value)); \
+ } while (0)
#define set_current_state(state_value) \
- smp_store_mb(current->__state, (state_value))
+ do { \
+ debug_normal_state_change((state_value)); \
+ smp_store_mb(current->__state, (state_value)); \
+ } while (0)
/*
* set_special_state() should be used for those states when the blocking task
* can not use the regular condition based wait-loop. In that case we must
- * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
- * will not collide with our state change.
+ * serialize against wakeups such that any possible in-flight TASK_RUNNING
+ * stores will not collide with our state change.
*/
#define set_special_state(state_value) \
do { \
unsigned long flags; /* may shadow */ \
+ \
raw_spin_lock_irqsave(&current->pi_lock, flags); \
+ debug_special_state_change((state_value)); \
WRITE_ONCE(current->__state, (state_value)); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
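The ordering described in the comment above is what makes the canonical condition-based wait loop safe against try_to_wake_up(). As a reminder of the pattern these macros are built for, a minimal sketch (my_condition is a placeholder condition, not part of this patch):

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);	/* store state, then full barrier */
		if (my_condition)				/* re-check after the barrier */
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

States covered by is_special_task_state() cannot use this loop, which is why set_special_state() serializes the state write under pi_lock instead of relying on the barrier.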
-#endif
+/*
+ * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
+ *
+ * RT's spin/rwlock substitutions are state preserving. The state of the
+ * task when blocking on the lock is saved in task_struct::saved_state and
+ * restored after the lock has been acquired. These operations are
+ * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
+ * lock related wakeups while the task is blocked on the lock are
+ * redirected to operate on task_struct::saved_state to ensure that these
+ * are not dropped. On restore task_struct::saved_state is set to
+ * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
+ *
+ * The lock operation looks like this:
+ *
+ * current_save_and_set_rtlock_wait_state();
+ * for (;;) {
+ * if (try_lock())
+ * break;
+ * raw_spin_unlock_irq(&lock->wait_lock);
+ * schedule_rtlock();
+ * raw_spin_lock_irq(&lock->wait_lock);
+ * set_current_state(TASK_RTLOCK_WAIT);
+ * }
+ * current_restore_rtlock_saved_state();
+ */
+#define current_save_and_set_rtlock_wait_state() \
+ do { \
+ lockdep_assert_irqs_disabled(); \
+ raw_spin_lock(&current->pi_lock); \
+ current->saved_state = current->__state; \
+ debug_rtlock_wait_set_state(); \
+ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \
+ raw_spin_unlock(&current->pi_lock); \
+ } while (0)
+
+#define current_restore_rtlock_saved_state() \
+ do { \
+ lockdep_assert_irqs_disabled(); \
+ raw_spin_lock(&current->pi_lock); \
+ debug_rtlock_wait_restore_state(); \
+ WRITE_ONCE(current->__state, current->saved_state); \
+ current->saved_state = TASK_RUNNING; \
+ raw_spin_unlock(&current->pi_lock); \
+ } while (0)
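The wakeup-side redirection described in the comment block above is not part of this header; it lives in the scheduler core (try_to_wake_up()). Purely as a conceptual sketch, assuming p->pi_lock is held by the caller, the redirection amounts to something like the following (sketch_redirect_wakeup() is an illustrative name, not a kernel function, and the real code differs in detail):

	static bool sketch_redirect_wakeup(struct task_struct *p, unsigned int state)
	{
		lockdep_assert_held(&p->pi_lock);

		/* Only relevant while p blocks on an RT "sleeping" lock. */
		if (!(READ_ONCE(p->__state) & TASK_RTLOCK_WAIT))
			return false;

		/*
		 * Record a matching non-lock wakeup in saved_state so it is
		 * not lost; the task keeps waiting for the lock and picks the
		 * wakeup up when current_restore_rtlock_saved_state() runs.
		 */
		if (p->saved_state & state) {
			p->saved_state = TASK_RUNNING;
			return true;
		}
		return false;
	}

This only illustrates why both the save/restore macros above and the wakeup path must take pi_lock: the lock is what keeps the redirection and the state save/restore from racing.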
#define get_current_state() READ_ONCE(current->__state)
@@ -230,6 +288,9 @@ extern long schedule_timeout_idle(long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
asmlinkage void preempt_schedule_irq(void);
+#ifdef CONFIG_PREEMPT_RT
+ extern void schedule_rtlock(void);
+#endif
extern int __must_check io_schedule_prepare(void);
extern void io_schedule_finish(int token);
@@ -668,6 +729,11 @@ struct task_struct {
#endif
unsigned int __state;
+#ifdef CONFIG_PREEMPT_RT
+ /* saved state for "spinlock sleepers" */
+ unsigned int saved_state;
+#endif
+
/*
* This begins the randomizable portion of task_struct. Only
* scheduling-critical items should be added above here.
@@ -1362,6 +1428,9 @@ struct task_struct {
struct kmap_ctrl kmap_ctrl;
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
+# ifdef CONFIG_PREEMPT_RT
+ unsigned long saved_state_change;
+# endif
#endif
int pagefault_disabled;
#ifdef CONFIG_MMU