/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _LINUX_RSEQ_H
#define _LINUX_RSEQ_H

#ifdef CONFIG_RSEQ
#include <linux/sched.h>

#include <uapi/linux/rseq.h>

void __rseq_handle_slowpath(struct pt_regs *regs);

/* Invoked from resume_user_mode_work() */
static inline void rseq_handle_slowpath(struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
		if (current->rseq.event.slowpath)
			__rseq_handle_slowpath(regs);
	} else {
		/* '&' is intentional to spare one conditional branch */
		if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
			__rseq_handle_slowpath(regs);
	}
}
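
/*
 * Illustrative sketch only, not part of this header: the exit-to-user
 * work loop is expected to invoke rseq_handle_slowpath() with the live
 * register state, roughly as below. The function is a simplified
 * placeholder modelled on resume_user_mode_work(), not the exact code.
 *
 *	static void example_resume_user_mode_work(struct pt_regs *regs)
 *	{
 *		clear_thread_flag(TIF_NOTIFY_RESUME);
 *		// ... other notify-resume work ...
 *		// Let rseq decide internally whether a fixup is required
 *		rseq_handle_slowpath(regs);
 *	}
 */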

void __rseq_signal_deliver(int sig, struct pt_regs *regs);

/*
 * Invoked from signal delivery to fix up the rseq state based on the
 * register context before switching to the signal delivery context.
 */
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
		/* '&' is intentional to spare one conditional branch */
		if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
			__rseq_signal_deliver(ksig->sig, regs);
	} else {
		if (current->rseq.event.has_rseq)
			__rseq_signal_deliver(ksig->sig, regs);
	}
}
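
/*
 * Illustrative sketch only: architecture signal setup code is expected
 * to call rseq_signal_deliver() with the interrupted register state
 * before building the signal frame, roughly as below. The function name
 * is a hypothetical placeholder for an arch handle_signal() helper.
 *
 *	static void example_handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 *	{
 *		// Abort a possibly interrupted rseq critical section first
 *		rseq_signal_deliver(ksig, regs);
 *		// ... then set up the signal frame and adjust @regs ...
 *	}
 */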

static inline void rseq_raise_notify_resume(struct task_struct *t)
{
	set_tsk_thread_flag(t, TIF_RSEQ);
}

/* Invoked from context switch to force evaluation on exit to user */
static __always_inline void rseq_sched_switch_event(struct task_struct *t)
{
	struct rseq_event *ev = &t->rseq.event;

	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
		/*
		 * Avoid a boat load of conditionals by using simple logic
		 * to determine whether NOTIFY_RESUME needs to be raised.
		 *
		 * It's required when the CPU or MM CID has changed or
		 * the entry was from user space.
		 */
		bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;

		if (raise) {
			ev->sched_switch = true;
			rseq_raise_notify_resume(t);
		}
	} else {
		if (ev->has_rseq) {
			ev->sched_switch = true;
			rseq_raise_notify_resume(t);
		}
	}
}
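
/*
 * Illustrative sketch only: the scheduler's context switch path is
 * expected to invoke rseq_sched_switch_event() for the task being
 * scheduled out, so its next exit to user space re-evaluates the rseq
 * state. The function below is a hypothetical placeholder, not the real
 * call site.
 *
 *	static void example_prepare_task_switch(struct task_struct *prev)
 *	{
 *		// Record the switch; TIF_RSEQ is raised here if required
 *		rseq_sched_switch_event(prev);
 *	}
 */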

/*
 * Invoked from __set_task_cpu() when a task migrates or from
 * mm_cid_schedin() when the CID changes to enforce an IDs update.
 *
 * This does not raise TIF_NOTIFY_RESUME as that happens in
 * rseq_sched_switch_event().
 */
static __always_inline void rseq_sched_set_ids_changed(struct task_struct *t)
{
	t->rseq.event.ids_changed = true;
}
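
/*
 * Illustrative sketch only: a migration path along the lines of
 * __set_task_cpu() would mark the IDs stale roughly as below; the
 * surrounding function is a hypothetical placeholder.
 *
 *	static void example_set_task_cpu(struct task_struct *p, unsigned int cpu)
 *	{
 *		if (task_cpu(p) != cpu)
 *			rseq_sched_set_ids_changed(p);
 *		// ... actual CPU assignment and accounting ...
 *	}
 */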

/* Enforce a full update after RSEQ registration and when execve() failed */
static inline void rseq_force_update(void)
{
	if (current->rseq.event.has_rseq) {
		current->rseq.event.ids_changed = true;
		current->rseq.event.sched_switch = true;
		rseq_raise_notify_resume(current);
	}
}
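
/*
 * Illustrative sketch only: a successful registration via sys_rseq() is
 * expected to end with a forced update so the freshly registered area
 * gets valid CPU and MM CID values before the task returns to user
 * space. The surrounding steps are paraphrased, not the actual syscall.
 *
 *	// ... validate and store the new rseq area, mark it registered ...
 *	rseq_force_update();
 */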

/*
 * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
 * which clears TIF_NOTIFY_RESUME on architectures that don't use the
 * generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
 *
 * To avoid updating user space RSEQ in that case just to do it again
 * before returning to user space, __rseq_handle_slowpath() does nothing
 * when invoked with a NULL register state.
 *
 * After returning from guest mode, before exiting to userspace, hypervisors
 * must invoke this function to re-raise TIF_NOTIFY_RESUME if necessary.
 */
static inline void rseq_virt_userspace_exit(void)
{
	/*
	 * The generic optimization for deferring RSEQ updates until the next
	 * exit relies on having a dedicated TIF_RSEQ.
	 */
	if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
	    current->rseq.event.sched_switch)
		rseq_raise_notify_resume(current);
}
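
/*
 * Illustrative sketch only: a hypervisor run loop would call this after
 * leaving guest mode and before returning to user space, roughly as
 * below. The function names are hypothetical placeholders for the KVM
 * vcpu ioctl path.
 *
 *	static int example_vcpu_run(struct kvm_vcpu *vcpu)
 *	{
 *		int ret = example_enter_guest(vcpu);
 *
 *		// Re-raise TIF_NOTIFY_RESUME if a switch happened after it
 *		// was consumed on guest entry
 *		rseq_virt_userspace_exit();
 *		return ret;
 *	}
 */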

static inline void rseq_reset(struct task_struct *t)
{
	memset(&t->rseq, 0, sizeof(t->rseq));
	t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
}

static inline void rseq_execve(struct task_struct *t)
{
	rseq_reset(t);
}

/*
 * If the parent process has a registered restartable sequences area, the
 * child inherits it. Unregister rseq for a clone with CLONE_VM set.
 *
 * On fork, keep the parent's IDs (CPU, MM CID), which avoids a fault on
 * the COW page on exit to user space when the child stays on the same
 * CPU as the parent. That's obviously not guaranteed, but it is the more
 * likely case in overcommit scenarios and optimizes the fork/exec path
 * by not taking the fault.
 */
static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
{
	if (clone_flags & CLONE_VM)
		rseq_reset(t);
	else
		t->rseq = current->rseq;
}
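
/*
 * Illustrative sketch only: process creation is expected to invoke the
 * fork hook once the new task_struct has been duplicated, roughly as
 * below. The surrounding function is a hypothetical placeholder for
 * copy_process().
 *
 *	static void example_copy_process(struct task_struct *child, u64 clone_flags)
 *	{
 *		// Clones sharing the MM start unregistered, fork() inherits
 *		rseq_fork(child, clone_flags);
 *	}
 */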

#else /* CONFIG_RSEQ */
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
static inline void rseq_force_update(void) { }
static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
#endif  /* !CONFIG_RSEQ */

#ifdef CONFIG_DEBUG_RSEQ
void rseq_syscall(struct pt_regs *regs);
#else /* CONFIG_DEBUG_RSEQ */
static inline void rseq_syscall(struct pt_regs *regs) { }
#endif /* !CONFIG_DEBUG_RSEQ */

#endif /* _LINUX_RSEQ_H */