x86: Add magic AMD return-thunk

Note: needs to be in a section distinct from Retpolines such that the Retpoline RET substitution cannot possibly use immediate jumps. ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a little tricky but works due to the fact that zen_untrain_ret() doesn't have any stack ops and as such will emit a single ORC entry at the start (+0x3f). Meanwhile, unwinding an IP, including the __x86_return_thunk() one (+0x40) will search for the largest ORC entry smaller or equal to the IP, these will find the one ORC entry (+0x3f) and all works. [ Alexandre: SVM part. ] [ bp: Build fix, massages. ] Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> Signed-off-by: Borislav Petkov <bp@suse.de>
author: Peter Zijlstra <peterz@infradead.org> 2022-06-15 00:15:48 +0300
committer: Borislav Petkov <bp@suse.de> 2022-06-27 11:33:59 +0300
commit: a149180fbcf336e97ce4eb2cdc13672727feb94d (patch)
tree: 465e0b2ebf7a147d686abed224a732907c475c1b /arch/x86/lib
parent: 951ddecf435659553ed15a9214e153a3af43a9a1 (diff)
download: linux-a149180fbcf336e97ce4eb2cdc13672727feb94d.tar.xz
1 files changed, 60 insertions, 4 deletions
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 4467c21215f4..fdd16163b996 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -72,11 +72,67 @@ SYM_CODE_END(__x86_indirect_thunk_array)
  * This function name is magical and is used by -mfunction-return=thunk-extern
  * for the compiler to generate JMPs to it.
  */
-SYM_CODE_START(__x86_return_thunk)
-	UNWIND_HINT_EMPTY
-	ANNOTATE_NOENDBR
+	.section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ *    alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ *    end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ *    from re-poisioning the BTB prediction.
+ */
+	.align 64
+	.skip 63, 0xcc
+SYM_FUNC_START_NOALIGN(zen_untrain_ret);
+
+	/*
+	 * As executed from zen_untrain_ret, this is:
+	 *
+	 *   TEST $0xcc, %bl
+	 *   LFENCE
+	 *   JMP __x86_return_thunk
+	 *
+	 * Executing the TEST instruction has a side effect of evicting any BTB
+	 * prediction (potentially attacker controlled) attached to the RET, as
+	 * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+	 */
+	.byte	0xf6
+
+	/*
+	 * As executed from __x86_return_thunk, this is a plain RET.
+	 *
+	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+	 *
+	 * We subsequently jump backwards and architecturally execute the RET.
+	 * This creates a correct BTB prediction (type=ret), but in the
+	 * meantime we suffer Straight Line Speculation (because the type was
+	 * no branch) which is halted by the INT3.
+	 *
+	 * With SMT enabled and STIBP active, a sibling thread cannot poison
+	 * RET's prediction to a type of its choice, but can evict the
+	 * prediction due to competitive sharing. If the prediction is
+	 * evicted, __x86_return_thunk will suffer Straight Line Speculation
+	 * which will be contained safely by the INT3.
+	 */
+SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
 	ret
 	int3
 SYM_CODE_END(__x86_return_thunk)
 
-__EXPORT_THUNK(__x86_return_thunk)
+	/*
+	 * Ensure the TEST decoding / BTB invalidation is complete.
+	 */
+	lfence
+
+	/*
+	 * Jump back and execute the RET in the middle of the TEST instruction.
+	 * INT3 is for SLS protection.
+	 */
+	jmp __x86_return_thunk
+	int3
+SYM_FUNC_END(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)
author	Peter Zijlstra <peterz@infradead.org>	2022-06-15 00:15:48 +0300
committer	Borislav Petkov <bp@suse.de>	2022-06-27 11:33:59 +0300
commit	a149180fbcf336e97ce4eb2cdc13672727feb94d (patch)
tree	465e0b2ebf7a147d686abed224a732907c475c1b /arch/x86/lib
parent	951ddecf435659553ed15a9214e153a3af43a9a1 (diff)
download	linux-a149180fbcf336e97ce4eb2cdc13672727feb94d.tar.xz