From b746046238bb99b8f703c79f6d95357428fb6476 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2020 10:15:51 +0200 Subject: objtool: Better handle IRET Teach objtool a little more about IRET so that we can avoid using the SAVE/RESTORE annotation. In particular, make the weird corner case in insn->restore go away. The purpose of that corner case is to deal with the fact that UNWIND_HINT_RESTORE lands on the instruction after IRET, but that instruction can end up being outside the basic block, consider: if (cond) sync_core() foo(); Then the hint will land on foo(), and we'll encounter the restore hint without ever having seen the save hint. By teaching objtool about the arch specific exception frame size, and assuming that any IRET in an STT_FUNC symbol is an exception frame sized POP, we can remove the use of save/restore hints for this code. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Reviewed-by: Alexandre Chartre Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200416115118.631224674@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3bcf27caf6c9..3eeaaeb75638 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -727,7 +727,6 @@ static inline void sync_core(void) unsigned int tmp; asm volatile ( - UNWIND_HINT_SAVE "mov %%ss, %0\n\t" "pushq %q0\n\t" "pushq %%rsp\n\t" @@ -737,7 +736,6 @@ static inline void sync_core(void) "pushq %q0\n\t" "pushq $1f\n\t" "iretq\n\t" - UNWIND_HINT_RESTORE "1:" : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); #endif -- cgit v1.2.3 From e25eea89bb8853763a22fa2547199cf96b571ba1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Apr 2020 16:38:19 +0200 Subject: objtool: Introduce HINT_RET_OFFSET Normally objtool ensures a function keeps the stack layout invariant. But there is a useful exception, it is possible to stuff the return stack in order to 'inject' a 'call': push $fun ret In this case the invariant mentioned above is violated. Add an objtool HINT to annotate this and allow a function exit with a modified stack frame. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Reviewed-by: Alexandre Chartre Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200416115118.690601403@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/orc_types.h | 1 + arch/x86/include/asm/unwind_hints.h | 10 ++++++++++ tools/arch/x86/include/asm/orc_types.h | 1 + tools/objtool/check.c | 24 ++++++++++++++++-------- tools/objtool/check.h | 4 +++- 5 files changed, 31 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 6e060907c163..5f18ca7ac51a 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -60,6 +60,7 @@ #define ORC_TYPE_REGS_IRET 2 #define UNWIND_HINT_TYPE_SAVE 3 #define UNWIND_HINT_TYPE_RESTORE 4 +#define UNWIND_HINT_TYPE_RET_OFFSET 5 #ifndef __ASSEMBLY__ /* diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index f5e2eb12cb71..aabf7ace0476 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -94,6 +94,16 @@ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE .endm + +/* + * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN + * and sibling calls. On these, sp_offset denotes the expected offset from + * initial_func_cfi. + */ +.macro UNWIND_HINT_RET_OFFSET sp_offset=8 + UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset +.endm + #else /* !__ASSEMBLY__ */ #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index 6e060907c163..5f18ca7ac51a 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -60,6 +60,7 @@ #define ORC_TYPE_REGS_IRET 2 #define UNWIND_HINT_TYPE_SAVE 3 #define UNWIND_HINT_TYPE_RESTORE 4 +#define UNWIND_HINT_TYPE_RET_OFFSET 5 #ifndef __ASSEMBLY__ /* diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 781b3a3c2ba6..93c88ac51f0f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1261,6 +1261,9 @@ static int read_unwind_hints(struct objtool_file *file) } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) { insn->restore = true; insn->hint = true; + + } else if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) { + insn->ret_offset = hint->sp_offset; continue; } @@ -1424,20 +1427,25 @@ static bool is_fentry_call(struct instruction *insn) return false; } -static bool has_modified_stack_frame(struct insn_state *state) +static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state) { + u8 ret_offset = insn->ret_offset; int i; - if (state->cfa.base != initial_func_cfi.cfa.base || - state->cfa.offset != initial_func_cfi.cfa.offset || - state->stack_size != initial_func_cfi.cfa.offset || - state->drap) + if (state->cfa.base != initial_func_cfi.cfa.base || state->drap) + return true; + + if (state->cfa.offset != initial_func_cfi.cfa.offset + ret_offset) return true; - for (i = 0; i < CFI_NUM_REGS; i++) + if (state->stack_size != initial_func_cfi.cfa.offset + ret_offset) + return true; + + for (i = 0; i < CFI_NUM_REGS; i++) { if (state->regs[i].base != initial_func_cfi.regs[i].base || state->regs[i].offset != initial_func_cfi.regs[i].offset) return true; + } return false; } @@ -2014,7 +2022,7 @@ static int validate_call(struct instruction *insn, struct insn_state *state) static int validate_sibling_call(struct instruction *insn, struct insn_state *state) { - if (has_modified_stack_frame(state)) { + if (has_modified_stack_frame(insn, state)) { WARN_FUNC("sibling call from callable instruction with modified stack frame", insn->sec, insn->offset); return 1; @@ -2043,7 +2051,7 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct return 1; } - if (func && has_modified_stack_frame(state)) { + if (func && has_modified_stack_frame(insn, state)) { WARN_FUNC("return with modified stack frame", insn->sec, insn->offset); return 1; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 2c55f7591443..81ce27e62c04 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -33,9 +33,11 @@ struct instruction { unsigned int len; enum insn_type type; unsigned long immediate; - bool alt_group, dead_end, ignore, hint, save, restore, ignore_alts; + bool alt_group, dead_end, ignore, ignore_alts; + bool hint, save, restore; bool retpoline_safe; u8 visited; + u8 ret_offset; struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; -- cgit v1.2.3 From 0298739b7983cf9bf4fcfb4bfb815c539bdb87ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Apr 2020 16:53:19 +0200 Subject: x86,ftrace: Fix ftrace_regs_caller() unwind The ftrace_regs_caller() trampoline does something 'funny' when there is a direct-caller present. In that case it stuffs the 'direct-caller' address on the return stack and then exits the function. This then results in 'returning' to the direct-caller with the exact registers we came in with -- an indirect tail-call without using a register. This however (rightfully) confuses objtool because the function shares a few instruction in order to have a single exit path, but the stack layout is different for them, depending through which path we came there. This is currently cludged by forcing the stack state to the non-direct case, but this generates actively wrong (ORC) unwind information for the direct case, leading to potential broken unwinds. Fix this issue by fully separating the exit paths. This results in having to poke a second RET into the trampoline copy, see ftrace_regs_caller_ret. This brings us to a second objtool problem, in order for it to perceive the 'jmp ftrace_epilogue' as a function exit, it needs to be recognised as a tail call. In order to make that happen, ftrace_epilogue needs to be the start of an STT_FUNC, so re-arrange code to make this so. Finally, a third issue is that objtool requires functions to exit with the same stack layout they started with, which is obviously violated in the direct case, employ the new HINT_RET_OFFSET to tell objtool this is an expected exception. Together, this results in generating correct ORC unwind information for the ftrace_regs_caller() function and it's trampoline copies. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Reviewed-by: Alexandre Chartre Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200416115118.749606694@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 12 ++++++++++-- arch/x86/kernel/ftrace_64.S | 32 +++++++++++++++----------------- 2 files changed, 25 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 37a0aeaf89e7..867c126ddabe 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -282,7 +282,8 @@ static inline void tramp_free(void *tramp) { } /* Defined as markers to the end of the ftrace default trampolines */ extern void ftrace_regs_caller_end(void); -extern void ftrace_epilogue(void); +extern void ftrace_regs_caller_ret(void); +extern void ftrace_caller_end(void); extern void ftrace_caller_op_ptr(void); extern void ftrace_regs_caller_op_ptr(void); @@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) call_offset = (unsigned long)ftrace_regs_call; } else { start_offset = (unsigned long)ftrace_caller; - end_offset = (unsigned long)ftrace_epilogue; + end_offset = (unsigned long)ftrace_caller_end; op_offset = (unsigned long)ftrace_caller_op_ptr; call_offset = (unsigned long)ftrace_call; } @@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) if (WARN_ON(ret < 0)) goto fail; + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller); + ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + if (WARN_ON(ret < 0)) + goto fail; + } + /* * The address of the ftrace_ops that is used for this trampoline * is stored at the end of the trampoline. This will be used to diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 369e61faacfe..7657dc782828 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) * think twice before adding any new code or changing the * layout here. */ -SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL) +SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) + jmp ftrace_epilogue +SYM_FUNC_END(ftrace_caller); + +SYM_FUNC_START(ftrace_epilogue) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) jmp ftrace_stub @@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) retq -SYM_FUNC_END(ftrace_caller) +SYM_FUNC_END(ftrace_epilogue) SYM_FUNC_START(ftrace_regs_caller) /* Save the current flags before any operations that can change them */ pushfq - UNWIND_HINT_SAVE - /* added 8 bytes to save flags */ save_mcount_regs 8 /* save_mcount_regs fills in first two parameters */ @@ -233,7 +235,10 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) movq ORIG_RAX(%rsp), %rax movq %rax, MCOUNT_REG_SIZE-8(%rsp) - /* If ORIG_RAX is anything but zero, make this a call to that */ + /* + * If ORIG_RAX is anything but zero, make this a call to that. + * See arch_ftrace_set_direct_caller(). + */ movq ORIG_RAX(%rsp), %rax cmpq $0, %rax je 1f @@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) movq %rax, MCOUNT_REG_SIZE(%rsp) restore_mcount_regs 8 + /* Restore flags */ + popfq - jmp 2f +SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL); + UNWIND_HINT_RET_OFFSET + jmp ftrace_epilogue 1: restore_mcount_regs - - -2: - /* - * The stack layout is nondetermistic here, depending on which path was - * taken. This confuses objtool and ORC, rightfully so. For now, - * pretend the stack always looks like the non-direct case. - */ - UNWIND_HINT_RESTORE - /* Restore flags */ popfq @@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) * to the return. */ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) - jmp ftrace_epilogue SYM_FUNC_END(ftrace_regs_caller) -- cgit v1.2.3 From dc2745b61907cf6faeb72cc25f2cc4b38d4a3cac Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Apr 2020 16:50:40 +0200 Subject: x86,ftrace: Use SIZEOF_PTREGS There's a convenient macro for 'SS+8' called FRAME_SIZE. Use it to clarify things. (entry/calling.h calls this SIZEOF_PTREGS but we're using asm/ptrace-abi.h) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Reviewed-by: Alexandre Chartre Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200416115118.808485515@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 7657dc782828..be9aff20dd5f 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -23,7 +23,7 @@ #endif /* CONFIG_FRAME_POINTER */ /* Size of stack used to save mcount regs in save_mcount_regs */ -#define MCOUNT_REG_SIZE (SS+8 + MCOUNT_FRAME_SIZE) +#define MCOUNT_REG_SIZE (FRAME_SIZE + MCOUNT_FRAME_SIZE) /* * gcc -pg option adds a call to 'mcount' in most functions. @@ -77,7 +77,7 @@ /* * We add enough stack to save all regs. */ - subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp + subq $(FRAME_SIZE), %rsp movq %rax, RAX(%rsp) movq %rcx, RCX(%rsp) movq %rdx, RDX(%rsp) -- cgit v1.2.3 From 9f2dfd61dd022d4559d42a832fb03e76aad36c5f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Apr 2020 16:51:11 +0200 Subject: x86,ftrace: Shrink ftrace_regs_caller() by one byte 'Optimize' ftrace_regs_caller. Instead of comparing against an immediate, the more natural way to test for zero on x86 is: 'test %r,%r'. 48 83 f8 00 cmp $0x0,%rax 74 49 je 226 48 85 c0 test %rax,%rax 74 49 je 225 Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Reviewed-by: Alexandre Chartre Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200416115118.867411350@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index be9aff20dd5f..9738ed23964e 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -240,8 +240,8 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) * See arch_ftrace_set_direct_caller(). */ movq ORIG_RAX(%rsp), %rax - cmpq $0, %rax - je 1f + testq %rax, %rax + jz 1f /* Swap the flags with orig_rax */ movq MCOUNT_REG_SIZE(%rsp), %rdi -- cgit v1.2.3 From c536ed2fffd5dbf81fe2dede8ef294e0cbb08f72 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Apr 2020 16:54:26 +0200 Subject: objtool: Remove SAVE/RESTORE hints The SAVE/RESTORE hints are now unused; remove them. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Reviewed-by: Alexandre Chartre Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200416115118.926738768@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/orc_types.h | 4 +--- arch/x86/include/asm/unwind_hints.h | 27 ---------------------- tools/arch/x86/include/asm/orc_types.h | 4 +--- tools/objtool/check.c | 42 +++------------------------------- tools/objtool/check.h | 2 +- 5 files changed, 6 insertions(+), 73 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 5f18ca7ac51a..d25534940bde 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -58,9 +58,7 @@ #define ORC_TYPE_CALL 0 #define ORC_TYPE_REGS 1 #define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_SAVE 3 -#define UNWIND_HINT_TYPE_RESTORE 4 -#define UNWIND_HINT_TYPE_RET_OFFSET 5 +#define UNWIND_HINT_TYPE_RET_OFFSET 3 #ifndef __ASSEMBLY__ /* diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index aabf7ace0476..7d903fdb3f43 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -86,15 +86,6 @@ UNWIND_HINT sp_offset=\sp_offset .endm -.macro UNWIND_HINT_SAVE - UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE -.endm - -.macro UNWIND_HINT_RESTORE - UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE -.endm - - /* * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN * and sibling calls. On these, sp_offset denotes the expected offset from @@ -104,24 +95,6 @@ UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset .endm -#else /* !__ASSEMBLY__ */ - -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ - "987: \n\t" \ - ".pushsection .discard.unwind_hints\n\t" \ - /* struct unwind_hint */ \ - ".long 987b - .\n\t" \ - ".short " __stringify(sp_offset) "\n\t" \ - ".byte " __stringify(sp_reg) "\n\t" \ - ".byte " __stringify(type) "\n\t" \ - ".byte " __stringify(end) "\n\t" \ - ".balign 4 \n\t" \ - ".popsection\n\t" - -#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0) - -#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0) - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index 5f18ca7ac51a..d25534940bde 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -58,9 +58,7 @@ #define ORC_TYPE_CALL 0 #define ORC_TYPE_REGS 1 #define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_SAVE 3 -#define UNWIND_HINT_TYPE_RESTORE 4 -#define UNWIND_HINT_TYPE_RET_OFFSET 5 +#define UNWIND_HINT_TYPE_RET_OFFSET 3 #ifndef __ASSEMBLY__ /* diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 93c88ac51f0f..464f10c0a5ac 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1254,15 +1254,7 @@ static int read_unwind_hints(struct objtool_file *file) cfa = &insn->state.cfa; - if (hint->type == UNWIND_HINT_TYPE_SAVE) { - insn->save = true; - continue; - - } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) { - insn->restore = true; - insn->hint = true; - - } else if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) { + if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) { insn->ret_offset = hint->sp_offset; continue; } @@ -2113,37 +2105,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; } - if (insn->hint) { - if (insn->restore) { - struct instruction *save_insn, *i; - - i = insn; - save_insn = NULL; - sym_for_each_insn_continue_reverse(file, func, i) { - if (i->save) { - save_insn = i; - break; - } - } - - if (!save_insn) { - WARN_FUNC("no corresponding CFI save for CFI restore", - sec, insn->offset); - return 1; - } - - if (!save_insn->visited) { - WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", - sec, insn->offset); - return 1; - } - - insn->state = save_insn->state; - } - + if (insn->hint) state = insn->state; - - } else + else insn->state = state; insn->visited |= visited; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 81ce27e62c04..7c30760bee74 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -34,7 +34,7 @@ struct instruction { enum insn_type type; unsigned long immediate; bool alt_group, dead_end, ignore, ignore_alts; - bool hint, save, restore; + bool hint; bool retpoline_safe; u8 visited; u8 ret_offset; -- cgit v1.2.3 From 1ff865e343c2b59469d7e41d370a980a3f972c71 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 28 Apr 2020 19:57:59 +0200 Subject: x86,smap: Fix smap_{save,restore}() alternatives As reported by objtool: lib/ubsan.o: warning: objtool: .altinstr_replacement+0x0: alternative modifies stack lib/ubsan.o: warning: objtool: .altinstr_replacement+0x7: alternative modifies stack the smap_{save,restore}() alternatives violate (the newly enforced) rule on stack invariance. That is, due to there only being a single ORC table it must be valid to any alternative. These alternatives violate this with the direct result that unwinds will not be correct when it hits between the PUSH and POP instructions. Rewrite the functions to only have a conditional jump. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200429101802.GI13592@hirez.programming.kicks-ass.net --- arch/x86/include/asm/smap.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 27c47d183f4b..8b58d6975d5d 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void) { unsigned long flags; - asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC, - X86_FEATURE_SMAP) + asm volatile ("# smap_save\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "pushf; pop %0; " __ASM_CLAC "\n\t" + "1:" : "=rm" (flags) : : "memory", "cc"); return flags; @@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { - asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP) + asm volatile ("# smap_restore\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "push %0; popf\n\t" + "1:" : : "g" (flags) : "memory", "cc"); } -- cgit v1.2.3 From 089dd8e53126ebaf506e2dc0bf89d652c36bfc12 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Apr 2020 12:36:16 +0200 Subject: x86/speculation: Change FILL_RETURN_BUFFER to work with objtool Change FILL_RETURN_BUFFER so that objtool groks it and can generate correct ORC unwind information. - Since ORC is alternative invariant; that is, all alternatives should have the same ORC entries, the __FILL_RETURN_BUFFER body can not be part of an alternative. Therefore, move it out of the alternative and keep the alternative as a sort of jump_label around it. - Use the ANNOTATE_INTRA_FUNCTION_CALL annotation to white-list these 'funny' call instructions to nowhere. - Use UNWIND_HINT_EMPTY to 'fill' the speculation traps, otherwise objtool will consider them unreachable. - Move the RSP adjustment into the loop, such that the loop has a deterministic stack layout. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Alexandre Chartre Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200428191700.032079304@infradead.org --- arch/x86/include/asm/nospec-branch.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7e9a281e2660..b8890e18f624 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -4,11 +4,13 @@ #define _ASM_X86_NOSPEC_BRANCH_H_ #include +#include #include #include #include #include +#include /* * This should be used immediately before a retpoline alternative. It tells @@ -46,21 +48,25 @@ #define __FILL_RETURN_BUFFER(reg, nr, sp) \ mov $(nr/2), reg; \ 771: \ + ANNOTATE_INTRA_FUNCTION_CALL; \ call 772f; \ 773: /* speculation trap */ \ + UNWIND_HINT_EMPTY; \ pause; \ lfence; \ jmp 773b; \ 772: \ + ANNOTATE_INTRA_FUNCTION_CALL; \ call 774f; \ 775: /* speculation trap */ \ + UNWIND_HINT_EMPTY; \ pause; \ lfence; \ jmp 775b; \ 774: \ + add $(BITS_PER_LONG/8) * 2, sp; \ dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; + jnz 771b; #ifdef __ASSEMBLY__ @@ -137,10 +143,8 @@ */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) .Lskip_rsb_\@: #endif .endm -- cgit v1.2.3 From ca3f0d80dd57c8828bfb5bc0bc79750ea7a1ba26 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Apr 2020 17:03:22 +0200 Subject: x86: Simplify retpoline declaration Because of how KSYM works, we need one declaration per line. Seeing how we're going to be doubling the amount of retpoline symbols, simplify the machinery in order to avoid having to copy/paste even more. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200428191700.091696925@infradead.org --- arch/x86/include/asm/GEN-for-each-reg.h | 25 ++++++++++++++++++++++ arch/x86/include/asm/asm-prototypes.h | 28 +++++++------------------ arch/x86/lib/retpoline.S | 37 ++++++++++++++------------------- 3 files changed, 49 insertions(+), 41 deletions(-) create mode 100644 arch/x86/include/asm/GEN-for-each-reg.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h new file mode 100644 index 000000000000..1b07fb102c4e --- /dev/null +++ b/arch/x86/include/asm/GEN-for-each-reg.h @@ -0,0 +1,25 @@ +#ifdef CONFIG_64BIT +GEN(rax) +GEN(rbx) +GEN(rcx) +GEN(rdx) +GEN(rsi) +GEN(rdi) +GEN(rbp) +GEN(r8) +GEN(r9) +GEN(r10) +GEN(r11) +GEN(r12) +GEN(r13) +GEN(r14) +GEN(r15) +#else +GEN(eax) +GEN(ebx) +GEN(ecx) +GEN(edx) +GEN(esi) +GEN(edi) +GEN(ebp) +#endif diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index ce92c4acc913..aa7585e1fe67 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -17,24 +17,12 @@ extern void cmpxchg8b_emu(void); #endif #ifdef CONFIG_RETPOLINE -#ifdef CONFIG_X86_32 -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); -#else -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); -INDIRECT_THUNK(8) -INDIRECT_THUNK(9) -INDIRECT_THUNK(10) -INDIRECT_THUNK(11) -INDIRECT_THUNK(12) -INDIRECT_THUNK(13) -INDIRECT_THUNK(14) -INDIRECT_THUNK(15) -#endif -INDIRECT_THUNK(ax) -INDIRECT_THUNK(bx) -INDIRECT_THUNK(cx) -INDIRECT_THUNK(dx) -INDIRECT_THUNK(si) -INDIRECT_THUNK(di) -INDIRECT_THUNK(bp) + +#define DECL_INDIRECT_THUNK(reg) \ + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); + +#undef GEN +#define GEN(reg) DECL_INDIRECT_THUNK(reg) +#include + #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 363ec132df7e..9cc5480300c9 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -24,25 +24,20 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) * only see one instance of "__x86_indirect_thunk_\reg" rather * than one per register with the correct names. So we do it * the simple and nasty way... + * + * Worse, you can only have a single EXPORT_SYMBOL per line, + * and CPP can't insert newlines, so we have to repeat everything + * at least twice. */ -#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) -#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) -#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) - -GENERATE_THUNK(_ASM_AX) -GENERATE_THUNK(_ASM_BX) -GENERATE_THUNK(_ASM_CX) -GENERATE_THUNK(_ASM_DX) -GENERATE_THUNK(_ASM_SI) -GENERATE_THUNK(_ASM_DI) -GENERATE_THUNK(_ASM_BP) -#ifdef CONFIG_64BIT -GENERATE_THUNK(r8) -GENERATE_THUNK(r9) -GENERATE_THUNK(r10) -GENERATE_THUNK(r11) -GENERATE_THUNK(r12) -GENERATE_THUNK(r13) -GENERATE_THUNK(r14) -GENERATE_THUNK(r15) -#endif + +#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) +#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) + +#undef GEN +#define GEN(reg) THUNK reg +#include + +#undef GEN +#define GEN(reg) EXPORT_THUNK(reg) +#include + -- cgit v1.2.3 From 34fdce6981b96920ced4e0ee56e9db3fb03a33f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Apr 2020 17:16:40 +0200 Subject: x86: Change {JMP,CALL}_NOSPEC argument In order to change the {JMP,CALL}_NOSPEC macros to call out-of-line versions of the retpoline magic, we need to remove the '%' from the argument, such that we can paste it onto symbol names. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200428191700.151623523@infradead.org --- arch/x86/crypto/aesni-intel_asm.S | 4 ++-- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 2 +- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 2 +- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 26 +++++++++++++------------- arch/x86/entry/entry_32.S | 6 +++--- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/nospec-branch.h | 16 ++++++++-------- arch/x86/kernel/ftrace_32.S | 2 +- arch/x86/kernel/ftrace_64.S | 4 ++-- arch/x86/lib/checksum_32.S | 4 ++-- arch/x86/platform/efi/efi_stub_64.S | 2 +- 11 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index cad6e1bfa7d5..54e7d15dbd0d 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2758,7 +2758,7 @@ SYM_FUNC_START(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - CALL_NOSPEC %r11 + CALL_NOSPEC r11 movdqu 0x00(OUTP), INC pxor INC, STATE1 @@ -2803,7 +2803,7 @@ SYM_FUNC_START(aesni_xts_crypt8) _aesni_gf128mul_x_ble() movups IV, (IVP) - CALL_NOSPEC %r11 + CALL_NOSPEC r11 movdqu 0x40(OUTP), INC pxor INC, STATE1 diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index d01ddd73de65..ecc0a9a905c4 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -1228,7 +1228,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way) vpxor 14 * 16(%rax), %xmm15, %xmm14; vpxor 15 * 16(%rax), %xmm15, %xmm15; - CALL_NOSPEC %r9; + CALL_NOSPEC r9; addq $(16 * 16), %rsp; diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 563ef6e83cdd..0907243c501c 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -1339,7 +1339,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way) vpxor 14 * 32(%rax), %ymm15, %ymm14; vpxor 15 * 32(%rax), %ymm15, %ymm15; - CALL_NOSPEC %r9; + CALL_NOSPEC r9; addq $(16 * 32), %rsp; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 0e6690e3618c..8501ec4532f4 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -75,7 +75,7 @@ .text SYM_FUNC_START(crc_pcl) -#define bufp %rdi +#define bufp rdi #define bufp_dw %edi #define bufp_w %di #define bufp_b %dil @@ -105,9 +105,9 @@ SYM_FUNC_START(crc_pcl) ## 1) ALIGN: ################################################################ - mov bufp, bufptmp # rdi = *buf - neg bufp - and $7, bufp # calculate the unalignment amount of + mov %bufp, bufptmp # rdi = *buf + neg %bufp + and $7, %bufp # calculate the unalignment amount of # the address je proc_block # Skip if aligned @@ -123,13 +123,13 @@ SYM_FUNC_START(crc_pcl) do_align: #### Calculate CRC of unaligned bytes of the buffer (if any) movq (bufptmp), tmp # load a quadward from the buffer - add bufp, bufptmp # align buffer pointer for quadword + add %bufp, bufptmp # align buffer pointer for quadword # processing - sub bufp, len # update buffer length + sub %bufp, len # update buffer length align_loop: crc32b %bl, crc_init_dw # compute crc32 of 1-byte shr $8, tmp # get next byte - dec bufp + dec %bufp jne align_loop proc_block: @@ -169,10 +169,10 @@ continue_block: xor crc2, crc2 ## branch into array - lea jump_table(%rip), bufp - movzxw (bufp, %rax, 2), len - lea crc_array(%rip), bufp - lea (bufp, len, 1), bufp + lea jump_table(%rip), %bufp + movzxw (%bufp, %rax, 2), len + lea crc_array(%rip), %bufp + lea (%bufp, len, 1), %bufp JMP_NOSPEC bufp ################################################################ @@ -218,9 +218,9 @@ LABEL crc_ %i ## 4) Combine three results: ################################################################ - lea (K_table-8)(%rip), bufp # first entry is for idx 1 + lea (K_table-8)(%rip), %bufp # first entry is for idx 1 shlq $3, %rax # rax *= 8 - pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2 + pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2 leal (%eax,%eax,2), %eax # rax *= 3 (total *24) subq %rax, tmp # tmp -= rax*24 diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index b67bae7091d7..7e7ffb7a5147 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -816,7 +816,7 @@ SYM_CODE_START(ret_from_fork) /* kernel thread */ 1: movl %edi, %eax - CALL_NOSPEC %ebx + CALL_NOSPEC ebx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -1501,7 +1501,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2) TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - CALL_NOSPEC %edi + CALL_NOSPEC edi jmp ret_from_exception SYM_CODE_END(common_exception_read_cr2) @@ -1522,7 +1522,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception) TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - CALL_NOSPEC %edi + CALL_NOSPEC edi jmp ret_from_exception SYM_CODE_END(common_exception) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 0e9504fabe52..168b798913bc 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -349,7 +349,7 @@ SYM_CODE_START(ret_from_fork) /* kernel thread */ UNWIND_HINT_EMPTY movq %r12, %rdi - CALL_NOSPEC %rbx + CALL_NOSPEC rbx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index b8890e18f624..d3269b69728a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -118,22 +118,22 @@ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ - __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ + __stringify(RETPOLINE_JMP %\reg), X86_FEATURE_RETPOLINE,\ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD #else - jmp *\reg + jmp *%\reg #endif .endm .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ - __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg),\ + __stringify(RETPOLINE_CALL %\reg), X86_FEATURE_RETPOLINE,\ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD #else - call *\reg + call *%\reg #endif .endm diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index e8a9f8370112..e405fe1a8bf4 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -189,5 +189,5 @@ return_to_handler: movl %eax, %ecx popl %edx popl %eax - JMP_NOSPEC %ecx + JMP_NOSPEC ecx #endif diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 9738ed23964e..aa5d28aeb31e 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -301,7 +301,7 @@ trace: * function tracing is enabled. */ movq ftrace_trace_function, %r8 - CALL_NOSPEC %r8 + CALL_NOSPEC r8 restore_mcount_regs jmp fgraph_trace @@ -338,6 +338,6 @@ SYM_CODE_START(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $24, %rsp - JMP_NOSPEC %rdi + JMP_NOSPEC rdi SYM_CODE_END(return_to_handler) #endif diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4742e8fa7ee7..d1d768912368 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -153,7 +153,7 @@ SYM_FUNC_START(csum_partial) negl %ebx lea 45f(%ebx,%ebx,2), %ebx testl %esi, %esi - JMP_NOSPEC %ebx + JMP_NOSPEC ebx # Handle 2-byte-aligned regions 20: addw (%esi), %ax @@ -436,7 +436,7 @@ SYM_FUNC_START(csum_partial_copy_generic) andl $-32,%edx lea 3f(%ebx,%ebx), %ebx testl %esi, %esi - JMP_NOSPEC %ebx + JMP_NOSPEC ebx 1: addl $64,%esi addl $64,%edi SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 15da118f04f0..90380a17ab23 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -21,7 +21,7 @@ SYM_FUNC_START(__efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - CALL_NOSPEC %rdi + CALL_NOSPEC rdi leave ret SYM_FUNC_END(__efi_call) -- cgit v1.2.3 From cc1ac9c792810b93783a7de344f428922af8d98c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Apr 2020 14:34:26 +0200 Subject: x86/retpoline: Fix retpoline unwind Currently objtool cannot understand retpolines, and thus cannot generate ORC unwind information for them. This means that we cannot unwind from the middle of a retpoline. The recent ANNOTATE_INTRA_FUNCTION_CALL and UNWIND_HINT_RET_OFFSET support in objtool enables it to understand the basic retpoline construct. A further problem is that the ORC unwind information is alternative invariant; IOW. every alternative should have the same ORC, retpolines obviously violate this. This means we need to out-of-line them. Since all GCC generated code already uses out-of-line retpolines, this should not affect performance much, if anything. This will enable objtool to generate valid ORC data for the out-of-line copies, which means we can correctly and reliably unwind through a retpoline. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20200428191700.210835357@infradead.org --- arch/x86/include/asm/asm-prototypes.h | 7 +++++ arch/x86/include/asm/nospec-branch.h | 56 +++++------------------------------ arch/x86/lib/retpoline.S | 26 ++++++++++++++-- 3 files changed, 38 insertions(+), 51 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index aa7585e1fe67..9bf2620ce817 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -21,8 +21,15 @@ extern void cmpxchg8b_emu(void); #define DECL_INDIRECT_THUNK(reg) \ extern asmlinkage void __x86_indirect_thunk_ ## reg (void); +#define DECL_RETPOLINE(reg) \ + extern asmlinkage void __x86_retpoline_ ## reg (void); + #undef GEN #define GEN(reg) DECL_INDIRECT_THUNK(reg) #include +#undef GEN +#define GEN(reg) DECL_RETPOLINE(reg) +#include + #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d3269b69728a..d52d1aacdd97 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -12,15 +12,6 @@ #include #include -/* - * This should be used immediately before a retpoline alternative. It tells - * objtool where the retpolines are so that it can make sense of the control - * flow by just reading the original instruction(s) and ignoring the - * alternatives. - */ -#define ANNOTATE_NOSPEC_ALTERNATIVE \ - ANNOTATE_IGNORE_ALTERNATIVE - /* * Fill the CPU return stack buffer. * @@ -82,34 +73,6 @@ .popsection .endm -/* - * These are the bare retpoline primitives for indirect jmp and call. - * Do not use these directly; they only exist to make the ALTERNATIVE - * invocation below less ugly. - */ -.macro RETPOLINE_JMP reg:req - call .Ldo_rop_\@ -.Lspec_trap_\@: - pause - lfence - jmp .Lspec_trap_\@ -.Ldo_rop_\@: - mov \reg, (%_ASM_SP) - ret -.endm - -/* - * This is a wrapper around RETPOLINE_JMP so the called function in reg - * returns to the instruction after the macro. - */ -.macro RETPOLINE_CALL reg:req - jmp .Ldo_call_\@ -.Ldo_retpoline_jmp_\@: - RETPOLINE_JMP \reg -.Ldo_call_\@: - call .Ldo_retpoline_jmp_\@ -.endm - /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 @@ -117,10 +80,9 @@ */ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ - __stringify(RETPOLINE_JMP %\reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ + __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD #else jmp *%\reg #endif @@ -128,10 +90,9 @@ .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg),\ - __stringify(RETPOLINE_CALL %\reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ + __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD #else call *%\reg #endif @@ -165,16 +126,16 @@ * which is ensured when CONFIG_RETPOLINE is defined. */ # define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ + "call __x86_retpoline_%V[thunk_target]\n", \ X86_FEATURE_RETPOLINE, \ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ X86_FEATURE_RETPOLINE_AMD) + # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #else /* CONFIG_X86_32 */ @@ -184,7 +145,6 @@ * here, anyway. */ # define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 9cc5480300c9..b4c43a9b1483 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,15 +7,31 @@ #include #include #include +#include +#include .macro THUNK reg .section .text.__x86.indirect_thunk + .align 32 SYM_FUNC_START(__x86_indirect_thunk_\reg) - CFI_STARTPROC - JMP_NOSPEC %\reg - CFI_ENDPROC + JMP_NOSPEC \reg SYM_FUNC_END(__x86_indirect_thunk_\reg) + +SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg) + ANNOTATE_INTRA_FUNCTION_CALL + call .Ldo_rop_\@ +.Lspec_trap_\@: + UNWIND_HINT_EMPTY + pause + lfence + jmp .Lspec_trap_\@ +.Ldo_rop_\@: + mov %\reg, (%_ASM_SP) + UNWIND_HINT_RET_OFFSET + ret +SYM_FUNC_END(__x86_retpoline_\reg) + .endm /* @@ -32,6 +48,7 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) +#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg) #undef GEN #define GEN(reg) THUNK reg @@ -41,3 +58,6 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) #define GEN(reg) EXPORT_THUNK(reg) #include +#undef GEN +#define GEN(reg) EXPORT_RETPOLINE(reg) +#include -- cgit v1.2.3