From c3d7fa6684b5b3a07a48fc379d27bfb8a96661d9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 20 Jan 2021 14:55:42 +0100 Subject: x86/xen: Use specific Xen pv interrupt entry for MCE Xen PV guests don't use IST. For machine check interrupts, switch to the same model as debug interrupts. Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210120135555.32594-3-jgross@suse.com --- arch/x86/include/asm/idtentry.h | 3 +++ arch/x86/xen/enlighten_pv.c | 16 +++++++++++++++- arch/x86/xen/xen-asm.S | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index f656aabd1545..616909ea258d 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -585,6 +585,9 @@ DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); #else DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check); #endif +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check); +#endif #endif /* NMI */ diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 9a5a50cdaab5..9db1d31bd59f 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -590,6 +590,20 @@ DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) BUG(); } +#ifdef CONFIG_X86_MCE +DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check) +{ + /* + * There's no IST on Xen PV, but we still need to dispatch + * to the correct handler. + */ + if (user_mode(regs)) + noist_exc_machine_check(regs); + else + exc_machine_check(regs); +} +#endif + struct trap_array_entry { void (*orig)(void); void (*xen)(void); @@ -610,7 +624,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY_REDIR(exc_debug, true ), TRAP_ENTRY(exc_double_fault, true ), #ifdef CONFIG_X86_MCE - TRAP_ENTRY(exc_machine_check, true ), + TRAP_ENTRY_REDIR(exc_machine_check, true ), #endif TRAP_ENTRY_REDIR(exc_nmi, true ), TRAP_ENTRY(exc_int3, false ), diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 53cf8aa35032..cd330ce47b82 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -172,7 +172,7 @@ xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_coprocessor_error xen_pv_trap asm_exc_alignment_check #ifdef CONFIG_X86_MCE -xen_pv_trap asm_exc_machine_check +xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION -- cgit v1.2.3 From 5b4c6d65019bff65757f61adbbad5e45a333b800 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 20 Jan 2021 14:55:43 +0100 Subject: x86/xen: Use specific Xen pv interrupt entry for DF Xen PV guests don't use IST. For double fault interrupts, switch to the same model as NMI. Correct a typo in a comment while copying it. Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210120135555.32594-4-jgross@suse.com --- arch/x86/include/asm/idtentry.h | 3 +++ arch/x86/xen/enlighten_pv.c | 10 ++++++++-- arch/x86/xen/xen-asm.S | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 616909ea258d..41e2e2e1b439 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -608,6 +608,9 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug); /* #DF */ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); +#endif /* #VC */ #ifdef CONFIG_AMD_MEM_ENCRYPT diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 9db1d31bd59f..1fec2ee7a4dd 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -567,10 +567,16 @@ void noist_exc_debug(struct pt_regs *regs); DEFINE_IDTENTRY_RAW(xenpv_exc_nmi) { - /* On Xen PV, NMI doesn't use IST. The C part is the sane as native. */ + /* On Xen PV, NMI doesn't use IST. The C part is the same as native. */ exc_nmi(regs); } +DEFINE_IDTENTRY_RAW_ERRORCODE(xenpv_exc_double_fault) +{ + /* On Xen PV, DF doesn't use IST. The C part is the same as native. */ + exc_double_fault(regs, error_code); +} + DEFINE_IDTENTRY_RAW(xenpv_exc_debug) { /* @@ -622,7 +628,7 @@ struct trap_array_entry { static struct trap_array_entry trap_array[] = { TRAP_ENTRY_REDIR(exc_debug, true ), - TRAP_ENTRY(exc_double_fault, true ), + TRAP_ENTRY_REDIR(exc_double_fault, true ), #ifdef CONFIG_X86_MCE TRAP_ENTRY_REDIR(exc_machine_check, true ), #endif diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index cd330ce47b82..eac9dac3656a 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -161,7 +161,7 @@ xen_pv_trap asm_exc_overflow xen_pv_trap asm_exc_bounds xen_pv_trap asm_exc_invalid_op xen_pv_trap asm_exc_device_not_available -xen_pv_trap asm_exc_double_fault +xen_pv_trap asm_xenpv_exc_double_fault xen_pv_trap asm_exc_coproc_segment_overrun xen_pv_trap asm_exc_invalid_tss xen_pv_trap asm_exc_segment_not_present -- cgit v1.2.3 From 53c9d9240944088274aadbbbafc6138ca462db4f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 20 Jan 2021 14:55:44 +0100 Subject: x86/pv: Switch SWAPGS to ALTERNATIVE SWAPGS is used only for interrupts coming from user mode or for returning to user mode. So there is no reason to use the PARAVIRT framework, as it can easily be replaced by an ALTERNATIVE depending on X86_FEATURE_XENPV. There are several instances using the PV-aware SWAPGS macro in paths which are never executed in a Xen PV guest. Replace those with the plain swapgs instruction. For SWAPGS_UNSAFE_STACK the same applies. Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Thomas Gleixner Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210120135555.32594-5-jgross@suse.com --- arch/x86/entry/entry_64.S | 10 +++++----- arch/x86/include/asm/irqflags.h | 20 ++++++++------------ arch/x86/include/asm/paravirt.h | 20 -------------------- arch/x86/include/asm/paravirt_types.h | 2 -- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/paravirt.c | 1 - arch/x86/kernel/paravirt_patch.c | 3 --- arch/x86/xen/enlighten_pv.c | 3 --- 8 files changed, 13 insertions(+), 47 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index cad08703c4ad..a876204a73e0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -669,7 +669,7 @@ native_irq_return_ldt: */ pushq %rdi /* Stash user RDI */ - SWAPGS /* to kernel GS */ + swapgs /* to kernel GS */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ movq PER_CPU_VAR(espfix_waddr), %rdi @@ -699,7 +699,7 @@ native_irq_return_ldt: orq PER_CPU_VAR(espfix_stack), %rax SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi - SWAPGS /* to user GS */ + swapgs /* to user GS */ popq %rdi /* Restore user RDI */ movq %rax, %rsp @@ -943,7 +943,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) ret .Lparanoid_entry_swapgs: - SWAPGS + swapgs /* * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an @@ -1001,7 +1001,7 @@ SYM_CODE_START_LOCAL(paranoid_exit) jnz restore_regs_and_return_to_kernel /* We are returning to a context with user GSBASE */ - SWAPGS_UNSAFE_STACK + swapgs jmp restore_regs_and_return_to_kernel SYM_CODE_END(paranoid_exit) @@ -1426,7 +1426,7 @@ nmi_no_fsgsbase: jnz nmi_restore nmi_swapgs: - SWAPGS_UNSAFE_STACK + swapgs nmi_restore: POP_REGS diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 2dfc8d380dab..8c86edefa115 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -131,18 +131,6 @@ static __always_inline unsigned long arch_local_irq_save(void) #define SAVE_FLAGS(x) pushfq; popq %rax #endif -#define SWAPGS swapgs -/* - * Currently paravirt can't handle swapgs nicely when we - * don't have a stack we can rely on (such as a user space - * stack). So we either find a way around these or just fault - * and emulate if a guest tries to call swapgs directly. - * - * Either way, this is a good way to document that we don't - * have a reliable stack. x86_64 only. - */ -#define SWAPGS_UNSAFE_STACK swapgs - #define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ @@ -170,6 +158,14 @@ static __always_inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(flags); } +#else +#ifdef CONFIG_X86_64 +#ifdef CONFIG_XEN_PV +#define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV +#else +#define SWAPGS swapgs +#endif +#endif #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index f8dce11d2bc1..f2ebe109a37e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -776,26 +776,6 @@ extern void default_banner(void); #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL -/* - * If swapgs is used while the userspace stack is still current, - * there's no way to call a pvop. The PV replacement *must* be - * inlined, or the swapgs instruction must be trapped and emulated. - */ -#define SWAPGS_UNSAFE_STACK \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs) - -/* - * Note: swapgs is very special, and in practise is either going to be - * implemented with a single "swapgs" instruction or something very - * special. Either way, we don't need to save any registers for - * it. - */ -#define SWAPGS \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ - ) - #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ ANNOTATE_RETPOLINE_SAFE; \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index b6b02b7c19cc..130f428b0cc8 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -168,8 +168,6 @@ struct pv_cpu_ops { frame set up. */ void (*iret)(void); - void (*swapgs)(void); - void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 828be792231e..1354bc30614d 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -15,7 +15,6 @@ int main(void) #ifdef CONFIG_PARAVIRT_XXL OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template, cpu.usergs_sysret64); - OFFSET(PV_CPU_swapgs, paravirt_patch_template, cpu.swapgs); #ifdef CONFIG_DEBUG_ENTRY OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl); #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 6c3407ba6ee9..5e5fcf5c376d 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -312,7 +312,6 @@ struct paravirt_patch_template pv_ops = { .cpu.usergs_sysret64 = native_usergs_sysret64, .cpu.iret = native_iret, - .cpu.swapgs = native_swapgs, #ifdef CONFIG_X86_IOPL_IOPERM .cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap, diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c index ace6e334cb39..7c518b08aa3c 100644 --- a/arch/x86/kernel/paravirt_patch.c +++ b/arch/x86/kernel/paravirt_patch.c @@ -28,7 +28,6 @@ struct patch_xxl { const unsigned char irq_restore_fl[2]; const unsigned char cpu_wbinvd[2]; const unsigned char cpu_usergs_sysret64[6]; - const unsigned char cpu_swapgs[3]; const unsigned char mov64[3]; }; @@ -43,7 +42,6 @@ static const struct patch_xxl patch_data_xxl = { .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd .cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8, 0x48, 0x0f, 0x07 }, // swapgs; sysretq - .cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax }; @@ -86,7 +84,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr, PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len); PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len); - PATCH_CASE(cpu, swapgs, xxl, insn_buff, len); PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len); #endif diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 1fec2ee7a4dd..95f37996152a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1098,9 +1098,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { #endif .io_delay = xen_io_delay, - /* Xen takes care of %gs when switching to usermode for us */ - .swapgs = paravirt_nop, - .start_context_switch = paravirt_start_context_switch, .end_context_switch = xen_end_context_switch, }; -- cgit v1.2.3 From afd30525a659ac0ae0904f0cb4a2ca75522c3123 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 20 Jan 2021 14:55:45 +0100 Subject: x86/xen: Drop USERGS_SYSRET64 paravirt call USERGS_SYSRET64 is used to return from a syscall via SYSRET, but a Xen PV guest will nevertheless use the IRET hypercall, as there is no sysret PV hypercall defined. So instead of testing all the prerequisites for doing a sysret and then mangling the stack for Xen PV again for doing an iret just use the iret exit from the beginning. This can easily be done via an ALTERNATIVE like it is done for the sysenter compat case already. It should be noted that this drops the optimization in Xen for not restoring a few registers when returning to user mode, but it seems as if the saved instructions in the kernel more than compensate for this drop (a kernel build in a Xen PV guest was slightly faster with this patch applied). While at it remove the stale sysret32 remnants. Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210120135555.32594-6-jgross@suse.com --- arch/x86/entry/entry_64.S | 16 +++++++--------- arch/x86/include/asm/irqflags.h | 6 ------ arch/x86/include/asm/paravirt.h | 5 ----- arch/x86/include/asm/paravirt_types.h | 8 -------- arch/x86/kernel/asm-offsets_64.c | 2 -- arch/x86/kernel/paravirt.c | 5 +---- arch/x86/kernel/paravirt_patch.c | 4 ---- arch/x86/xen/enlighten_pv.c | 1 - arch/x86/xen/xen-asm.S | 20 -------------------- arch/x86/xen/xen-ops.h | 2 -- 10 files changed, 8 insertions(+), 61 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a876204a73e0..ce0464d630a2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -46,14 +46,6 @@ .code64 .section .entry.text, "ax" -#ifdef CONFIG_PARAVIRT_XXL -SYM_CODE_START(native_usergs_sysret64) - UNWIND_HINT_EMPTY - swapgs - sysretq -SYM_CODE_END(native_usergs_sysret64) -#endif /* CONFIG_PARAVIRT_XXL */ - /* * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. * @@ -123,7 +115,12 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * Try to use SYSRET instead of IRET if we're returning to * a completely clean 64-bit userspace context. If we're not, * go to the slow exit path. + * In the Xen PV case we must use iret anyway. */ + + ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \ + X86_FEATURE_XENPV + movq RCX(%rsp), %rcx movq RIP(%rsp), %r11 @@ -215,7 +212,8 @@ syscall_return_via_sysret: popq %rdi popq %rsp - USERGS_SYSRET64 + swapgs + sysretq SYM_CODE_END(entry_SYSCALL_64) /* diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8c86edefa115..e585a4705b8d 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -132,12 +132,6 @@ static __always_inline unsigned long arch_local_irq_save(void) #endif #define INTERRUPT_RETURN jmp native_iret -#define USERGS_SYSRET64 \ - swapgs; \ - sysretq; -#define USERGS_SYSRET32 \ - swapgs; \ - sysretl #else #define INTERRUPT_RETURN iret diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index f2ebe109a37e..dd43b1100a87 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -776,11 +776,6 @@ extern void default_banner(void); #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL -#define USERGS_SYSRET64 \ - PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);) - #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(clobbers) \ PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 130f428b0cc8..0169365f1403 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -156,14 +156,6 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); - /* - * Switch to usermode gs and return to 64-bit usermode using - * sysret. Only used in 64-bit kernels to return to 64-bit - * processes. Usermode register state, including %rsp, must - * already be restored. - */ - void (*usergs_sysret64)(void); - /* Normal iret. Jump to this with the standard iret stack frame set up. */ void (*iret)(void); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 1354bc30614d..b14533af7676 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -13,8 +13,6 @@ int main(void) { #ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL - OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template, - cpu.usergs_sysret64); #ifdef CONFIG_DEBUG_ENTRY OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl); #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5e5fcf5c376d..18560b71e717 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -135,8 +135,7 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff, else if (opfunc == _paravirt_ident_64) ret = paravirt_patch_ident_64(insn_buff, len); - else if (type == PARAVIRT_PATCH(cpu.iret) || - type == PARAVIRT_PATCH(cpu.usergs_sysret64)) + else if (type == PARAVIRT_PATCH(cpu.iret)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len); #endif @@ -170,7 +169,6 @@ static u64 native_steal_clock(int cpu) /* These are in entry.S */ extern void native_iret(void); -extern void native_usergs_sysret64(void); static struct resource reserve_ioports = { .start = 0, @@ -310,7 +308,6 @@ struct paravirt_patch_template pv_ops = { .cpu.load_sp0 = native_load_sp0, - .cpu.usergs_sysret64 = native_usergs_sysret64, .cpu.iret = native_iret, #ifdef CONFIG_X86_IOPL_IOPERM diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c index 7c518b08aa3c..2fada2c347c9 100644 --- a/arch/x86/kernel/paravirt_patch.c +++ b/arch/x86/kernel/paravirt_patch.c @@ -27,7 +27,6 @@ struct patch_xxl { const unsigned char mmu_write_cr3[3]; const unsigned char irq_restore_fl[2]; const unsigned char cpu_wbinvd[2]; - const unsigned char cpu_usergs_sysret64[6]; const unsigned char mov64[3]; }; @@ -40,8 +39,6 @@ static const struct patch_xxl patch_data_xxl = { .mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3 .irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd - .cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8, - 0x48, 0x0f, 0x07 }, // swapgs; sysretq .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax }; @@ -83,7 +80,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr, PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len); PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len); - PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len); PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len); #endif diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 95f37996152a..6abf3f248197 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1073,7 +1073,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_pmc = xen_read_pmc, .iret = xen_iret, - .usergs_sysret64 = xen_sysret64, .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index eac9dac3656a..1d738c5957f5 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -215,26 +215,6 @@ SYM_CODE_START(xen_iret) jmp hypercall_iret SYM_CODE_END(xen_iret) -SYM_CODE_START(xen_sysret64) - /* - * We're already on the usermode stack at this point, but - * still with the kernel gs, so we can easily switch back. - * - * tss.sp2 is scratch space. - */ - movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - - pushq $__USER_DS - pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - pushq %r11 - pushq $__USER_CS - pushq %rcx - - pushq $VGCF_in_syscall - jmp hypercall_iret -SYM_CODE_END(xen_sysret64) - /* * Xen handles syscall callbacks much like ordinary exceptions, which * means we have: diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9546c3384c75..b2fd80a01a36 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -138,8 +138,6 @@ __visible unsigned long xen_read_cr2_direct(void); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); -__visible void xen_sysret32(void); -__visible void xen_sysret64(void); extern int xen_panic_handler_init(void); -- cgit v1.2.3 From ab234a260b1f625b26cbefa93ca365b0ae66df33 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 20 Jan 2021 14:55:46 +0100 Subject: x86/pv: Rework arch_local_irq_restore() to not use popf POPF is a rather expensive operation, so don't use it for restoring irq flags. Instead, test whether interrupts are enabled in the flags parameter and enable interrupts via STI in that case. This results in the restore_fl paravirt op to be no longer needed. Suggested-by: Andy Lutomirski Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210120135555.32594-7-jgross@suse.com --- arch/x86/include/asm/irqflags.h | 20 ++++++-------------- arch/x86/include/asm/paravirt.h | 5 ----- arch/x86/include/asm/paravirt_types.h | 7 ++----- arch/x86/kernel/irqflags.S | 11 ----------- arch/x86/kernel/paravirt.c | 1 - arch/x86/kernel/paravirt_patch.c | 3 --- arch/x86/xen/enlighten_pv.c | 2 -- arch/x86/xen/irq.c | 23 ----------------------- arch/x86/xen/xen-asm.S | 28 ---------------------------- arch/x86/xen/xen-ops.h | 1 - 10 files changed, 8 insertions(+), 93 deletions(-) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index e585a4705b8d..144d70ea4393 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -35,15 +35,6 @@ extern __always_inline unsigned long native_save_fl(void) return flags; } -extern inline void native_restore_fl(unsigned long flags); -extern inline void native_restore_fl(unsigned long flags) -{ - asm volatile("push %0 ; popf" - : /* no output */ - :"g" (flags) - :"memory", "cc"); -} - static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); @@ -79,11 +70,6 @@ static __always_inline unsigned long arch_local_save_flags(void) return native_save_fl(); } -static __always_inline void arch_local_irq_restore(unsigned long flags) -{ - native_restore_fl(flags); -} - static __always_inline void arch_local_irq_disable(void) { native_irq_disable(); @@ -152,6 +138,12 @@ static __always_inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(flags); } + +static __always_inline void arch_local_irq_restore(unsigned long flags) +{ + if (!arch_irqs_disabled_flags(flags)) + arch_local_irq_enable(); +} #else #ifdef CONFIG_X86_64 #ifdef CONFIG_XEN_PV diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index dd43b1100a87..4abf110e2243 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -648,11 +648,6 @@ static inline notrace unsigned long arch_local_save_flags(void) return PVOP_CALLEE0(unsigned long, irq.save_fl); } -static inline notrace void arch_local_irq_restore(unsigned long f) -{ - PVOP_VCALLEE1(irq.restore_fl, f); -} - static inline notrace void arch_local_irq_disable(void) { PVOP_VCALLEE0(irq.irq_disable); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0169365f1403..de87087d3bde 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -168,16 +168,13 @@ struct pv_cpu_ops { struct pv_irq_ops { #ifdef CONFIG_PARAVIRT_XXL /* - * Get/set interrupt state. save_fl and restore_fl are only - * expected to use X86_EFLAGS_IF; all other bits - * returned from save_fl are undefined, and may be ignored by - * restore_fl. + * Get/set interrupt state. save_fl is expected to use X86_EFLAGS_IF; + * all other bits returned from save_fl are undefined. * * NOTE: These functions callers expect the callee to preserve * more registers than the standard C calling convention. */ struct paravirt_callee_save save_fl; - struct paravirt_callee_save restore_fl; struct paravirt_callee_save irq_disable; struct paravirt_callee_save irq_enable; diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S index 0db0375235b4..8ef35063964b 100644 --- a/arch/x86/kernel/irqflags.S +++ b/arch/x86/kernel/irqflags.S @@ -13,14 +13,3 @@ SYM_FUNC_START(native_save_fl) ret SYM_FUNC_END(native_save_fl) EXPORT_SYMBOL(native_save_fl) - -/* - * void native_restore_fl(unsigned long flags) - * %eax/%rdi: flags - */ -SYM_FUNC_START(native_restore_fl) - push %_ASM_ARG1 - popf - ret -SYM_FUNC_END(native_restore_fl) -EXPORT_SYMBOL(native_restore_fl) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 18560b71e717..c60222ab8ab9 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -320,7 +320,6 @@ struct paravirt_patch_template pv_ops = { /* Irq ops. */ .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), - .irq.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), .irq.safe_halt = native_safe_halt, diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c index 2fada2c347c9..abd27ec67397 100644 --- a/arch/x86/kernel/paravirt_patch.c +++ b/arch/x86/kernel/paravirt_patch.c @@ -25,7 +25,6 @@ struct patch_xxl { const unsigned char mmu_read_cr2[3]; const unsigned char mmu_read_cr3[3]; const unsigned char mmu_write_cr3[3]; - const unsigned char irq_restore_fl[2]; const unsigned char cpu_wbinvd[2]; const unsigned char mov64[3]; }; @@ -37,7 +36,6 @@ static const struct patch_xxl patch_data_xxl = { .mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax .mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax .mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3 - .irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax }; @@ -71,7 +69,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr, switch (type) { #ifdef CONFIG_PARAVIRT_XXL - PATCH_CASE(irq, restore_fl, xxl, insn_buff, len); PATCH_CASE(irq, save_fl, xxl, insn_buff, len); PATCH_CASE(irq, irq_enable, xxl, insn_buff, len); PATCH_CASE(irq, irq_disable, xxl, insn_buff, len); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 6abf3f248197..dc0a337f985b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1035,8 +1035,6 @@ void __init xen_setup_vcpu_info_placement(void) */ if (xen_have_vcpu_info_placement) { pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); - pv_ops.irq.restore_fl = - __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); pv_ops.irq.irq_enable = diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 850c93f346c7..dfa091d79c2e 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -42,28 +42,6 @@ asmlinkage __visible unsigned long xen_save_fl(void) } PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); -__visible void xen_restore_fl(unsigned long flags) -{ - struct vcpu_info *vcpu; - - /* convert from IF type flag */ - flags = !(flags & X86_EFLAGS_IF); - - /* See xen_irq_enable() for why preemption must be disabled. */ - preempt_disable(); - vcpu = this_cpu_read(xen_vcpu); - vcpu->evtchn_upcall_mask = flags; - - if (flags == 0) { - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - xen_force_evtchn_callback(); - preempt_enable(); - } else - preempt_enable_no_resched(); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); - asmlinkage __visible void xen_irq_disable(void) { /* There's a one instruction preempt window here. We need to @@ -118,7 +96,6 @@ static void xen_halt(void) static const struct pv_irq_ops xen_irq_ops __initconst = { .save_fl = PV_CALLEE_SAVE(xen_save_fl), - .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), .irq_enable = PV_CALLEE_SAVE(xen_irq_enable), diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 1d738c5957f5..02f31341e435 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -72,34 +72,6 @@ SYM_FUNC_START(xen_save_fl_direct) ret SYM_FUNC_END(xen_save_fl_direct) - -/* - * In principle the caller should be passing us a value return from - * xen_save_fl_direct, but for robustness sake we test only the - * X86_EFLAGS_IF flag rather than the whole byte. After setting the - * interrupt mask state, it checks for unmasked pending events and - * enters the hypervisor to get them delivered if so. - */ -SYM_FUNC_START(xen_restore_fl_direct) - FRAME_BEGIN - testw $X86_EFLAGS_IF, %di - setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* - * Preempt here doesn't matter because that will deal with any - * pending interrupts. The pending check may end up being run - * on the wrong CPU, but that doesn't hurt. - */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jnz 1f - call check_events -1: - FRAME_END - ret -SYM_FUNC_END(xen_restore_fl_direct) - - /* * Force an event check by making a hypercall, but preserve regs * before making the call. diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index b2fd80a01a36..8d7ec49a35fb 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -131,7 +131,6 @@ static inline void __init xen_efi_init(struct boot_params *boot_params) __visible void xen_irq_enable_direct(void); __visible void xen_irq_disable_direct(void); __visible unsigned long xen_save_fl_direct(void); -__visible void xen_restore_fl_direct(unsigned long); __visible unsigned long xen_read_cr2(void); __visible unsigned long xen_read_cr2_direct(void); -- cgit v1.2.3