From 8c40397f22a4ff7996d3abdc2d9d1d90f9fc8054 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:15 +0100 Subject: x86/ptrace: Prevent truncation of bitmap size The active() callback of the IO bitmap regset divides the IO bitmap size by the word size (32/64 bit). As the I/O bitmap size is in bytes the active check fails for bitmap sizes of 1-3 bytes on 32bit and 1-7 bytes on 64bit. Use DIV_ROUND_UP() instead. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Reviewed-by: Andy Lutomirski --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 3c5bbe8e4120..7c526741215b 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -697,7 +697,7 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n, static int ioperm_active(struct task_struct *target, const struct user_regset *regset) { - return target->thread.io_bitmap_max / regset->size; + return DIV_ROUND_UP(target->thread.io_bitmap_max, regset->size); } static int ioperm_get(struct task_struct *target, -- cgit v1.2.3 From 2fff071d28b54f050f62654dad4ec111b8416d8e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:16 +0100 Subject: x86/process: Unify copy_thread_tls() While looking at the TSS io bitmap it turned out that any change in that area would require identical changes to copy_thread_tls(). The 32 and 64 bit variants share sufficient code to consolidate them into a common function to avoid duplication of upcoming modifications. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski --- arch/x86/include/asm/ptrace.h | 6 +++ arch/x86/include/asm/switch_to.h | 10 ++++ arch/x86/kernel/process.c | 100 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 68 -------------------------- arch/x86/kernel/process_64.c | 75 ----------------------------- 5 files changed, 116 insertions(+), 143 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 332eb3525867..5057a8ed100b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -361,5 +361,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); +#ifdef CONFIG_X86_64 +# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t) +#else +# define do_set_thread_area_64(p, s, t) (0) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 18a4b6890fa8..0e059b73437b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -103,7 +103,17 @@ static inline void update_task_stack(struct task_struct *task) if (static_cpu_has(X86_FEATURE_XENPV)) load_sp0(task_top_of_stack(task)); #endif +} +static inline void kthread_frame_init(struct inactive_task_frame *frame, + unsigned long fun, unsigned long arg) +{ + frame->bx = fun; +#ifdef CONFIG_X86_32 + frame->di = arg; +#else + frame->r12 = arg; +#endif } #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5e94c4354d4e..7e07f0bb0def 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -132,6 +132,106 @@ void exit_thread(struct task_struct *tsk) fpu__drop(fpu); } +static int set_new_tls(struct task_struct *p, unsigned long tls) +{ + struct user_desc __user *utls = (struct user_desc __user *)tls; + + if (in_ia32_syscall()) + return do_set_thread_area(p, -1, utls, 0); + else + return do_set_thread_area_64(p, ARCH_SET_FS, tls); +} + +static inline int copy_io_bitmap(struct task_struct *tsk) +{ + if (likely(!test_tsk_thread_flag(current, TIF_IO_BITMAP))) + return 0; + + tsk->thread.io_bitmap_ptr = kmemdup(current->thread.io_bitmap_ptr, + IO_BITMAP_BYTES, GFP_KERNEL); + if (!tsk->thread.io_bitmap_ptr) { + tsk->thread.io_bitmap_max = 0; + return -ENOMEM; + } + set_tsk_thread_flag(tsk, TIF_IO_BITMAP); + return 0; +} + +static inline void free_io_bitmap(struct task_struct *tsk) +{ + if (tsk->thread.io_bitmap_ptr) { + kfree(tsk->thread.io_bitmap_ptr); + tsk->thread.io_bitmap_ptr = NULL; + tsk->thread.io_bitmap_max = 0; + } +} + +int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + unsigned long arg, struct task_struct *p, unsigned long tls) +{ + struct inactive_task_frame *frame; + struct fork_frame *fork_frame; + struct pt_regs *childregs; + int ret; + + childregs = task_pt_regs(p); + fork_frame = container_of(childregs, struct fork_frame, regs); + frame = &fork_frame->frame; + + frame->bp = 0; + frame->ret_addr = (unsigned long) ret_from_fork; + p->thread.sp = (unsigned long) fork_frame; + p->thread.io_bitmap_ptr = NULL; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + +#ifdef CONFIG_X86_64 + savesegment(gs, p->thread.gsindex); + p->thread.gsbase = p->thread.gsindex ? 0 : current->thread.gsbase; + savesegment(fs, p->thread.fsindex); + p->thread.fsbase = p->thread.fsindex ? 0 : current->thread.fsbase; + savesegment(es, p->thread.es); + savesegment(ds, p->thread.ds); +#else + p->thread.sp0 = (unsigned long) (childregs + 1); + /* + * Clear all status flags including IF and set fixed bit. 64bit + * does not have this initialization as the frame does not contain + * flags. The flags consistency (especially vs. AC) is there + * ensured via objtool, which lacks 32bit support. + */ + frame->flags = X86_EFLAGS_FIXED; +#endif + + /* Kernel thread ? */ + if (unlikely(p->flags & PF_KTHREAD)) { + memset(childregs, 0, sizeof(struct pt_regs)); + kthread_frame_init(frame, sp, arg); + return 0; + } + + frame->bx = 0; + *childregs = *current_pt_regs(); + childregs->ax = 0; + if (sp) + childregs->sp = sp; + +#ifdef CONFIG_X86_32 + task_user_gs(p) = get_user_gs(current_pt_regs()); +#endif + + ret = copy_io_bitmap(p); + if (ret) + return ret; + + /* Set a new TLS for the child thread? */ + if (clone_flags & CLONE_SETTLS) { + ret = set_new_tls(p, tls); + if (ret) + free_io_bitmap(p); + } + return ret; +} + void flush_thread(void) { struct task_struct *tsk = current; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b8ceec4974fe..6c7d90527156 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -112,74 +112,6 @@ void release_thread(struct task_struct *dead_task) release_vm86_irqs(dead_task); } -int copy_thread_tls(unsigned long clone_flags, unsigned long sp, - unsigned long arg, struct task_struct *p, unsigned long tls) -{ - struct pt_regs *childregs = task_pt_regs(p); - struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs); - struct inactive_task_frame *frame = &fork_frame->frame; - struct task_struct *tsk; - int err; - - /* - * For a new task use the RESET flags value since there is no before. - * All the status flags are zero; DF and all the system flags must also - * be 0, specifically IF must be 0 because we context switch to the new - * task with interrupts disabled. - */ - frame->flags = X86_EFLAGS_FIXED; - frame->bp = 0; - frame->ret_addr = (unsigned long) ret_from_fork; - p->thread.sp = (unsigned long) fork_frame; - p->thread.sp0 = (unsigned long) (childregs+1); - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - - if (unlikely(p->flags & PF_KTHREAD)) { - /* kernel thread */ - memset(childregs, 0, sizeof(struct pt_regs)); - frame->bx = sp; /* function */ - frame->di = arg; - p->thread.io_bitmap_ptr = NULL; - return 0; - } - frame->bx = 0; - *childregs = *current_pt_regs(); - childregs->ax = 0; - if (sp) - childregs->sp = sp; - - task_user_gs(p) = get_user_gs(current_pt_regs()); - - p->thread.io_bitmap_ptr = NULL; - tsk = current; - err = -ENOMEM; - - if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) { - p->thread.io_bitmap_max = 0; - return -ENOMEM; - } - set_tsk_thread_flag(p, TIF_IO_BITMAP); - } - - err = 0; - - /* - * Set a new TLS for the child thread? - */ - if (clone_flags & CLONE_SETTLS) - err = do_set_thread_area(p, -1, - (struct user_desc __user *)tls, 0); - - if (err && p->thread.io_bitmap_ptr) { - kfree(p->thread.io_bitmap_ptr); - p->thread.io_bitmap_max = 0; - } - return err; -} - void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index af64519b2695..e93a1b8fd7f9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -371,81 +371,6 @@ void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase) task->thread.gsbase = gsbase; } -int copy_thread_tls(unsigned long clone_flags, unsigned long sp, - unsigned long arg, struct task_struct *p, unsigned long tls) -{ - int err; - struct pt_regs *childregs; - struct fork_frame *fork_frame; - struct inactive_task_frame *frame; - struct task_struct *me = current; - - childregs = task_pt_regs(p); - fork_frame = container_of(childregs, struct fork_frame, regs); - frame = &fork_frame->frame; - - frame->bp = 0; - frame->ret_addr = (unsigned long) ret_from_fork; - p->thread.sp = (unsigned long) fork_frame; - p->thread.io_bitmap_ptr = NULL; - - savesegment(gs, p->thread.gsindex); - p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase; - savesegment(fs, p->thread.fsindex); - p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase; - savesegment(es, p->thread.es); - savesegment(ds, p->thread.ds); - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - - if (unlikely(p->flags & PF_KTHREAD)) { - /* kernel thread */ - memset(childregs, 0, sizeof(struct pt_regs)); - frame->bx = sp; /* function */ - frame->r12 = arg; - return 0; - } - frame->bx = 0; - *childregs = *current_pt_regs(); - - childregs->ax = 0; - if (sp) - childregs->sp = sp; - - err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) { - p->thread.io_bitmap_max = 0; - return -ENOMEM; - } - set_tsk_thread_flag(p, TIF_IO_BITMAP); - } - - /* - * Set a new TLS for the child thread? - */ - if (clone_flags & CLONE_SETTLS) { -#ifdef CONFIG_IA32_EMULATION - if (in_ia32_syscall()) - err = do_set_thread_area(p, -1, - (struct user_desc __user *)tls, 0); - else -#endif - err = do_arch_prctl_64(p, ARCH_SET_FS, tls); - if (err) - goto out; - } - err = 0; -out: - if (err && p->thread.io_bitmap_ptr) { - kfree(p->thread.io_bitmap_ptr); - p->thread.io_bitmap_max = 0; - } - - return err; -} - static void start_thread_common(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp, -- cgit v1.2.3 From 505b789996f64bdbfcc5847dd4b5076fc7c50274 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:17 +0100 Subject: x86/cpu: Unify cpu_init() Similar to copy_thread_tls() the 32bit and 64bit implementations of cpu_init() are very similar and unification avoids duplicate changes in the future. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski --- arch/x86/kernel/cpu/common.c | 173 ++++++++++++++++--------------------------- 1 file changed, 65 insertions(+), 108 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9ae7d1bcd4f4..d52ec1a82120 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -53,10 +53,7 @@ #include #include #include - -#ifdef CONFIG_X86_LOCAL_APIC #include -#endif #include "cpu.h" @@ -1749,7 +1746,7 @@ static void wait_for_master_cpu(int cpu) } #ifdef CONFIG_X86_64 -static void setup_getcpu(int cpu) +static inline void setup_getcpu(int cpu) { unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu)); struct desc_struct d = { }; @@ -1769,7 +1766,43 @@ static void setup_getcpu(int cpu) write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S); } + +static inline void ucode_cpu_init(int cpu) +{ + if (cpu) + load_ucode_ap(); +} + +static inline void tss_setup_ist(struct tss_struct *tss) +{ + /* Set up the per-CPU TSS IST stacks */ + tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); + tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); + tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); + tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); +} + +static inline void gdt_setup_doublefault_tss(int cpu) { } + +#else /* CONFIG_X86_64 */ + +static inline void setup_getcpu(int cpu) { } + +static inline void ucode_cpu_init(int cpu) +{ + show_ucode_info_early(); +} + +static inline void tss_setup_ist(struct tss_struct *tss) { } + +static inline void gdt_setup_doublefault_tss(int cpu) +{ +#ifdef CONFIG_DOUBLEFAULT + /* Set up the doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif +} +#endif /* !CONFIG_X86_64 */ /* * cpu_init() initializes state that is per-CPU. Some data is already @@ -1777,21 +1810,15 @@ static void setup_getcpu(int cpu) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -#ifdef CONFIG_X86_64 - void cpu_init(void) { + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + struct task_struct *cur = current; int cpu = raw_smp_processor_id(); - struct task_struct *me; - struct tss_struct *t; - int i; wait_for_master_cpu(cpu); - if (cpu) - load_ucode_ap(); - - t = &per_cpu(cpu_tss_rw, cpu); + ucode_cpu_init(cpu); #ifdef CONFIG_NUMA if (this_cpu_read(numa_node) == 0 && @@ -1800,63 +1827,48 @@ void cpu_init(void) #endif setup_getcpu(cpu); - me = current; - pr_debug("Initializing CPU#%d\n", cpu); - cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) || + boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) + cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - switch_to_new_gdt(cpu); - loadsegment(fs, 0); - load_current_idt(); - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); + if (IS_ENABLED(CONFIG_X86_64)) { + loadsegment(fs, 0); + memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); - x86_configure_nx(); - x2apic_setup(); - - /* - * set up and load the per-CPU TSS - */ - if (!t->x86_tss.ist[0]) { - t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); - t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); - t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); - t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); + x2apic_setup(); } - t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - mmgrab(&init_mm); - me->active_mm = &init_mm; - BUG_ON(me->mm); + cur->active_mm = &init_mm; + BUG_ON(cur->mm); initialize_tlbstate_and_flush(); - enter_lazy_tlb(&init_mm, me); + enter_lazy_tlb(&init_mm, cur); - /* - * Initialize the TSS. sp0 points to the entry trampoline stack - * regardless of what task is running. - */ + /* Initialize the TSS. */ + tss_setup_ist(tss); + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; + memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap)); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); + load_TR_desc(); + /* + * sp0 points to the entry trampoline stack regardless of what task + * is running. + */ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); load_mm_ldt(&init_mm); @@ -1864,6 +1876,8 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); + gdt_setup_doublefault_tss(cpu); + fpu__init_cpu(); if (is_uv_system()) @@ -1872,63 +1886,6 @@ void cpu_init(void) load_fixmap_gdt(cpu); } -#else - -void cpu_init(void) -{ - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); - - wait_for_master_cpu(cpu); - - show_ucode_info_early(); - - pr_info("Initializing CPU#%d\n", cpu); - - if (cpu_feature_enabled(X86_FEATURE_VME) || - boot_cpu_has(X86_FEATURE_TSC) || - boot_cpu_has(X86_FEATURE_DE)) - cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - load_current_idt(); - switch_to_new_gdt(cpu); - - /* - * Set up and load the per-CPU TSS and LDT - */ - mmgrab(&init_mm); - curr->active_mm = &init_mm; - BUG_ON(curr->mm); - initialize_tlbstate_and_flush(); - enter_lazy_tlb(&init_mm, curr); - - /* - * Initialize the TSS. sp0 points to the entry trampoline stack - * regardless of what task is running. - */ - set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); - load_TR_desc(); - load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); - - load_mm_ldt(&init_mm); - - t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - clear_all_debug_regs(); - dbg_restore_debug_regs(); - - fpu__init_cpu(); - - load_fixmap_gdt(cpu); -} -#endif - /* * The microcode loader calls this upon late microcode load to recheck features, * only when microcode has been updated. Caller holds microcode_mutex and CPU -- cgit v1.2.3 From 6b546e1c9ad2a25f874f8bc6077d0f55f9446414 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:18 +0100 Subject: x86/tss: Fix and move VMX BUILD_BUG_ON() The BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 == 0x67) in the VMX code is bogus in two aspects: 1) This wants to be in generic x86 code simply to catch issues even when VMX is disabled in Kconfig. 2) The IO_BITMAP_OFFSET is not the right thing to check because it makes asssumptions about the layout of tss_struct. Nothing requires that the I/O bitmap is placed right after x86_tss, which is the hardware mandated tss structure. It pointlessly makes restrictions on the struct tss_struct layout. The proper thing to check is: - Offset of x86_tss in tss_struct is 0 - Size of x86_tss == 0x68 Move it to the other build time TSS checks and make it do the right thing. Signed-off-by: Thomas Gleixner Acked-by: Paolo Bonzini Acked-by: Andy Lutomirski --- arch/x86/kvm/vmx/vmx.c | 8 -------- arch/x86/mm/cpu_entry_area.c | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5d21a4ab28cf..311fd48d99e2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1338,14 +1338,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ - /* - * VM exits change the host TR limit to 0x67 after a VM - * exit. This is okay, since 0x67 covers everything except - * the IO bitmap and have have code to handle the IO bitmap - * being lost after a VM exit. - */ - BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67); - rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 752ad11d6868..2c1d4223a343 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -161,6 +161,14 @@ static void __init setup_cpu_entry_area(unsigned int cpu) BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + /* + * VMX changes the host TR limit to 0x67 after a VM exit. This is + * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure + * that this is correct. + */ + BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0); + BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68); + cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu), sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); -- cgit v1.2.3 From b800fc4d4a2bfe4f4a52dc1955e1b4d8649e6d5f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:19 +0100 Subject: x86/iopl: Cleanup include maze Get rid of superfluous includes. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski --- arch/x86/kernel/ioport.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 61a89d3c0382..76fc2efa05d5 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -3,22 +3,14 @@ * This contains the io-permission bitmap code - written by obz, with changes * by Linus. 32/64 bits code unification by Miguel Botón. */ - -#include -#include -#include #include -#include -#include -#include #include -#include -#include -#include -#include #include #include -#include +#include +#include +#include + #include /* -- cgit v1.2.3 From ae31cea86ab31f3d2e15d6cc8710754ad7330c9e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Nov 2019 19:05:31 +0100 Subject: x86/ioperm: Simplify first ioperm() invocation logic On the first allocation of a task the I/O bitmap needs to be allocated. After the allocation it is installed as an empty bitmap and immediately afterwards updated. Avoid that and just do the initial updates (store bitmap pointer, set TIF flag and make TSS limit valid) in the update path unconditionally. If the bitmap was already active this is redundant but harmless. Preparatory change for later optimizations in the context switch code. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ioport.c | 55 +++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 76fc2efa05d5..ca6aa1e33c97 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -18,9 +18,10 @@ */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) { + unsigned int i, max_long, bytes, bytes_updated; struct thread_struct *t = ¤t->thread; struct tss_struct *tss; - unsigned int i, max_long, bytes, bytes_updated; + unsigned long *bitmap; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; @@ -33,59 +34,55 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) * IO bitmap up. ioperm() is much less timing critical than clone(), * this is why we delay this operation until now: */ - if (!t->io_bitmap_ptr) { - unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - + bitmap = t->io_bitmap_ptr; + if (!bitmap) { + bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!bitmap) return -ENOMEM; memset(bitmap, 0xff, IO_BITMAP_BYTES); - t->io_bitmap_ptr = bitmap; - set_thread_flag(TIF_IO_BITMAP); - - /* - * Now that we have an IO bitmap, we need our TSS limit to be - * correct. It's fine if we are preempted after doing this: - * with TIF_IO_BITMAP set, context switches will keep our TSS - * limit correct. - */ - preempt_disable(); - refresh_tss_limit(); - preempt_enable(); } /* - * do it in the per-thread copy and in the TSS ... - * - * Disable preemption via get_cpu() - we must not switch away - * because the ->io_bitmap_max value must match the bitmap - * contents: + * Update the bitmap and the TSS copy with preemption disabled to + * prevent a race against context switch. */ - tss = &per_cpu(cpu_tss_rw, get_cpu()); - + preempt_disable(); if (turn_on) - bitmap_clear(t->io_bitmap_ptr, from, num); + bitmap_clear(bitmap, from, num); else - bitmap_set(t->io_bitmap_ptr, from, num); + bitmap_set(bitmap, from, num); /* * Search for a (possibly new) maximum. This is simple and stupid, * to keep it obviously correct: */ max_long = 0; - for (i = 0; i < IO_BITMAP_LONGS; i++) - if (t->io_bitmap_ptr[i] != ~0UL) + for (i = 0; i < IO_BITMAP_LONGS; i++) { + if (bitmap[i] != ~0UL) max_long = i; + } bytes = (max_long + 1) * sizeof(unsigned long); bytes_updated = max(bytes, t->io_bitmap_max); + /* Update the thread data */ t->io_bitmap_max = bytes; + /* + * Store the bitmap pointer (might be the same if the task already + * head one). Set the TIF flag, just in case this is the first + * invocation. + */ + t->io_bitmap_ptr = bitmap; + set_thread_flag(TIF_IO_BITMAP); - /* Update the TSS: */ + /* Update the TSS */ + tss = this_cpu_ptr(&cpu_tss_rw); memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); + /* Make sure the TSS limit covers the I/O bitmap. */ + refresh_tss_limit(); - put_cpu(); + preempt_enable(); return 0; } -- cgit v1.2.3 From 32f3bf67ee78332f2caec0984cb9d412cd0a3c23 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Nov 2019 19:56:19 +0100 Subject: x86/ioperm: Avoid bitmap allocation if no permissions are set If ioperm() is invoked the first time and the @turn_on argument is 0, then there is no point to allocate a bitmap just to clear permissions which are not set. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ioport.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index ca6aa1e33c97..80fa36be2670 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -36,6 +36,9 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) */ bitmap = t->io_bitmap_ptr; if (!bitmap) { + /* No point to allocate a bitmap just to clear permissions */ + if (!turn_on) + return 0; bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!bitmap) return -ENOMEM; -- cgit v1.2.3 From ecc7e37d4dadd16f6be125ca496feccd05454da4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:20 +0100 Subject: x86/io: Speedup schedule out of I/O bitmap user There is no requirement to update the TSS I/O bitmap when a thread using it is scheduled out and the incoming thread does not use it. For the permission check based on the TSS I/O bitmap the CPU calculates the memory location of the I/O bitmap by the address of the TSS and the io_bitmap_base member of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the offset to an address outside of the TSS limit. If an I/O instruction is issued from user space the TSS limit causes #GP to be raised in the same was as valid I/O bitmap with all bits set to 1 would do. This removes the extra work when an I/O bitmap using task is scheduled out and puts the burden on the rare I/O bitmap users when they are scheduled in. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/processor.h | 38 ++++++++++++++++-------- arch/x86/kernel/cpu/common.c | 3 +- arch/x86/kernel/doublefault.c | 2 +- arch/x86/kernel/ioport.c | 4 +++ arch/x86/kernel/process.c | 63 +++++++++++++++++++++++----------------- 5 files changed, 69 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6e0a3b43d027..6d0059c21969 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -330,8 +330,23 @@ struct x86_hw_tss { #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) -#define INVALID_IO_BITMAP_OFFSET 0x8000 + +#define IO_BITMAP_OFFSET_VALID \ + (offsetof(struct tss_struct, io_bitmap) - \ + offsetof(struct tss_struct, x86_tss)) + +/* + * sizeof(unsigned long) coming from an extra "long" at the end + * of the iobitmap. + * + * -1? seg base+limit should be pointing to the address of the + * last valid byte + */ +#define __KERNEL_TSS_LIMIT \ + (IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) + +/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ +#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { unsigned long words[64]; @@ -349,6 +364,15 @@ struct tss_struct { */ struct x86_hw_tss x86_tss; + /* + * Store the dirty size of the last io bitmap offender. The next + * one will have to do the cleanup as the switch out to a non io + * bitmap user will just set x86_tss.io_bitmap_base to a value + * outside of the TSS limit. So for sane tasks there is no need to + * actually touch the io_bitmap at all. + */ + unsigned int io_bitmap_prev_max; + /* * The extra 1 is there because the CPU will access an * additional byte beyond the end of the IO permission @@ -360,16 +384,6 @@ struct tss_struct { DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); -/* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. - * - * -1? seg base+limit should be pointing to the address of the - * last valid byte - */ -#define __KERNEL_TSS_LIMIT \ - (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) - /* Per CPU interrupt stacks */ struct irq_stack { char stack[IRQ_STACK_SIZE]; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d52ec1a82120..8c1000a57b55 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1860,7 +1860,8 @@ void cpu_init(void) /* Initialize the TSS. */ tss_setup_ist(tss); - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; + tss->io_bitmap_prev_max = 0; memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap)); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 0b8cedb20d6d..cedb07d0909c 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -54,7 +54,7 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = { .sp0 = STACK_START, .ss0 = __KERNEL_DS, .ldt = 0, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, + .io_bitmap_base = IO_BITMAP_OFFSET_INVALID, .ip = (unsigned long) doublefault_fn, /* 0x2 bit is always set */ diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 80fa36be2670..eed218a3fd48 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -82,6 +82,10 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) /* Update the TSS */ tss = this_cpu_ptr(&cpu_tss_rw); memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); + /* Store the new end of the zero bits */ + tss->io_bitmap_prev_max = bytes; + /* Make the bitmap base in the TSS valid */ + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; /* Make sure the TSS limit covers the I/O bitmap. */ refresh_tss_limit(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7e07f0bb0def..2444fe296de5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -72,18 +72,9 @@ __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { #ifdef CONFIG_X86_32 .ss0 = __KERNEL_DS, .ss1 = __KERNEL_CS, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, #endif + .io_bitmap_base = IO_BITMAP_OFFSET_INVALID, }, -#ifdef CONFIG_X86_32 - /* - * Note that the .io_bitmap member must be extra-big. This is because - * the CPU will access an additional byte beyond the end of the IO - * permission bitmap. The extra byte must be all 1 bits, and must - * be within the limit. - */ - .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, -#endif }; EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); @@ -112,18 +103,18 @@ void exit_thread(struct task_struct *tsk) struct thread_struct *t = &tsk->thread; unsigned long *bp = t->io_bitmap_ptr; struct fpu *fpu = &t->fpu; + struct tss_struct *tss; if (bp) { - struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); + preempt_disable(); + tss = this_cpu_ptr(&cpu_tss_rw); t->io_bitmap_ptr = NULL; - clear_thread_flag(TIF_IO_BITMAP); - /* - * Careful, clear this in the TSS too: - */ - memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; - put_cpu(); + clear_thread_flag(TIF_IO_BITMAP); + /* Invalidate the io bitmap base in the TSS */ + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; + preempt_enable(); kfree(bp); } @@ -369,29 +360,47 @@ void arch_setup_new_exec(void) } } -static inline void switch_to_bitmap(struct thread_struct *prev, - struct thread_struct *next, +static inline void switch_to_bitmap(struct thread_struct *next, unsigned long tifp, unsigned long tifn) { struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); if (tifn & _TIF_IO_BITMAP) { /* - * Copy the relevant range of the IO bitmap. - * Normally this is 128 bytes or less: + * Copy at least the size of the incoming tasks bitmap + * which covers the last permitted I/O port. + * + * If the previous task which used an io bitmap had more + * bits permitted, then the copy needs to cover those as + * well so they get turned off. */ memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(prev->io_bitmap_max, next->io_bitmap_max)); + max(tss->io_bitmap_prev_max, next->io_bitmap_max)); + + /* Store the new max and set io_bitmap_base valid */ + tss->io_bitmap_prev_max = next->io_bitmap_max; + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; + /* - * Make sure that the TSS limit is correct for the CPU - * to notice the IO bitmap. + * Make sure that the TSS limit is covering the io bitmap. + * It might have been cut down by a VMEXIT to 0x67 which + * would cause a subsequent I/O access from user space to + * trigger a #GP because tbe bitmap is outside the TSS + * limit. */ refresh_tss_limit(); } else if (tifp & _TIF_IO_BITMAP) { /* - * Clear any possible leftover bits: + * Do not touch the bitmap. Let the next bitmap using task + * deal with the mess. Just make the io_bitmap_base invalid + * by moving it outside the TSS limit so any subsequent I/O + * access from user space will trigger a #GP. + * + * This is correct even when VMEXIT rewrites the TSS limit + * to 0x67 as the only requirement is that the base points + * outside the limit. */ - memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; } } @@ -605,7 +614,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(prev, next, tifp, tifn); + switch_to_bitmap(next, tifp, tifn); propagate_user_return_notify(prev_p, next_p); -- cgit v1.2.3 From f5848e5fd2f813c3a8009a642dfbcf635287c199 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Nov 2019 18:45:29 +0100 Subject: x86/tss: Move I/O bitmap data into a seperate struct Move the non hardware portion of I/O bitmap data into a seperate struct for readability sake. Originally-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/processor.h | 35 +++++++++++++++++++++-------------- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/ioport.c | 4 ++-- arch/x86/kernel/process.c | 6 +++--- 4 files changed, 28 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6d0059c21969..cd7cd7d10b81 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -328,11 +328,11 @@ struct x86_hw_tss { * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) -#define IO_BITMAP_OFFSET_VALID \ - (offsetof(struct tss_struct, io_bitmap) - \ +#define IO_BITMAP_OFFSET_VALID \ + (offsetof(struct tss_struct, io_bitmap.bitmap) - \ offsetof(struct tss_struct, x86_tss)) /* @@ -356,14 +356,10 @@ struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); -struct tss_struct { - /* - * The fixed hardware portion. This must not cross a page boundary - * at risk of violating the SDM's advice and potentially triggering - * errata. - */ - struct x86_hw_tss x86_tss; - +/* + * All IO bitmap related data stored in the TSS: + */ +struct x86_io_bitmap { /* * Store the dirty size of the last io bitmap offender. The next * one will have to do the cleanup as the switch out to a non io @@ -371,7 +367,7 @@ struct tss_struct { * outside of the TSS limit. So for sane tasks there is no need to * actually touch the io_bitmap at all. */ - unsigned int io_bitmap_prev_max; + unsigned int prev_max; /* * The extra 1 is there because the CPU will access an @@ -379,7 +375,18 @@ struct tss_struct { * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + unsigned long bitmap[IO_BITMAP_LONGS + 1]; +}; + +struct tss_struct { + /* + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. + */ + struct x86_hw_tss x86_tss; + + struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8c1000a57b55..3aee167246f7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1861,8 +1861,8 @@ void cpu_init(void) /* Initialize the TSS. */ tss_setup_ist(tss); tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; - tss->io_bitmap_prev_max = 0; - memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap)); + tss->io_bitmap.prev_max = 0; + memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index eed218a3fd48..80d99bb826f3 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -81,9 +81,9 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) /* Update the TSS */ tss = this_cpu_ptr(&cpu_tss_rw); - memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); + memcpy(tss->io_bitmap.bitmap, t->io_bitmap_ptr, bytes_updated); /* Store the new end of the zero bits */ - tss->io_bitmap_prev_max = bytes; + tss->io_bitmap.prev_max = bytes; /* Make the bitmap base in the TSS valid */ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; /* Make sure the TSS limit covers the I/O bitmap. */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2444fe296de5..35f1c80df7a8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -374,11 +374,11 @@ static inline void switch_to_bitmap(struct thread_struct *next, * bits permitted, then the copy needs to cover those as * well so they get turned off. */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(tss->io_bitmap_prev_max, next->io_bitmap_max)); + memcpy(tss->io_bitmap.bitmap, next->io_bitmap_ptr, + max(tss->io_bitmap.prev_max, next->io_bitmap_max)); /* Store the new max and set io_bitmap_base valid */ - tss->io_bitmap_prev_max = next->io_bitmap_max; + tss->io_bitmap.prev_max = next->io_bitmap_max; tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; /* -- cgit v1.2.3 From 577d5cd7e5851d3832066cd0422475fa7db2ee17 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:21 +0100 Subject: x86/ioperm: Move iobitmap data into a struct No point in having all the data in thread_struct, especially as upcoming changes add more. Make the bitmap in the new struct accessible as array of longs and as array of characters via a union, so both the bitmap functions and the update logic can avoid type casts. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_bitmap.h | 13 +++++++++++++ arch/x86/include/asm/processor.h | 6 ++---- arch/x86/kernel/ioport.c | 27 ++++++++++++++------------- arch/x86/kernel/process.c | 38 ++++++++++++++++++++------------------ arch/x86/kernel/ptrace.c | 12 ++++++++---- 5 files changed, 57 insertions(+), 39 deletions(-) create mode 100644 arch/x86/include/asm/io_bitmap.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h new file mode 100644 index 000000000000..1a12b9ff5e4e --- /dev/null +++ b/arch/x86/include/asm/io_bitmap.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IOBITMAP_H +#define _ASM_X86_IOBITMAP_H + +#include + +struct io_bitmap { + /* The maximum number of bytes to copy so all zero bits are covered */ + unsigned int max; + unsigned long bitmap[IO_BITMAP_LONGS]; +}; + +#endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cd7cd7d10b81..c949e0e5cbe6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -7,6 +7,7 @@ /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; +struct io_bitmap; struct vm86; #include @@ -501,10 +502,8 @@ struct thread_struct { struct vm86 *vm86; #endif /* IO permissions: */ - unsigned long *io_bitmap_ptr; + struct io_bitmap *io_bitmap; unsigned long iopl; - /* Max allowed port in the bitmap, in bytes: */ - unsigned io_bitmap_max; mm_segment_t addr_limit; @@ -862,7 +861,6 @@ static inline void spin_lock_prefetch(const void *x) #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ - .io_bitmap_ptr = NULL, \ .addr_limit = KERNEL_DS, \ } diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 80d99bb826f3..05f77f3af46d 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -11,6 +11,7 @@ #include #include +#include #include /* @@ -21,7 +22,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) unsigned int i, max_long, bytes, bytes_updated; struct thread_struct *t = ¤t->thread; struct tss_struct *tss; - unsigned long *bitmap; + struct io_bitmap *iobm; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; @@ -34,16 +35,16 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) * IO bitmap up. ioperm() is much less timing critical than clone(), * this is why we delay this operation until now: */ - bitmap = t->io_bitmap_ptr; - if (!bitmap) { + iobm = t->io_bitmap; + if (!iobm) { /* No point to allocate a bitmap just to clear permissions */ if (!turn_on) return 0; - bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - if (!bitmap) + iobm = kmalloc(sizeof(*iobm), GFP_KERNEL); + if (!iobm) return -ENOMEM; - memset(bitmap, 0xff, IO_BITMAP_BYTES); + memset(iobm->bitmap, 0xff, sizeof(iobm->bitmap)); } /* @@ -52,9 +53,9 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) */ preempt_disable(); if (turn_on) - bitmap_clear(bitmap, from, num); + bitmap_clear(iobm->bitmap, from, num); else - bitmap_set(bitmap, from, num); + bitmap_set(iobm->bitmap, from, num); /* * Search for a (possibly new) maximum. This is simple and stupid, @@ -62,26 +63,26 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) */ max_long = 0; for (i = 0; i < IO_BITMAP_LONGS; i++) { - if (bitmap[i] != ~0UL) + if (iobm->bitmap[i] != ~0UL) max_long = i; } bytes = (max_long + 1) * sizeof(unsigned long); - bytes_updated = max(bytes, t->io_bitmap_max); + bytes_updated = max(bytes, t->io_bitmap->max); /* Update the thread data */ - t->io_bitmap_max = bytes; + iobm->max = bytes; /* * Store the bitmap pointer (might be the same if the task already * head one). Set the TIF flag, just in case this is the first * invocation. */ - t->io_bitmap_ptr = bitmap; + t->io_bitmap = iobm; set_thread_flag(TIF_IO_BITMAP); /* Update the TSS */ tss = this_cpu_ptr(&cpu_tss_rw); - memcpy(tss->io_bitmap.bitmap, t->io_bitmap_ptr, bytes_updated); + memcpy(tss->io_bitmap.bitmap, iobm->bitmap, bytes_updated); /* Store the new end of the zero bits */ tss->io_bitmap.prev_max = bytes; /* Make the bitmap base in the TSS valid */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 35f1c80df7a8..1504fd2b9bc4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include "process.h" @@ -101,21 +102,20 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) void exit_thread(struct task_struct *tsk) { struct thread_struct *t = &tsk->thread; - unsigned long *bp = t->io_bitmap_ptr; + struct io_bitmap *iobm = t->io_bitmap; struct fpu *fpu = &t->fpu; struct tss_struct *tss; - if (bp) { + if (iobm) { preempt_disable(); tss = this_cpu_ptr(&cpu_tss_rw); - t->io_bitmap_ptr = NULL; - t->io_bitmap_max = 0; + t->io_bitmap = NULL; clear_thread_flag(TIF_IO_BITMAP); /* Invalidate the io bitmap base in the TSS */ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; preempt_enable(); - kfree(bp); + kfree(iobm); } free_vm86(t); @@ -135,25 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls) static inline int copy_io_bitmap(struct task_struct *tsk) { + struct io_bitmap *iobm = current->thread.io_bitmap; + if (likely(!test_tsk_thread_flag(current, TIF_IO_BITMAP))) return 0; - tsk->thread.io_bitmap_ptr = kmemdup(current->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!tsk->thread.io_bitmap_ptr) { - tsk->thread.io_bitmap_max = 0; + tsk->thread.io_bitmap = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL); + + if (!tsk->thread.io_bitmap) return -ENOMEM; - } + set_tsk_thread_flag(tsk, TIF_IO_BITMAP); return 0; } static inline void free_io_bitmap(struct task_struct *tsk) { - if (tsk->thread.io_bitmap_ptr) { - kfree(tsk->thread.io_bitmap_ptr); - tsk->thread.io_bitmap_ptr = NULL; - tsk->thread.io_bitmap_max = 0; + if (tsk->thread.io_bitmap) { + kfree(tsk->thread.io_bitmap); + tsk->thread.io_bitmap = NULL; } } @@ -172,7 +172,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; - p->thread.io_bitmap_ptr = NULL; + p->thread.io_bitmap = NULL; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); #ifdef CONFIG_X86_64 @@ -366,6 +366,8 @@ static inline void switch_to_bitmap(struct thread_struct *next, struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); if (tifn & _TIF_IO_BITMAP) { + struct io_bitmap *iobm = next->io_bitmap; + /* * Copy at least the size of the incoming tasks bitmap * which covers the last permitted I/O port. @@ -374,11 +376,11 @@ static inline void switch_to_bitmap(struct thread_struct *next, * bits permitted, then the copy needs to cover those as * well so they get turned off. */ - memcpy(tss->io_bitmap.bitmap, next->io_bitmap_ptr, - max(tss->io_bitmap.prev_max, next->io_bitmap_max)); + memcpy(tss->io_bitmap.bitmap, next->io_bitmap->bitmap, + max(tss->io_bitmap.prev_max, next->io_bitmap->max)); /* Store the new max and set io_bitmap_base valid */ - tss->io_bitmap.prev_max = next->io_bitmap_max; + tss->io_bitmap.prev_max = next->io_bitmap->max; tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7c526741215b..066e5b01a7e0 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "tls.h" @@ -697,7 +698,9 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n, static int ioperm_active(struct task_struct *target, const struct user_regset *regset) { - return DIV_ROUND_UP(target->thread.io_bitmap_max, regset->size); + struct io_bitmap *iobm = target->thread.io_bitmap; + + return iobm ? DIV_ROUND_UP(iobm->max, regset->size) : 0; } static int ioperm_get(struct task_struct *target, @@ -705,12 +708,13 @@ static int ioperm_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - if (!target->thread.io_bitmap_ptr) + struct io_bitmap *iobm = target->thread.io_bitmap; + + if (!iobm) return -ENXIO; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.io_bitmap_ptr, - 0, IO_BITMAP_BYTES); + iobm->bitmap, 0, IO_BITMAP_BYTES); } /* -- cgit v1.2.3 From 060aa16fdb7c5078a4159a76e5dc87d6a493af9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:22 +0100 Subject: x86/ioperm: Add bitmap sequence number Add a globally unique sequence number which is incremented when ioperm() is changing the I/O bitmap of a task. Store the new sequence number in the io_bitmap structure and compare it with the sequence number of the I/O bitmap which was last loaded on a CPU. Only update the bitmap if the sequence is different. That should further reduce the overhead of I/O bitmap scheduling when there are only a few I/O bitmap users on the system. The 64bit sequence counter is sufficient. A wraparound of the sequence counter assuming an ioperm() call every nanosecond would require about 584 years of uptime. Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_bitmap.h | 1 + arch/x86/include/asm/processor.h | 3 +++ arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/ioport.c | 5 +++++ arch/x86/kernel/process.c | 38 ++++++++++++++++++++++++++++---------- 5 files changed, 38 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index 1a12b9ff5e4e..d63bd5afe671 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -5,6 +5,7 @@ #include struct io_bitmap { + u64 sequence; /* The maximum number of bytes to copy so all zero bits are covered */ unsigned int max; unsigned long bitmap[IO_BITMAP_LONGS]; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c949e0e5cbe6..40bb0f7bca3f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -361,6 +361,9 @@ struct entry_stack_page { * All IO bitmap related data stored in the TSS: */ struct x86_io_bitmap { + /* The sequence number of the last active bitmap. */ + u64 prev_sequence; + /* * Store the dirty size of the last io bitmap offender. The next * one will have to do the cleanup as the switch out to a non io diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3aee167246f7..79dd544bb974 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1862,6 +1862,7 @@ void cpu_init(void) tss_setup_ist(tss); tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; tss->io_bitmap.prev_max = 0; + tss->io_bitmap.prev_sequence = 0; memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 05f77f3af46d..7c1ab5c2b395 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,6 +14,8 @@ #include #include +static atomic64_t io_bitmap_sequence; + /* * this changes the io permissions bitmap in the current task. */ @@ -72,6 +74,9 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) /* Update the thread data */ iobm->max = bytes; + /* Update the sequence number to force an update in switch_to() */ + iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence); + /* * Store the bitmap pointer (might be the same if the task already * head one). Set the TIF flag, just in case this is the first diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1504fd2b9bc4..7c49be90468b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -360,6 +360,28 @@ void arch_setup_new_exec(void) } } +static void switch_to_update_io_bitmap(struct tss_struct *tss, + struct io_bitmap *iobm) +{ + /* + * Copy at least the byte range of the incoming tasks bitmap which + * covers the permitted I/O ports. + * + * If the previous task which used an I/O bitmap had more bits + * permitted, then the copy needs to cover those as well so they + * get turned off. + */ + memcpy(tss->io_bitmap.bitmap, iobm->bitmap, + max(tss->io_bitmap.prev_max, iobm->max)); + + /* + * Store the new max and the sequence number of this bitmap + * and a pointer to the bitmap itself. + */ + tss->io_bitmap.prev_max = iobm->max; + tss->io_bitmap.prev_sequence = iobm->sequence; +} + static inline void switch_to_bitmap(struct thread_struct *next, unsigned long tifp, unsigned long tifn) { @@ -369,18 +391,14 @@ static inline void switch_to_bitmap(struct thread_struct *next, struct io_bitmap *iobm = next->io_bitmap; /* - * Copy at least the size of the incoming tasks bitmap - * which covers the last permitted I/O port. - * - * If the previous task which used an io bitmap had more - * bits permitted, then the copy needs to cover those as - * well so they get turned off. + * Only copy bitmap data when the sequence number + * differs. The update time is accounted to the incoming + * task. */ - memcpy(tss->io_bitmap.bitmap, next->io_bitmap->bitmap, - max(tss->io_bitmap.prev_max, next->io_bitmap->max)); + if (tss->io_bitmap.prev_sequence != iobm->sequence) + switch_to_update_io_bitmap(tss, iobm); - /* Store the new max and set io_bitmap_base valid */ - tss->io_bitmap.prev_max = next->io_bitmap->max; + /* Enable the bitmap */ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; /* -- cgit v1.2.3 From 22fe5b0439dd53643fd6f4c582c46c6dba0fde53 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:23 +0100 Subject: x86/ioperm: Move TSS bitmap update to exit to user work There is no point to update the TSS bitmap for tasks which use I/O bitmaps on every context switch. It's enough to update it right before exiting to user space. That reduces the context switch bitmap handling to invalidating the io bitmap base offset in the TSS when the outgoing task has TIF_IO_BITMAP set. The invaldiation is done on purpose when a task with an IO bitmap switches out to prevent any possible leakage of an activated IO bitmap. It also removes the requirement to update the tasks bitmap atomically in ioperm(). Signed-off-by: Thomas Gleixner --- arch/x86/entry/common.c | 4 +++ arch/x86/include/asm/io_bitmap.h | 2 ++ arch/x86/include/asm/thread_info.h | 9 +++--- arch/x86/kernel/ioport.c | 25 +++------------- arch/x86/kernel/process.c | 59 +++++++++++++++++++++++++------------- 5 files changed, 54 insertions(+), 45 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 3f8e22615812..9747876980b5 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -33,6 +33,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -196,6 +197,9 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) /* Reload ti->flags; we may have rescheduled above. */ cached_flags = READ_ONCE(ti->flags); + if (unlikely(cached_flags & _TIF_IO_BITMAP)) + tss_update_io_bitmap(); + fpregs_assert_state_consistent(); if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD)) switch_fpu_return(); diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index d63bd5afe671..6d82a37cb17c 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -11,4 +11,6 @@ struct io_bitmap { unsigned long bitmap[IO_BITMAP_LONGS]; }; +void tss_update_io_bitmap(void); + #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f9453536f9bb..0accf44878a5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -143,8 +143,8 @@ struct thread_info { _TIF_NOHZ) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW_BASE \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ +#define _TIF_WORK_CTXSW_BASE \ + (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* @@ -156,8 +156,9 @@ struct thread_info { # define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) #endif -#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ + _TIF_IO_BITMAP) +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define STACK_WARN (THREAD_SIZE/8) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 7c1ab5c2b395..198beadb3732 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -21,9 +21,8 @@ static atomic64_t io_bitmap_sequence; */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) { - unsigned int i, max_long, bytes, bytes_updated; struct thread_struct *t = ¤t->thread; - struct tss_struct *tss; + unsigned int i, max_long; struct io_bitmap *iobm; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) @@ -50,10 +49,9 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) } /* - * Update the bitmap and the TSS copy with preemption disabled to - * prevent a race against context switch. + * Update the tasks bitmap. The update of the TSS bitmap happens on + * exit to user mode. So this needs no protection. */ - preempt_disable(); if (turn_on) bitmap_clear(iobm->bitmap, from, num); else @@ -69,11 +67,8 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) max_long = i; } - bytes = (max_long + 1) * sizeof(unsigned long); - bytes_updated = max(bytes, t->io_bitmap->max); + iobm->max = (max_long + 1) * sizeof(unsigned long); - /* Update the thread data */ - iobm->max = bytes; /* Update the sequence number to force an update in switch_to() */ iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence); @@ -85,18 +80,6 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) t->io_bitmap = iobm; set_thread_flag(TIF_IO_BITMAP); - /* Update the TSS */ - tss = this_cpu_ptr(&cpu_tss_rw); - memcpy(tss->io_bitmap.bitmap, iobm->bitmap, bytes_updated); - /* Store the new end of the zero bits */ - tss->io_bitmap.prev_max = bytes; - /* Make the bitmap base in the TSS valid */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; - /* Make sure the TSS limit covers the I/O bitmap. */ - refresh_tss_limit(); - - preempt_enable(); - return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7c49be90468b..108af913ab3c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -360,8 +360,34 @@ void arch_setup_new_exec(void) } } -static void switch_to_update_io_bitmap(struct tss_struct *tss, - struct io_bitmap *iobm) +static inline void tss_invalidate_io_bitmap(struct tss_struct *tss) +{ + /* + * Invalidate the I/O bitmap by moving io_bitmap_base outside the + * TSS limit so any subsequent I/O access from user space will + * trigger a #GP. + * + * This is correct even when VMEXIT rewrites the TSS limit + * to 0x67 as the only requirement is that the base points + * outside the limit. + */ + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; +} + +static inline void switch_to_bitmap(unsigned long tifp) +{ + /* + * Invalidate I/O bitmap if the previous task used it. This prevents + * any possible leakage of an active I/O bitmap. + * + * If the next task has an I/O bitmap it will handle it on exit to + * user mode. + */ + if (tifp & _TIF_IO_BITMAP) + tss_invalidate_io_bitmap(this_cpu_ptr(&cpu_tss_rw)); +} + +static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm) { /* * Copy at least the byte range of the incoming tasks bitmap which @@ -382,13 +408,15 @@ static void switch_to_update_io_bitmap(struct tss_struct *tss, tss->io_bitmap.prev_sequence = iobm->sequence; } -static inline void switch_to_bitmap(struct thread_struct *next, - unsigned long tifp, unsigned long tifn) +/** + * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode + */ +void tss_update_io_bitmap(void) { struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); - if (tifn & _TIF_IO_BITMAP) { - struct io_bitmap *iobm = next->io_bitmap; + if (test_thread_flag(TIF_IO_BITMAP)) { + struct io_bitmap *iobm = current->thread.io_bitmap; /* * Only copy bitmap data when the sequence number @@ -396,7 +424,7 @@ static inline void switch_to_bitmap(struct thread_struct *next, * task. */ if (tss->io_bitmap.prev_sequence != iobm->sequence) - switch_to_update_io_bitmap(tss, iobm); + tss_copy_io_bitmap(tss, iobm); /* Enable the bitmap */ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; @@ -409,18 +437,8 @@ static inline void switch_to_bitmap(struct thread_struct *next, * limit. */ refresh_tss_limit(); - } else if (tifp & _TIF_IO_BITMAP) { - /* - * Do not touch the bitmap. Let the next bitmap using task - * deal with the mess. Just make the io_bitmap_base invalid - * by moving it outside the TSS limit so any subsequent I/O - * access from user space will trigger a #GP. - * - * This is correct even when VMEXIT rewrites the TSS limit - * to 0x67 as the only requirement is that the base points - * outside the limit. - */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; + } else { + tss_invalidate_io_bitmap(tss); } } @@ -634,7 +652,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(next, tifp, tifn); + + switch_to_bitmap(tifp); propagate_user_return_notify(prev_p, next_p); -- cgit v1.2.3 From ea5f1cd7ab494f65f50f338299eabb40ad6a1767 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:24 +0100 Subject: x86/ioperm: Remove bitmap if all permissions dropped If ioperm() results in a bitmap with all bits set (no permissions to any I/O port), then handling that bitmap on context switch and exit to user mode is pointless. Drop it. Move the bitmap exit handling to the ioport code and reuse it for both the thread exit path and dropping it. This allows to reuse this code for the upcoming iopl() emulation. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski --- arch/x86/include/asm/io_bitmap.h | 2 ++ arch/x86/kernel/ioport.c | 19 ++++++++++++++++++- arch/x86/kernel/process.c | 17 +++-------------- 3 files changed, 23 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index 6d82a37cb17c..784a88edb25d 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -11,6 +11,8 @@ struct io_bitmap { unsigned long bitmap[IO_BITMAP_LONGS]; }; +void io_bitmap_exit(void); + void tss_update_io_bitmap(void); #endif diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 198beadb3732..f9fc69aeb033 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -16,6 +16,18 @@ static atomic64_t io_bitmap_sequence; +void io_bitmap_exit(void) +{ + struct io_bitmap *iobm = current->thread.io_bitmap; + + current->thread.io_bitmap = NULL; + clear_thread_flag(TIF_IO_BITMAP); + preempt_disable(); + tss_update_io_bitmap(); + preempt_enable(); + kfree(iobm); +} + /* * this changes the io permissions bitmap in the current task. */ @@ -61,11 +73,16 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) * Search for a (possibly new) maximum. This is simple and stupid, * to keep it obviously correct: */ - max_long = 0; + max_long = UINT_MAX; for (i = 0; i < IO_BITMAP_LONGS; i++) { if (iobm->bitmap[i] != ~0UL) max_long = i; } + /* All permissions dropped? */ + if (max_long == UINT_MAX) { + io_bitmap_exit(); + return 0; + } iobm->max = (max_long + 1) * sizeof(unsigned long); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 108af913ab3c..7ba4d54aec17 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -102,21 +102,10 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) void exit_thread(struct task_struct *tsk) { struct thread_struct *t = &tsk->thread; - struct io_bitmap *iobm = t->io_bitmap; struct fpu *fpu = &t->fpu; - struct tss_struct *tss; - - if (iobm) { - preempt_disable(); - tss = this_cpu_ptr(&cpu_tss_rw); - - t->io_bitmap = NULL; - clear_thread_flag(TIF_IO_BITMAP); - /* Invalidate the io bitmap base in the TSS */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; - preempt_enable(); - kfree(iobm); - } + + if (test_thread_flag(TIF_IO_BITMAP)) + io_bitmap_exit(); free_vm86(t); -- cgit v1.2.3 From 4804e382c117ce213cd5c43512cf4b1d71bb2650 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:25 +0100 Subject: x86/ioperm: Share I/O bitmap if identical The I/O bitmap is duplicated on fork. That's wasting memory and slows down fork. There is no point to do so. As long as the bitmap is not modified it can be shared between threads and processes. Add a refcount and just share it on fork. If a task modifies the bitmap then it has to do the duplication if and only if it is shared. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski --- arch/x86/include/asm/io_bitmap.h | 5 +++++ arch/x86/kernel/ioport.c | 48 ++++++++++++++++++++++++++++++++-------- arch/x86/kernel/process.c | 39 +++++--------------------------- 3 files changed, 50 insertions(+), 42 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index 784a88edb25d..b664baadf736 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -2,15 +2,20 @@ #ifndef _ASM_X86_IOBITMAP_H #define _ASM_X86_IOBITMAP_H +#include #include struct io_bitmap { u64 sequence; + refcount_t refcnt; /* The maximum number of bytes to copy so all zero bits are covered */ unsigned int max; unsigned long bitmap[IO_BITMAP_LONGS]; }; +struct task_struct; + +void io_bitmap_share(struct task_struct *tsk); void io_bitmap_exit(void); void tss_update_io_bitmap(void); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index f9fc69aeb033..f82ca1c62e3e 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -16,6 +16,17 @@ static atomic64_t io_bitmap_sequence; +void io_bitmap_share(struct task_struct *tsk) + { + /* + * Take a refcount on current's bitmap. It can be used by + * both tasks as long as none of them changes the bitmap. + */ + refcount_inc(¤t->thread.io_bitmap->refcnt); + tsk->thread.io_bitmap = current->thread.io_bitmap; + set_tsk_thread_flag(tsk, TIF_IO_BITMAP); +} + void io_bitmap_exit(void) { struct io_bitmap *iobm = current->thread.io_bitmap; @@ -25,7 +36,8 @@ void io_bitmap_exit(void) preempt_disable(); tss_update_io_bitmap(); preempt_enable(); - kfree(iobm); + if (iobm && refcount_dec_and_test(&iobm->refcnt)) + kfree(iobm); } /* @@ -58,8 +70,31 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) return -ENOMEM; memset(iobm->bitmap, 0xff, sizeof(iobm->bitmap)); + refcount_set(&iobm->refcnt, 1); + } + + /* + * If the bitmap is not shared, then nothing can take a refcount as + * current can obviously not fork at the same time. If it's shared + * duplicate it and drop the refcount on the original one. + */ + if (refcount_read(&iobm->refcnt) > 1) { + iobm = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL); + if (!iobm) + return -ENOMEM; + refcount_set(&iobm->refcnt, 1); + io_bitmap_exit(); } + /* + * Store the bitmap pointer (might be the same if the task already + * head one). Must be done here so freeing the bitmap when all + * permissions are dropped has the pointer set up. + */ + t->io_bitmap = iobm; + /* Mark it active for context switching and exit to user mode */ + set_thread_flag(TIF_IO_BITMAP); + /* * Update the tasks bitmap. The update of the TSS bitmap happens on * exit to user mode. So this needs no protection. @@ -86,16 +121,11 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) iobm->max = (max_long + 1) * sizeof(unsigned long); - /* Update the sequence number to force an update in switch_to() */ - iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence); - /* - * Store the bitmap pointer (might be the same if the task already - * head one). Set the TIF flag, just in case this is the first - * invocation. + * Update the sequence number to force a TSS update on return to + * user mode. */ - t->io_bitmap = iobm; - set_thread_flag(TIF_IO_BITMAP); + iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7ba4d54aec17..0b19c137c442 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -122,37 +122,13 @@ static int set_new_tls(struct task_struct *p, unsigned long tls) return do_set_thread_area_64(p, ARCH_SET_FS, tls); } -static inline int copy_io_bitmap(struct task_struct *tsk) -{ - struct io_bitmap *iobm = current->thread.io_bitmap; - - if (likely(!test_tsk_thread_flag(current, TIF_IO_BITMAP))) - return 0; - - tsk->thread.io_bitmap = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL); - - if (!tsk->thread.io_bitmap) - return -ENOMEM; - - set_tsk_thread_flag(tsk, TIF_IO_BITMAP); - return 0; -} - -static inline void free_io_bitmap(struct task_struct *tsk) -{ - if (tsk->thread.io_bitmap) { - kfree(tsk->thread.io_bitmap); - tsk->thread.io_bitmap = NULL; - } -} - int copy_thread_tls(unsigned long clone_flags, unsigned long sp, unsigned long arg, struct task_struct *p, unsigned long tls) { struct inactive_task_frame *frame; struct fork_frame *fork_frame; struct pt_regs *childregs; - int ret; + int ret = 0; childregs = task_pt_regs(p); fork_frame = container_of(childregs, struct fork_frame, regs); @@ -199,16 +175,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(current_pt_regs()); #endif - ret = copy_io_bitmap(p); - if (ret) - return ret; - /* Set a new TLS for the child thread? */ - if (clone_flags & CLONE_SETTLS) { + if (clone_flags & CLONE_SETTLS) ret = set_new_tls(p, tls); - if (ret) - free_io_bitmap(p); - } + + if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP))) + io_bitmap_share(p); + return ret; } -- cgit v1.2.3 From be9afb4b529d9e3a68da1212e33be677bbfc8d2c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:27 +0100 Subject: x86/iopl: Fixup misleading comment The comment for the sys_iopl() implementation is outdated and actively misleading in some parts. Fix it up. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski --- arch/x86/kernel/ioport.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index f82ca1c62e3e..3548563b0935 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -41,7 +41,7 @@ void io_bitmap_exit(void) } /* - * this changes the io permissions bitmap in the current task. + * This changes the io permissions bitmap in the current task. */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) { @@ -136,14 +136,24 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) } /* - * sys_iopl has to be used when you want to access the IO ports - * beyond the 0x3ff range: to get the full 65536 ports bitmapped - * you'd need 8kB of bitmaps/process, which is a bit excessive. + * The sys_iopl functionality depends on the level argument, which if + * granted for the task is used by the CPU to check I/O instruction and + * CLI/STI against the current priviledge level (CPL). If CPL is less than + * or equal the tasks IOPL level the instructions take effect. If not a #GP + * is raised. The default IOPL is 0, i.e. no permissions. * - * Here we just change the flags value on the stack: we allow - * only the super-user to do it. This depends on the stack-layout - * on system-call entry - see also fork() and the signal handling - * code. + * Setting IOPL to level 0-2 is disabling the userspace access. Only level + * 3 enables it. If set it allows the user space thread: + * + * - Unrestricted access to all 65535 I/O ports + * - The usage of CLI/STI instructions + * + * The advantage over ioperm is that the context switch does not require to + * update the I/O bitmap which is especially true when a large number of + * ports is accessed. But the allowance of CLI/STI in userspace is + * considered a major problem. + * + * IOPL is strictly per thread and inherited on fork. */ SYSCALL_DEFINE1(iopl, unsigned int, level) { @@ -164,9 +174,18 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) security_locked_down(LOCKDOWN_IOPORT)) return -EPERM; } + /* + * Change the flags value on the return stack, which has been set + * up on system-call entry. See also the fork and signal handling + * code how this is handled. + */ regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << X86_EFLAGS_IOPL_BIT); + /* Store the new level in the thread struct */ t->iopl = level << X86_EFLAGS_IOPL_BIT; + /* + * X86_32 switches immediately and XEN handles it via emulation. + */ set_iopl_mask(t->iopl); return 0; -- cgit v1.2.3 From c8137ace56383688af911fea5934c71ad158135e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:28 +0100 Subject: x86/iopl: Restrict iopl() permission scope The access to the full I/O port range can be also provided by the TSS I/O bitmap, but that would require to copy 8k of data on scheduling in the task. As shown with the sched out optimization TSS.io_bitmap_base can be used to switch the incoming task to a preallocated I/O bitmap which has all bits zero, i.e. allows access to all I/O ports. Implementing this allows to provide an iopl() emulation mode which restricts the IOPL level 3 permissions to I/O port access but removes the STI/CLI permission which is coming with the hardware IOPL mechansim. Provide a config option to switch IOPL to emulation mode, make it the default and while at it also provide an option to disable IOPL completely. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski --- arch/x86/Kconfig | 32 ++++++++++++ arch/x86/include/asm/pgtable_32_types.h | 2 +- arch/x86/include/asm/processor.h | 28 ++++++++--- arch/x86/kernel/cpu/common.c | 5 ++ arch/x86/kernel/ioport.c | 87 +++++++++++++++++++++++---------- arch/x86/kernel/process.c | 32 +++++++----- 6 files changed, 139 insertions(+), 47 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d6e1faa28c58..2aad1cd14cc5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1254,6 +1254,38 @@ config X86_VSYSCALL_EMULATION Disabling this option saves about 7K of kernel size and possibly 4K of additional runtime pagetable memory. +choice + prompt "IOPL" + default X86_IOPL_EMULATION + +config X86_IOPL_EMULATION + bool "IOPL Emulation" + ---help--- + Legacy IOPL support is an overbroad mechanism which allows user + space aside of accessing all 65536 I/O ports also to disable + interrupts. To gain this access the caller needs CAP_SYS_RAWIO + capabilities and permission from potentially active security + modules. + + The emulation restricts the functionality of the syscall to + only allowing the full range I/O port access, but prevents the + ability to disable interrupts from user space. + +config X86_IOPL_LEGACY + bool "IOPL Legacy" + ---help--- + Allow the full IOPL permissions, i.e. user space access to all + 65536 I/O ports and also the ability to disable interrupts, which + is overbroad and can result in system lockups. + +config X86_IOPL_NONE + bool "IOPL None" + ---help--- + Disable the IOPL permission syscall. That's the safest option as + no sane application should depend on this functionality. + +endchoice + config TOSHIBA tristate "Toshiba Laptop support" depends on X86_32 diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f..0fab4bfb4df2 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -44,7 +44,7 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c * to avoid include recursion hell */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41) #define CPU_ENTRY_AREA_BASE \ ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 40bb0f7bca3f..b0e02aa3f46a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -332,19 +332,21 @@ struct x86_hw_tss { #define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) -#define IO_BITMAP_OFFSET_VALID \ +#define IO_BITMAP_OFFSET_VALID_MAP \ (offsetof(struct tss_struct, io_bitmap.bitmap) - \ offsetof(struct tss_struct, x86_tss)) +#define IO_BITMAP_OFFSET_VALID_ALL \ + (offsetof(struct tss_struct, io_bitmap.mapall) - \ + offsetof(struct tss_struct, x86_tss)) + /* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. - * - * -1? seg base+limit should be pointing to the address of the - * last valid byte + * sizeof(unsigned long) coming from an extra "long" at the end of the + * iobitmap. The limit is inclusive, i.e. the last valid byte. */ #define __KERNEL_TSS_LIMIT \ - (IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) + (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ + sizeof(unsigned long) - 1) /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) @@ -380,6 +382,12 @@ struct x86_io_bitmap { * be within the limit. */ unsigned long bitmap[IO_BITMAP_LONGS + 1]; + + /* + * Special I/O bitmap to emulate IOPL(3). All bytes zero, + * except the additional byte at the end. + */ + unsigned long mapall[IO_BITMAP_LONGS + 1]; }; struct tss_struct { @@ -506,7 +514,13 @@ struct thread_struct { #endif /* IO permissions: */ struct io_bitmap *io_bitmap; + + /* + * IOPL. Priviledge level dependent I/O permission which includes + * user space CLI/STI when granted. + */ unsigned long iopl; + unsigned long iopl_emul; mm_segment_t addr_limit; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 79dd544bb974..7bf402be13bb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1864,6 +1864,11 @@ void cpu_init(void) tss->io_bitmap.prev_max = 0; tss->io_bitmap.prev_sequence = 0; memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); + /* + * Invalidate the extra array entry past the end of the all + * permission bitmap as required by the hardware. + */ + tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL; set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 3548563b0935..9ed9458e02df 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -17,25 +17,41 @@ static atomic64_t io_bitmap_sequence; void io_bitmap_share(struct task_struct *tsk) - { - /* - * Take a refcount on current's bitmap. It can be used by - * both tasks as long as none of them changes the bitmap. - */ - refcount_inc(¤t->thread.io_bitmap->refcnt); - tsk->thread.io_bitmap = current->thread.io_bitmap; +{ + /* Can be NULL when current->thread.iopl_emul == 3 */ + if (current->thread.io_bitmap) { + /* + * Take a refcount on current's bitmap. It can be used by + * both tasks as long as none of them changes the bitmap. + */ + refcount_inc(¤t->thread.io_bitmap->refcnt); + tsk->thread.io_bitmap = current->thread.io_bitmap; + } set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } +static void task_update_io_bitmap(void) +{ + struct thread_struct *t = ¤t->thread; + + if (t->iopl_emul == 3 || t->io_bitmap) { + /* TSS update is handled on exit to user space */ + set_thread_flag(TIF_IO_BITMAP); + } else { + clear_thread_flag(TIF_IO_BITMAP); + /* Invalidate TSS */ + preempt_disable(); + tss_update_io_bitmap(); + preempt_enable(); + } +} + void io_bitmap_exit(void) { struct io_bitmap *iobm = current->thread.io_bitmap; current->thread.io_bitmap = NULL; - clear_thread_flag(TIF_IO_BITMAP); - preempt_disable(); - tss_update_io_bitmap(); - preempt_enable(); + task_update_io_bitmap(); if (iobm && refcount_dec_and_test(&iobm->refcnt)) kfree(iobm); } @@ -157,36 +173,55 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) */ SYSCALL_DEFINE1(iopl, unsigned int, level) { - struct pt_regs *regs = current_pt_regs(); struct thread_struct *t = ¤t->thread; + struct pt_regs *regs = current_pt_regs(); + unsigned int old; /* * Careful: the IOPL bits in regs->flags are undefined under Xen PV * and changing them has no effect. */ - unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT; + if (IS_ENABLED(CONFIG_X86_IOPL_NONE)) + return -ENOSYS; if (level > 3) return -EINVAL; + + if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) + old = t->iopl_emul; + else + old = t->iopl >> X86_EFLAGS_IOPL_BIT; + + /* No point in going further if nothing changes */ + if (level == old) + return 0; + /* Trying to gain more privileges? */ if (level > old) { if (!capable(CAP_SYS_RAWIO) || security_locked_down(LOCKDOWN_IOPORT)) return -EPERM; } - /* - * Change the flags value on the return stack, which has been set - * up on system-call entry. See also the fork and signal handling - * code how this is handled. - */ - regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | - (level << X86_EFLAGS_IOPL_BIT); - /* Store the new level in the thread struct */ - t->iopl = level << X86_EFLAGS_IOPL_BIT; - /* - * X86_32 switches immediately and XEN handles it via emulation. - */ - set_iopl_mask(t->iopl); + + if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) { + t->iopl_emul = level; + task_update_io_bitmap(); + } else { + /* + * Change the flags value on the return stack, which has + * been set up on system-call entry. See also the fork and + * signal handling code how this is handled. + */ + regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | + (level << X86_EFLAGS_IOPL_BIT); + /* Store the new level in the thread struct */ + t->iopl = level << X86_EFLAGS_IOPL_BIT; + /* + * X86_32 switches immediately and XEN handles it via + * emulation. + */ + set_iopl_mask(t->iopl); + } return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0b19c137c442..8a844a5d5ae8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -376,21 +376,27 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm) void tss_update_io_bitmap(void) { struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + u16 *base = &tss->x86_tss.io_bitmap_base; if (test_thread_flag(TIF_IO_BITMAP)) { - struct io_bitmap *iobm = current->thread.io_bitmap; - - /* - * Only copy bitmap data when the sequence number - * differs. The update time is accounted to the incoming - * task. - */ - if (tss->io_bitmap.prev_sequence != iobm->sequence) - tss_copy_io_bitmap(tss, iobm); - - /* Enable the bitmap */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; - + struct thread_struct *t = ¤t->thread; + + if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION) && + t->iopl_emul == 3) { + *base = IO_BITMAP_OFFSET_VALID_ALL; + } else { + struct io_bitmap *iobm = t->io_bitmap; + /* + * Only copy bitmap data when the sequence number + * differs. The update time is accounted to the + * incoming task. + */ + if (tss->io_bitmap.prev_sequence != iobm->sequence) + tss_copy_io_bitmap(tss, iobm); + + /* Enable the bitmap */ + *base = IO_BITMAP_OFFSET_VALID_MAP; + } /* * Make sure that the TSS limit is covering the io bitmap. * It might have been cut down by a VMEXIT to 0x67 which -- cgit v1.2.3 From a24ca9976843156eabbc5f2d798954b5674d1b61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:29 +0100 Subject: x86/iopl: Remove legacy IOPL option The IOPL emulation via the I/O bitmap is sufficient. Remove the legacy cruft dealing with the (e)flags based IOPL mechanism. Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross (Paravirt and Xen parts) Acked-by: Andy Lutomirski --- arch/x86/Kconfig | 23 +++-------------- arch/x86/include/asm/paravirt.h | 4 --- arch/x86/include/asm/paravirt_types.h | 2 -- arch/x86/include/asm/processor.h | 26 +++---------------- arch/x86/include/asm/xen/hypervisor.h | 2 -- arch/x86/kernel/ioport.c | 47 ++++++++--------------------------- arch/x86/kernel/paravirt.c | 2 -- arch/x86/kernel/process_32.c | 9 ------- arch/x86/kernel/process_64.c | 11 -------- arch/x86/xen/enlighten_pv.c | 10 -------- 10 files changed, 17 insertions(+), 119 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2aad1cd14cc5..1f926e396ec1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1254,12 +1254,9 @@ config X86_VSYSCALL_EMULATION Disabling this option saves about 7K of kernel size and possibly 4K of additional runtime pagetable memory. -choice - prompt "IOPL" - default X86_IOPL_EMULATION - config X86_IOPL_EMULATION bool "IOPL Emulation" + default y ---help--- Legacy IOPL support is an overbroad mechanism which allows user space aside of accessing all 65536 I/O ports also to disable @@ -1269,22 +1266,8 @@ config X86_IOPL_EMULATION The emulation restricts the functionality of the syscall to only allowing the full range I/O port access, but prevents the - ability to disable interrupts from user space. - -config X86_IOPL_LEGACY - bool "IOPL Legacy" - ---help--- - Allow the full IOPL permissions, i.e. user space access to all - 65536 I/O ports and also the ability to disable interrupts, which - is overbroad and can result in system lockups. - -config X86_IOPL_NONE - bool "IOPL None" - ---help--- - Disable the IOPL permission syscall. That's the safest option as - no sane application should depend on this functionality. - -endchoice + ability to disable interrupts from user space which would be + granted if the hardware IOPL mechanism would be used. config TOSHIBA tristate "Toshiba Laptop support" diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 69089d46f128..86e7317eb31f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -294,10 +294,6 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } -static inline void set_iopl_mask(unsigned mask) -{ - PVOP_VCALL1(cpu.set_iopl_mask, mask); -} static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 70b654f3ffe5..84812964d3dd 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -140,8 +140,6 @@ struct pv_cpu_ops { void (*load_sp0)(unsigned long sp0); - void (*set_iopl_mask)(unsigned mask); - void (*wbinvd)(void); /* cpuid emulation, mostly so that caps bits can be disabled */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b0e02aa3f46a..1387d31c5e07 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -516,10 +516,10 @@ struct thread_struct { struct io_bitmap *io_bitmap; /* - * IOPL. Priviledge level dependent I/O permission which includes - * user space CLI/STI when granted. + * IOPL. Priviledge level dependent I/O permission which is + * emulated via the I/O bitmap to prevent user space from disabling + * interrupts. */ - unsigned long iopl; unsigned long iopl_emul; mm_segment_t addr_limit; @@ -552,25 +552,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ -/* - * Set IOPL bits in EFLAGS from given mask - */ -static inline void native_set_iopl_mask(unsigned mask) -{ -#ifdef CONFIG_X86_32 - unsigned int reg; - - asm volatile ("pushfl;" - "popl %0;" - "andl %1, %0;" - "orl %2, %0;" - "pushl %0;" - "popfl" - : "=&r" (reg) - : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -#endif -} - static inline void native_load_sp0(unsigned long sp0) { @@ -610,7 +591,6 @@ static inline void load_sp0(unsigned long sp0) native_load_sp0(sp0); } -#define set_iopl_mask native_set_iopl_mask #endif /* CONFIG_PARAVIRT_XXL */ /* Free all resources held by a thread. */ diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 42e1245af0d8..ff4b52e37e60 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num); void xen_arch_unregister_cpu(int num); #endif -extern void xen_set_iopl_mask(unsigned mask); - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 9ed9458e02df..d5dcde972c42 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -153,28 +153,23 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) /* * The sys_iopl functionality depends on the level argument, which if - * granted for the task is used by the CPU to check I/O instruction and - * CLI/STI against the current priviledge level (CPL). If CPL is less than - * or equal the tasks IOPL level the instructions take effect. If not a #GP - * is raised. The default IOPL is 0, i.e. no permissions. + * granted for the task is used to enable access to all 65536 I/O ports. * - * Setting IOPL to level 0-2 is disabling the userspace access. Only level - * 3 enables it. If set it allows the user space thread: + * This does not use the IOPL mechanism provided by the CPU as that would + * also allow the user space task to use the CLI/STI instructions. * - * - Unrestricted access to all 65535 I/O ports - * - The usage of CLI/STI instructions + * Disabling interrupts in a user space task is dangerous as it might lock + * up the machine and the semantics vs. syscalls and exceptions is + * undefined. * - * The advantage over ioperm is that the context switch does not require to - * update the I/O bitmap which is especially true when a large number of - * ports is accessed. But the allowance of CLI/STI in userspace is - * considered a major problem. + * Setting IOPL to level 0-2 is disabling I/O permissions. Level 3 + * 3 enables them. * * IOPL is strictly per thread and inherited on fork. */ SYSCALL_DEFINE1(iopl, unsigned int, level) { struct thread_struct *t = ¤t->thread; - struct pt_regs *regs = current_pt_regs(); unsigned int old; /* @@ -187,10 +182,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) if (level > 3) return -EINVAL; - if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) - old = t->iopl_emul; - else - old = t->iopl >> X86_EFLAGS_IOPL_BIT; + old = t->iopl_emul; /* No point in going further if nothing changes */ if (level == old) @@ -203,25 +195,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) return -EPERM; } - if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) { - t->iopl_emul = level; - task_update_io_bitmap(); - } else { - /* - * Change the flags value on the return stack, which has - * been set up on system-call entry. See also the fork and - * signal handling code how this is handled. - */ - regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | - (level << X86_EFLAGS_IOPL_BIT); - /* Store the new level in the thread struct */ - t->iopl = level << X86_EFLAGS_IOPL_BIT; - /* - * X86_32 switches immediately and XEN handles it via - * emulation. - */ - set_iopl_mask(t->iopl); - } + t->iopl_emul = level; + task_update_io_bitmap(); return 0; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 59d3d2763a9e..789f5e4f89de 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -341,8 +341,6 @@ struct paravirt_patch_template pv_ops = { .cpu.iret = native_iret, .cpu.swapgs = native_swapgs, - .cpu.set_iopl_mask = native_set_iopl_mask, - .cpu.start_context_switch = paravirt_nop, .cpu.end_context_switch = paravirt_nop, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6c7d90527156..323499f48858 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -187,15 +187,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ load_TLS(next, cpu); - /* - * Restore IOPL if needed. In normal use, the flags restore - * in the switch assembly will handle this. But if the kernel - * is running virtualized at a non-zero CPL, the popf will - * not restore flags, so it must be done in a separate step. - */ - if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) - set_iopl_mask(next->iopl); - switch_to_extra(prev_p, next_p); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e93a1b8fd7f9..506d66830d4d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -497,17 +497,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) switch_to_extra(prev_p, next_p); -#ifdef CONFIG_XEN_PV - /* - * On Xen PV, IOPL bits in pt_regs->flags have no effect, and - * current_pt_regs()->flags may not match the current task's - * intended IOPL. We need to switch it manually. - */ - if (unlikely(static_cpu_has(X86_FEATURE_XENPV) && - prev->iopl != next->iopl)) - xen_set_iopl_mask(next->iopl); -#endif - if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { /* * AMD CPUs have a misfeature: SYSRET sets the SS selector but diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5bfea374a160..ae4a41ca19f6 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -837,15 +837,6 @@ static void xen_load_sp0(unsigned long sp0) this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } -void xen_set_iopl_mask(unsigned mask) -{ - struct physdev_set_iopl set_iopl; - - /* Force the change at ring 0. */ - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; - HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); -} - static void xen_io_delay(void) { } @@ -1055,7 +1046,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .write_idt_entry = xen_write_idt_entry, .load_sp0 = xen_load_sp0, - .set_iopl_mask = xen_set_iopl_mask, .io_delay = xen_io_delay, /* Xen takes care of %gs when switching to usermode for us */ -- cgit v1.2.3 From 111e7b15cf10f6e973ccf537c70c66a5de539060 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Nov 2019 21:40:33 +0100 Subject: x86/ioperm: Extend IOPL config to control ioperm() as well If iopl() is disabled, then providing ioperm() does not make much sense. Rename the config option and disable/enable both syscalls with it. Guard the code with #ifdefs where appropriate. Suggested-by: Andy Lutomirski Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 7 +++++-- arch/x86/include/asm/io_bitmap.h | 6 ++++++ arch/x86/include/asm/processor.h | 9 ++++++++- arch/x86/include/asm/thread_info.h | 7 ++++++- arch/x86/kernel/cpu/common.c | 26 +++++++++++++++++--------- arch/x86/kernel/ioport.c | 26 +++++++++++++++++++------- arch/x86/kernel/process.c | 4 ++++ 7 files changed, 65 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1f926e396ec1..b162ce1482fc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1254,10 +1254,13 @@ config X86_VSYSCALL_EMULATION Disabling this option saves about 7K of kernel size and possibly 4K of additional runtime pagetable memory. -config X86_IOPL_EMULATION - bool "IOPL Emulation" +config X86_IOPL_IOPERM + bool "IOPERM and IOPL Emulation" default y ---help--- + This enables the ioperm() and iopl() syscalls which are necessary + for legacy applications. + Legacy IOPL support is an overbroad mechanism which allows user space aside of accessing all 65536 I/O ports also to disable interrupts. To gain this access the caller needs CAP_SYS_RAWIO diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index b664baadf736..02c6ef8f7667 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -15,9 +15,15 @@ struct io_bitmap { struct task_struct; +#ifdef CONFIG_X86_IOPL_IOPERM void io_bitmap_share(struct task_struct *tsk); void io_bitmap_exit(void); void tss_update_io_bitmap(void); +#else +static inline void io_bitmap_share(struct task_struct *tsk) { } +static inline void io_bitmap_exit(void) { } +static inline void tss_update_io_bitmap(void) { } +#endif #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1387d31c5e07..45f416a2c1f1 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -340,13 +340,18 @@ struct x86_hw_tss { (offsetof(struct tss_struct, io_bitmap.mapall) - \ offsetof(struct tss_struct, x86_tss)) +#ifdef CONFIG_X86_IOPL_IOPERM /* * sizeof(unsigned long) coming from an extra "long" at the end of the * iobitmap. The limit is inclusive, i.e. the last valid byte. */ -#define __KERNEL_TSS_LIMIT \ +# define __KERNEL_TSS_LIMIT \ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ sizeof(unsigned long) - 1) +#else +# define __KERNEL_TSS_LIMIT \ + (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) +#endif /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) @@ -398,7 +403,9 @@ struct tss_struct { */ struct x86_hw_tss x86_tss; +#ifdef CONFIG_X86_IOPL_IOPERM struct x86_io_bitmap io_bitmap; +#endif } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 0accf44878a5..d779366ce3f8 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -156,8 +156,13 @@ struct thread_info { # define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) #endif -#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ +#ifdef CONFIG_X86_IOPL_IOPERM +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ _TIF_IO_BITMAP) +#else +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY) +#endif + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define STACK_WARN (THREAD_SIZE/8) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7bf402be13bb..6f6ca6bd58d6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1804,6 +1804,22 @@ static inline void gdt_setup_doublefault_tss(int cpu) } #endif /* !CONFIG_X86_64 */ +static inline void tss_setup_io_bitmap(struct tss_struct *tss) +{ + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; + +#ifdef CONFIG_X86_IOPL_IOPERM + tss->io_bitmap.prev_max = 0; + tss->io_bitmap.prev_sequence = 0; + memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); + /* + * Invalidate the extra array entry past the end of the all + * permission bitmap as required by the hardware. + */ + tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL; +#endif +} + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT @@ -1860,15 +1876,7 @@ void cpu_init(void) /* Initialize the TSS. */ tss_setup_ist(tss); - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; - tss->io_bitmap.prev_max = 0; - tss->io_bitmap.prev_sequence = 0; - memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); - /* - * Invalidate the extra array entry past the end of the all - * permission bitmap as required by the hardware. - */ - tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL; + tss_setup_io_bitmap(tss); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index d5dcde972c42..8abeee0dd7bf 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,6 +14,8 @@ #include #include +#ifdef CONFIG_X86_IOPL_IOPERM + static atomic64_t io_bitmap_sequence; void io_bitmap_share(struct task_struct *tsk) @@ -172,13 +174,6 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) struct thread_struct *t = ¤t->thread; unsigned int old; - /* - * Careful: the IOPL bits in regs->flags are undefined under Xen PV - * and changing them has no effect. - */ - if (IS_ENABLED(CONFIG_X86_IOPL_NONE)) - return -ENOSYS; - if (level > 3) return -EINVAL; @@ -200,3 +195,20 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) return 0; } + +#else /* CONFIG_X86_IOPL_IOPERM */ + +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + return -ENOSYS; +} +SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) +{ + return -ENOSYS; +} + +SYSCALL_DEFINE1(iopl, unsigned int, level) +{ + return -ENOSYS; +} +#endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8a844a5d5ae8..7964d7db9366 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -322,6 +322,7 @@ void arch_setup_new_exec(void) } } +#ifdef CONFIG_X86_IOPL_IOPERM static inline void tss_invalidate_io_bitmap(struct tss_struct *tss) { /* @@ -409,6 +410,9 @@ void tss_update_io_bitmap(void) tss_invalidate_io_bitmap(tss); } } +#else /* CONFIG_X86_IOPL_IOPERM */ +static inline void switch_to_bitmap(unsigned long tifp) { } +#endif #ifdef CONFIG_SMP -- cgit v1.2.3 From a3ba966066afbe8fd0d3605ffe04c633083752f1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Nov 2019 11:12:03 +0100 Subject: x86/entry/32: Clarify register saving in __switch_to_asm() commit 6690e86be83a ("sched/x86: Save [ER]FLAGS on context switch") re-introduced the flags saving on context switch to prevent AC leakage. The pushf/popf instructions are right among the callee saved register section, so the comment explaining the save/restore is not entirely correct. Add a seperate comment to pushf/popf explaining the reason. Reported-by: Linus Torvalds Signed-off-by: Thomas Gleixner --- arch/x86/entry/entry_32.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f83ca5aa8b77..99fad6fed03c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -718,6 +718,11 @@ ENTRY(__switch_to_asm) pushl %ebx pushl %edi pushl %esi + /* + * Flags are saved to prevent AC leakage. This could go + * away if objtool would have 32bit support to verify + * the STAC/CLAC correctness. + */ pushfl /* switch stack */ @@ -740,8 +745,9 @@ ENTRY(__switch_to_asm) FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif - /* restore callee-saved registers */ + /* Restore flags or the incoming task to restore AC state. */ popfl + /* restore callee-saved registers */ popl %esi popl %edi popl %ebx -- cgit v1.2.3 From e3cb0c7102f04c83bf1a7cb1d052e92749310b46 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 20 Nov 2019 14:25:53 -0800 Subject: x86/ioperm: Fix use of deprecated config option The commit 111e7b15cf10 ("x86/ioperm: Extend IOPL config to control ioperm() as well") replaced X86_IOPL_EMULATION with X86_IOPL_IOPERM. However it appears that there was at least one spot missed as tss_update_io_bitmap() still had a reference to it contained in the code. The result of this is that it exposed a NULL pointer dereference as seen below with a linux-next next-20191120 kernel: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 5 PID: 1542 Comm: ovs-vswitchd Tainted: G W 5.4.0-rc8-next-20191120 #125 RIP: 0010:tss_update_io_bitmap+0x4e/0x180 Code: 10 31 c0 65 48 03 1d 69 54 5d 6d 65 48 8b 04 25 40 8c 01 00 48 8b 10 \ f7 c2 00 00 40 00 0f 84 8c 00 00 00 4c 8b a0 c0 22 00 00 <49> 8b 04 \ 24 48 39 43 68 74 2e 8b 53 70 41 39 54 24 0c 48 8d 7b 78 RSP: 0018:ffffb8888a0ebf08 EFLAGS: 00010006 RAX: ffff8a429811a680 RBX: ffff8a4c3f946000 RCX: 0000000000000011 RDX: 0000000000400080 RSI: 0000000000400080 RDI: 0000000000000000 RBP: ffffb8888a0ebf30 R08: 00007ffffb5d7ce0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f68a9635c40(0000) GS:ffff8a4c3f940000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000103572a001 CR4: 00000000001606e0 Call Trace: ? syscall_slow_exit_work+0x39/0xdb do_syscall_64+0x1a5/0x200 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f68a7aff797 Fixes: 111e7b15cf10 ("x86/ioperm: Extend IOPL config to control ioperm() as well") Signed-off-by: Alexander Duyck Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20191120222426.3060.18462.stgit@localhost.localdomain --- arch/x86/kernel/process.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7964d7db9366..bd2a11ca5dd6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -382,8 +382,7 @@ void tss_update_io_bitmap(void) if (test_thread_flag(TIF_IO_BITMAP)) { struct thread_struct *t = ¤t->thread; - if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION) && - t->iopl_emul == 3) { + if (IS_ENABLED(CONFIG_X86_IOPL_IOPERM) && t->iopl_emul == 3) { *base = IO_BITMAP_OFFSET_VALID_ALL; } else { struct io_bitmap *iobm = t->io_bitmap; -- cgit v1.2.3