diff options
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Kconfig | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 7 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pv.c | 133 | ||||
-rw-r--r-- | arch/x86/xen/mmu_pv.c | 5 | ||||
-rw-r--r-- | arch/x86/xen/pmu.c | 73 | ||||
-rw-r--r-- | arch/x86/xen/smp_pv.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 7 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm.S | 9 | ||||
-rw-r--r-- | arch/x86/xen/xen-head.S | 12 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 5 |
10 files changed, 139 insertions, 116 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 77e788e928cd..98d8a50d2aed 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -9,7 +9,7 @@ config XEN select PARAVIRT_CLOCK select X86_HV_CALLBACK_VECTOR depends on X86_64 || (X86_32 && X86_PAE) - depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MCORE2 || MATOM || MK8) + depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MATOM) depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1b7710bd0d05..53282dc7d5ac 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -103,10 +103,6 @@ noinstr void *__xen_hypercall_setfunc(void) void (*func)(void); /* - * Xen is supported only on CPUs with CPUID, so testing for - * X86_FEATURE_CPUID is a test for early_cpu_init() having been - * run. - * * Note that __xen_hypercall_setfunc() is noinstr only due to a nasty * dependency chain: it is being called via the xen_hypercall static * call when running as a PVH or HVM guest. Hypercalls need to be @@ -118,8 +114,7 @@ noinstr void *__xen_hypercall_setfunc(void) */ instrumentation_begin(); - if (!boot_cpu_has(X86_FEATURE_CPUID)) - xen_get_vendor(); + xen_get_vendor(); if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5e57835e999d..26bbaf4b7330 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -49,7 +49,7 @@ #include <xen/hvc-console.h> #include <xen/acpi.h> -#include <asm/cpuid.h> +#include <asm/cpuid/api.h> #include <asm/paravirt.h> #include <asm/apic.h> #include <asm/page.h> @@ -61,6 +61,7 @@ #include <asm/processor.h> #include <asm/proto.h> #include <asm/msr-index.h> +#include <asm/msr.h> #include <asm/traps.h> #include <asm/setup.h> #include <asm/desc.h> @@ -73,6 +74,7 @@ #include <asm/mwait.h> #include <asm/pci_x86.h> #include <asm/cpu.h> +#include <asm/irq_stack.h> #ifdef CONFIG_X86_IOPL_IOPERM #include <asm/io_bitmap.h> #endif @@ -94,12 +96,49 @@ void *xen_initial_gdt; static int xen_cpu_up_prepare_pv(unsigned int cpu); static int xen_cpu_dead_pv(unsigned int cpu); +#ifndef CONFIG_PREEMPTION +/* + * Some hypercalls issued by the toolstack can take many 10s of + * seconds. Allow tasks running hypercalls via the privcmd driver to + * be voluntarily preempted even if full kernel preemption is + * disabled. + * + * Such preemptible hypercalls are bracketed by + * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end() + * calls. + */ +DEFINE_PER_CPU(bool, xen_in_preemptible_hcall); +EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); + +/* + * In case of scheduling the flag must be cleared and restored after + * returning from schedule as the task might move to a different CPU. + */ +static __always_inline bool get_and_clear_inhcall(void) +{ + bool inhcall = __this_cpu_read(xen_in_preemptible_hcall); + + __this_cpu_write(xen_in_preemptible_hcall, false); + return inhcall; +} + +static __always_inline void restore_inhcall(bool inhcall) +{ + __this_cpu_write(xen_in_preemptible_hcall, inhcall); +} + +#else + +static __always_inline bool get_and_clear_inhcall(void) { return false; } +static __always_inline void restore_inhcall(bool inhcall) { } + +#endif + struct tls_descs { struct desc_struct desc[3]; }; DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; -DEFINE_PER_CPU(unsigned int, xen_lazy_nesting); enum xen_lazy_mode xen_get_lazy_mode(void) { @@ -687,6 +726,36 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check) } #endif +static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + inc_irq_stat(irq_hv_callback_count); + + xen_evtchn_do_upcall(); + + set_irq_regs(old_regs); +} + +__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + bool inhcall; + + instrumentation_begin(); + run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); + + inhcall = get_and_clear_inhcall(); + if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { + irqentry_exit_cond_resched(); + instrumentation_end(); + restore_inhcall(inhcall); + } else { + instrumentation_end(); + irqentry_exit(regs, state); + } +} + struct trap_array_entry { void (*orig)(void); void (*xen)(void); @@ -1018,15 +1087,15 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -static u64 xen_do_read_msr(unsigned int msr, int *err) +static u64 xen_do_read_msr(u32 msr, int *err) { u64 val = 0; /* Avoid uninitialized value for safe variant. */ - if (pmu_msr_read(msr, &val, err)) + if (pmu_msr_chk_emulated(msr, &val, true)) return val; if (err) - val = native_read_msr_safe(msr, err); + *err = native_read_msr_safe(msr, &val); else val = native_read_msr(msr); @@ -1042,17 +1111,9 @@ static u64 xen_do_read_msr(unsigned int msr, int *err) return val; } -static void set_seg(unsigned int which, unsigned int low, unsigned int high, - int *err) +static void set_seg(u32 which, u64 base) { - u64 base = ((u64)high << 32) | low; - - if (HYPERVISOR_set_segment_base(which, base) == 0) - return; - - if (err) - *err = -EIO; - else + if (HYPERVISOR_set_segment_base(which, base)) WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base); } @@ -1061,20 +1122,19 @@ static void set_seg(unsigned int which, unsigned int low, unsigned int high, * With err == NULL write_msr() semantics are selected. * Supplying an err pointer requires err to be pre-initialized with 0. */ -static void xen_do_write_msr(unsigned int msr, unsigned int low, - unsigned int high, int *err) +static void xen_do_write_msr(u32 msr, u64 val, int *err) { switch (msr) { case MSR_FS_BASE: - set_seg(SEGBASE_FS, low, high, err); + set_seg(SEGBASE_FS, val); break; case MSR_KERNEL_GS_BASE: - set_seg(SEGBASE_GS_USER, low, high, err); + set_seg(SEGBASE_GS_USER, val); break; case MSR_GS_BASE: - set_seg(SEGBASE_GS_KERNEL, low, high, err); + set_seg(SEGBASE_GS_KERNEL, val); break; case MSR_STAR: @@ -1090,42 +1150,45 @@ static void xen_do_write_msr(unsigned int msr, unsigned int low, break; default: - if (!pmu_msr_write(msr, low, high, err)) { - if (err) - *err = native_write_msr_safe(msr, low, high); - else - native_write_msr(msr, low, high); - } + if (pmu_msr_chk_emulated(msr, &val, false)) + return; + + if (err) + *err = native_write_msr_safe(msr, val); + else + native_write_msr(msr, val); } } -static u64 xen_read_msr_safe(unsigned int msr, int *err) +static int xen_read_msr_safe(u32 msr, u64 *val) { - return xen_do_read_msr(msr, err); + int err = 0; + + *val = xen_do_read_msr(msr, &err); + return err; } -static int xen_write_msr_safe(unsigned int msr, unsigned int low, - unsigned int high) +static int xen_write_msr_safe(u32 msr, u64 val) { int err = 0; - xen_do_write_msr(msr, low, high, &err); + xen_do_write_msr(msr, val, &err); return err; } -static u64 xen_read_msr(unsigned int msr) +static u64 xen_read_msr(u32 msr) { - int err; + int err = 0; return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL); } -static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) +static void xen_write_msr(u32 msr, u64 val) { int err; - xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL); + xen_do_write_msr(msr, val, xen_msr_safe ? &err : NULL); } /* This is called once we have the cpu_possible_mask */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index d078de2c952b..2a4a8deaf612 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -578,7 +578,6 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) xen_mc_issue(XEN_LAZY_MMU); } -#if CONFIG_PGTABLE_LEVELS >= 5 __visible p4dval_t xen_p4d_val(p4d_t p4d) { return pte_mfn_to_pfn(p4d.p4d); @@ -592,7 +591,6 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d) return native_make_p4d(p4d); } PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); -#endif /* CONFIG_PGTABLE_LEVELS >= 5 */ static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, void (*func)(struct mm_struct *mm, struct page *, @@ -2189,7 +2187,6 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = { .flush_tlb_kernel = xen_flush_tlb, .flush_tlb_one_user = xen_flush_tlb_one_user, .flush_tlb_multi = xen_flush_tlb_multi, - .tlb_remove_table = tlb_remove_table, .pgd_alloc = xen_pgd_alloc, .pgd_free = xen_pgd_free, @@ -2223,10 +2220,8 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = { .alloc_pud = xen_alloc_pmd_init, .release_pud = xen_release_pmd_init, -#if CONFIG_PGTABLE_LEVELS >= 5 .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), -#endif .enter_mmap = xen_enter_mmap, .exit_mmap = xen_exit_mmap, diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index f06987b0efc3..8f89ce0b67e3 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -2,6 +2,7 @@ #include <linux/types.h> #include <linux/interrupt.h> +#include <asm/msr.h> #include <asm/xen/hypercall.h> #include <xen/xen.h> #include <xen/page.h> @@ -128,7 +129,7 @@ static inline uint32_t get_fam15h_addr(u32 addr) return addr; } -static inline bool is_amd_pmu_msr(unsigned int msr) +static bool is_amd_pmu_msr(u32 msr) { if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) @@ -194,8 +195,7 @@ static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index) } } -static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type, - int index, bool is_read) +static bool xen_intel_pmu_emulate(u32 msr, u64 *val, int type, int index, bool is_read) { uint64_t *reg = NULL; struct xen_pmu_intel_ctxt *ctxt; @@ -257,7 +257,7 @@ static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type, return false; } -static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) +static bool xen_amd_pmu_emulate(u32 msr, u64 *val, bool is_read) { uint64_t *reg = NULL; int i, off = 0; @@ -298,55 +298,20 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) return false; } -static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read, - bool *emul) +bool pmu_msr_chk_emulated(u32 msr, u64 *val, bool is_read) { int type, index = 0; if (is_amd_pmu_msr(msr)) - *emul = xen_amd_pmu_emulate(msr, val, is_read); - else if (is_intel_pmu_msr(msr, &type, &index)) - *emul = xen_intel_pmu_emulate(msr, val, type, index, is_read); - else - return false; - - return true; -} - -bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) -{ - bool emulated; + return xen_amd_pmu_emulate(msr, val, is_read); - if (!pmu_msr_chk_emulated(msr, val, true, &emulated)) - return false; + if (is_intel_pmu_msr(msr, &type, &index)) + return xen_intel_pmu_emulate(msr, val, type, index, is_read); - if (!emulated) { - *val = err ? native_read_msr_safe(msr, err) - : native_read_msr(msr); - } - - return true; -} - -bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err) -{ - uint64_t val = ((uint64_t)high << 32) | low; - bool emulated; - - if (!pmu_msr_chk_emulated(msr, &val, false, &emulated)) - return false; - - if (!emulated) { - if (err) - *err = native_write_msr_safe(msr, low, high); - else - native_write_msr(msr, low, high); - } - - return true; + return false; } -static unsigned long long xen_amd_read_pmc(int counter) +static u64 xen_amd_read_pmc(int counter) { struct xen_pmu_amd_ctxt *ctxt; uint64_t *counter_regs; @@ -354,11 +319,12 @@ static unsigned long long xen_amd_read_pmc(int counter) uint8_t xenpmu_flags = get_xenpmu_flags(); if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) { - uint32_t msr; - int err; + u32 msr; + u64 val; msr = amd_counters_base + (counter * amd_msr_step); - return native_read_msr_safe(msr, &err); + native_read_msr_safe(msr, &val); + return val; } ctxt = &xenpmu_data->pmu.c.amd; @@ -366,7 +332,7 @@ static unsigned long long xen_amd_read_pmc(int counter) return counter_regs[counter]; } -static unsigned long long xen_intel_read_pmc(int counter) +static u64 xen_intel_read_pmc(int counter) { struct xen_pmu_intel_ctxt *ctxt; uint64_t *fixed_counters; @@ -375,15 +341,16 @@ static unsigned long long xen_intel_read_pmc(int counter) uint8_t xenpmu_flags = get_xenpmu_flags(); if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) { - uint32_t msr; - int err; + u32 msr; + u64 val; if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff); else msr = MSR_IA32_PERFCTR0 + counter; - return native_read_msr_safe(msr, &err); + native_read_msr_safe(msr, &val); + return val; } ctxt = &xenpmu_data->pmu.c.intel; @@ -396,7 +363,7 @@ static unsigned long long xen_intel_read_pmc(int counter) return arch_cntr_pair[counter].counter; } -unsigned long long xen_read_pmc(int counter) +u64 xen_read_pmc(int counter) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return xen_amd_read_pmc(counter); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 7ea57f728b89..9bb8ff8bff30 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -70,7 +70,7 @@ static void cpu_bringup(void) xen_enable_syscall(); } cpu = smp_processor_id(); - smp_store_cpu_info(cpu); + identify_secondary_cpu(cpu); set_cpu_sibling_map(cpu); speculative_store_bypass_ht_init(); diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 77a6ea1c60e4..ba2f17e64321 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -13,6 +13,7 @@ #include <asm/xen/hypercall.h> #include <asm/xen/page.h> #include <asm/fixmap.h> +#include <asm/msr.h> #include "xen-ops.h" @@ -39,7 +40,7 @@ void xen_arch_post_suspend(int cancelled) static void xen_vcpu_notify_restore(void *data) { if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) - wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl)); + wrmsrq(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl)); /* Boot processor notified via generic timekeeping_resume() */ if (smp_processor_id() == 0) @@ -55,9 +56,9 @@ static void xen_vcpu_notify_suspend(void *data) tick_suspend_local(); if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { - rdmsrl(MSR_IA32_SPEC_CTRL, tmp); + rdmsrq(MSR_IA32_SPEC_CTRL, tmp); this_cpu_write(spec_ctrl, tmp); - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + wrmsrq(MSR_IA32_SPEC_CTRL, 0); } } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index b518f36d1ca2..461bb1526502 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -51,6 +51,7 @@ SYM_FUNC_END(xen_hypercall_pv) * non-zero. */ SYM_FUNC_START(xen_irq_disable_direct) + ENDBR movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask) RET SYM_FUNC_END(xen_irq_disable_direct) @@ -90,6 +91,7 @@ SYM_FUNC_END(check_events) * then enter the hypervisor to get them handled. */ SYM_FUNC_START(xen_irq_enable_direct) + ENDBR FRAME_BEGIN /* Unmask events */ movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask) @@ -120,6 +122,7 @@ SYM_FUNC_END(xen_irq_enable_direct) * x86 use opposite senses (mask vs enable). */ SYM_FUNC_START(xen_save_fl_direct) + ENDBR testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask) setz %ah addb %ah, %ah @@ -127,6 +130,7 @@ SYM_FUNC_START(xen_save_fl_direct) SYM_FUNC_END(xen_save_fl_direct) SYM_FUNC_START(xen_read_cr2) + ENDBR FRAME_BEGIN _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX @@ -135,6 +139,7 @@ SYM_FUNC_START(xen_read_cr2) SYM_FUNC_END(xen_read_cr2); SYM_FUNC_START(xen_read_cr2_direct) + ENDBR FRAME_BEGIN _ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX FRAME_END @@ -221,9 +226,7 @@ SYM_CODE_END(xen_early_idt_handler_array) push %rax mov $__HYPERVISOR_iret, %eax syscall /* Do the IRET. */ -#ifdef CONFIG_MITIGATION_SLS - int3 -#endif + ud2 /* The SYSCALL should never return. */ .endm SYM_CODE_START(xen_iret) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 894edf8d6d62..5dad6c51cdc3 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -31,16 +31,14 @@ SYM_CODE_START(startup_xen) leaq __top_init_kernel_stack(%rip), %rsp - /* Set up %gs. - * - * The base of %gs always points to fixed_percpu_data. If the - * stack protector canary is enabled, it is located at %gs:40. + /* + * Set up GSBASE. * Note that, on SMP, the boot cpu uses init data section until * the per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx - movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax - cdq + xorl %eax, %eax + xorl %edx, %edx wrmsr mov %rsi, %rdi @@ -133,11 +131,13 @@ SYM_FUNC_START(xen_hypercall_hvm) SYM_FUNC_END(xen_hypercall_hvm) SYM_FUNC_START(xen_hypercall_amd) + ANNOTATE_NOENDBR vmmcall RET SYM_FUNC_END(xen_hypercall_amd) SYM_FUNC_START(xen_hypercall_intel) + ANNOTATE_NOENDBR vmcall RET SYM_FUNC_END(xen_hypercall_intel) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 25e318ef27d6..090349baec09 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -271,10 +271,9 @@ void xen_pmu_finish(int cpu); static inline void xen_pmu_init(int cpu) {} static inline void xen_pmu_finish(int cpu) {} #endif -bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); -bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); +bool pmu_msr_chk_emulated(u32 msr, u64 *val, bool is_read); int pmu_apic_update(uint32_t reg); -unsigned long long xen_read_pmc(int counter); +u64 xen_read_pmc(int counter); #ifdef CONFIG_SMP |