author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-10 22:14:21 +0300
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-10 22:14:21 +0300
commit | bf98ecbbae3edf3bb3ec254c3e318aa3f75fd15e (patch)
tree | 38a9239a6282c982a0291d5888351a327c57edf6 /arch/x86
parent | 4287af35113cd5ba101b5c9e76614b5bebf48f58 (diff)
parent | 501586ea5974a9dafee41f54a66326addb01a5ac (diff)
download | linux-bf98ecbbae3edf3bb3ec254c3e318aa3f75fd15e.tar.xz
Merge tag 'for-linus-5.16b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross:
- a series to speed up the boot of Xen PV guests
- some cleanups in Xen related code
- replacement of license texts with the appropriate SPDX headers and
fixing of wrong SPDX headers in Xen header files
- a small series making paravirtualized interrupt masking much simpler
and at the same time removing complaints of objtool
- a fix for Xen ballooning hogging workqueues for too long
- enablement of the Xen pciback driver for Arm
- some further small fixes/enhancements
* tag 'for-linus-5.16b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (22 commits)
xen/balloon: fix unused-variable warning
xen/balloon: rename alloc/free_xenballooned_pages
xen/balloon: add late_initcall_sync() for initial ballooning done
x86/xen: remove 32-bit awareness from startup_xen
xen: remove highmem remnants
xen: allow pv-only hypercalls only with CONFIG_XEN_PV
x86/xen: remove 32-bit pv leftovers
xen-pciback: allow compiling on other archs than x86
x86/xen: switch initial pvops IRQ functions to dummy ones
x86/xen: remove xen_have_vcpu_info_placement flag
x86/pvh: add prototype for xen_pvh_init()
xen: Fix implicit type conversion
xen: fix wrong SPDX headers of Xen related headers
xen/pvcalls-back: Remove redundant 'flush_workqueue()' calls
x86/xen: Remove redundant irq_enter/exit() invocations
xen-pciback: Fix return in pm_ctrl_init()
xen/x86: restrict PV Dom0 identity mapping
xen/x86: there's no highmem anymore in PV mode
xen/x86: adjust handling of the L3 user vsyscall special page table
xen/x86: adjust xen_set_fixmap()
...
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/paravirt_types.h | 2
-rw-r--r-- | arch/x86/include/asm/xen/hypercall.h | 233
-rw-r--r-- | arch/x86/include/asm/xen/hypervisor.h | 4
-rw-r--r-- | arch/x86/include/asm/xen/pci.h | 19
-rw-r--r-- | arch/x86/kernel/paravirt.c | 13
-rw-r--r-- | arch/x86/pci/xen.c | 76
-rw-r--r-- | arch/x86/xen/enlighten.c | 116
-rw-r--r-- | arch/x86/xen/enlighten_hvm.c | 6
-rw-r--r-- | arch/x86/xen/enlighten_pv.c | 35
-rw-r--r-- | arch/x86/xen/irq.c | 62
-rw-r--r-- | arch/x86/xen/mmu_pv.c | 52
-rw-r--r-- | arch/x86/xen/setup.c | 10
-rw-r--r-- | arch/x86/xen/smp.c | 28
-rw-r--r-- | arch/x86/xen/smp_pv.c | 2
-rw-r--r-- | arch/x86/xen/xen-head.S | 12
-rw-r--r-- | arch/x86/xen/xen-ops.h | 4
16 files changed, 206 insertions, 468 deletions
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index d9d6b0203ec4..fc1151e77569 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -577,7 +577,9 @@ void paravirt_leave_lazy_mmu(void);
 void paravirt_flush_lazy_mmu(void);
 
 void _paravirt_nop(void);
+void paravirt_BUG(void);
 u64 _paravirt_ident_64(u64);
+unsigned long paravirt_ret0(void);
 
 #define paravirt_nop	((void *)_paravirt_nop)
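The two declarations added here back different kinds of pv-ops defaults: paravirt_ret0 for hooks that should report "nothing", paravirt_BUG for hooks that must never be reached (see the arch/x86/xen/irq.c hunk further down). A standalone userspace sketch of that table-of-defaults pattern; the struct and function names here are illustrative, not the kernel's:

```c
#include <stdio.h>
#include <stdlib.h>

static unsigned long ret0(void) { return 0; }	/* like paravirt_ret0 */
static void nop(void) { }			/* like paravirt_nop */
static void bug(void) { abort(); }		/* stands in for paravirt_BUG()/BUG() */

struct irq_ops {
	unsigned long (*save_fl)(void);
	void (*irq_disable)(void);
	void (*irq_enable)(void);
};

/* Boot-time defaults: flags read back as "interrupts off", disabling is
 * a no-op, and enabling this early is a bug. */
static struct irq_ops ops = {
	.save_fl     = ret0,
	.irq_disable = nop,
	.irq_enable  = bug,
};

int main(void)
{
	printf("flags = %lu\n", ops.save_fl());	/* prints 0 */
	ops.irq_disable();			/* harmless */
	/* ops.irq_enable() would abort, by design */
	return 0;
}
```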
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 4a7ff8b0db20..0575f5863b7f 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -248,6 +248,7 @@ privcmd_call(unsigned int call,
 	return res;
 }
 
+#ifdef CONFIG_XEN_PV
 static inline int
 HYPERVISOR_set_trap_table(struct trap_info *table)
 {
@@ -281,6 +282,107 @@ HYPERVISOR_callback_op(int cmd, void *arg)
 }
 
 static inline int
+HYPERVISOR_set_debugreg(int reg, unsigned long value)
+{
+	return _hypercall2(int, set_debugreg, reg, value);
+}
+
+static inline unsigned long
+HYPERVISOR_get_debugreg(int reg)
+{
+	return _hypercall1(unsigned long, get_debugreg, reg);
+}
+
+static inline int
+HYPERVISOR_update_descriptor(u64 ma, u64 desc)
+{
+	return _hypercall2(int, update_descriptor, ma, desc);
+}
+
+static inline int
+HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
+			     unsigned long flags)
+{
+	return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
+}
+
+static inline int
+HYPERVISOR_set_segment_base(int reg, unsigned long value)
+{
+	return _hypercall2(int, set_segment_base, reg, value);
+}
+
+static inline void
+MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
+{
+	mcl->op = __HYPERVISOR_fpu_taskswitch;
+	mcl->args[0] = set;
+
+	trace_xen_mc_entry(mcl, 1);
+}
+
+static inline void
+MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
+			pte_t new_val, unsigned long flags)
+{
+	mcl->op = __HYPERVISOR_update_va_mapping;
+	mcl->args[0] = va;
+	mcl->args[1] = new_val.pte;
+	mcl->args[2] = flags;
+
+	trace_xen_mc_entry(mcl, 3);
+}
+
+static inline void
+MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
+			struct desc_struct desc)
+{
+	mcl->op = __HYPERVISOR_update_descriptor;
+	mcl->args[0] = maddr;
+	mcl->args[1] = *(unsigned long *)&desc;
+
+	trace_xen_mc_entry(mcl, 2);
+}
+
+static inline void
+MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
+		 int count, int *success_count, domid_t domid)
+{
+	mcl->op = __HYPERVISOR_mmu_update;
+	mcl->args[0] = (unsigned long)req;
+	mcl->args[1] = count;
+	mcl->args[2] = (unsigned long)success_count;
+	mcl->args[3] = domid;
+
+	trace_xen_mc_entry(mcl, 4);
+}
+
+static inline void
+MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
+		int *success_count, domid_t domid)
+{
+	mcl->op = __HYPERVISOR_mmuext_op;
+	mcl->args[0] = (unsigned long)op;
+	mcl->args[1] = count;
+	mcl->args[2] = (unsigned long)success_count;
+	mcl->args[3] = domid;
+
+	trace_xen_mc_entry(mcl, 4);
+}
+
+static inline void
+MULTI_stack_switch(struct multicall_entry *mcl,
+		   unsigned long ss, unsigned long esp)
+{
+	mcl->op = __HYPERVISOR_stack_switch;
+	mcl->args[0] = ss;
+	mcl->args[1] = esp;
+
+	trace_xen_mc_entry(mcl, 2);
+}
+#endif
+
+static inline int
 HYPERVISOR_sched_op(int cmd, void *arg)
 {
 	return _hypercall2(int, sched_op, cmd, arg);
@@ -308,26 +410,6 @@ HYPERVISOR_platform_op(struct xen_platform_op *op)
 	return _hypercall1(int, platform_op, op);
 }
 
-static __always_inline int
-HYPERVISOR_set_debugreg(int reg, unsigned long value)
-{
-	return _hypercall2(int, set_debugreg, reg, value);
-}
-
-static __always_inline unsigned long
-HYPERVISOR_get_debugreg(int reg)
-{
-	return _hypercall1(unsigned long, get_debugreg, reg);
-}
-
-static inline int
-HYPERVISOR_update_descriptor(u64 ma, u64 desc)
-{
-	if (sizeof(u64) == sizeof(long))
-		return _hypercall2(int, update_descriptor, ma, desc);
-	return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
-}
-
 static inline long
 HYPERVISOR_memory_op(unsigned int cmd, void *arg)
 {
@@ -341,18 +423,6 @@ HYPERVISOR_multicall(void *call_list, uint32_t nr_calls)
 }
 
 static inline int
-HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
-			     unsigned long flags)
-{
-	if (sizeof(new_val) == sizeof(long))
-		return _hypercall3(int, update_va_mapping, va,
-				   new_val.pte, flags);
-	else
-		return _hypercall4(int, update_va_mapping, va,
-				   new_val.pte, new_val.pte >> 32, flags);
-}
-
-static inline int
 HYPERVISOR_event_channel_op(int cmd, void *arg)
 {
 	return _hypercall2(int, event_channel_op, cmd, arg);
@@ -394,14 +464,6 @@ HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
 	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
 }
 
-#ifdef CONFIG_X86_64
-static inline int
-HYPERVISOR_set_segment_base(int reg, unsigned long value)
-{
-	return _hypercall2(int, set_segment_base, reg, value);
-}
-#endif
-
 static inline int
 HYPERVISOR_suspend(unsigned long start_info_mfn)
 {
@@ -423,13 +485,6 @@ HYPERVISOR_hvm_op(int op, void *arg)
 }
 
 static inline int
-HYPERVISOR_tmem_op(
-	struct tmem_op *op)
-{
-	return _hypercall1(int, tmem_op, op);
-}
-
-static inline int
 HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
 {
 	return _hypercall2(int, xenpmu_op, op, arg);
@@ -446,88 +501,4 @@ HYPERVISOR_dm_op(
 	return ret;
 }
 
-static inline void
-MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
-{
-	mcl->op = __HYPERVISOR_fpu_taskswitch;
-	mcl->args[0] = set;
-
-	trace_xen_mc_entry(mcl, 1);
-}
-
-static inline void
-MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
-			pte_t new_val, unsigned long flags)
-{
-	mcl->op = __HYPERVISOR_update_va_mapping;
-	mcl->args[0] = va;
-	if (sizeof(new_val) == sizeof(long)) {
-		mcl->args[1] = new_val.pte;
-		mcl->args[2] = flags;
-	} else {
-		mcl->args[1] = new_val.pte;
-		mcl->args[2] = new_val.pte >> 32;
-		mcl->args[3] = flags;
-	}
-
-	trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 3 : 4);
-}
-
-static inline void
-MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
-			struct desc_struct desc)
-{
-	mcl->op = __HYPERVISOR_update_descriptor;
-	if (sizeof(maddr) == sizeof(long)) {
-		mcl->args[0] = maddr;
-		mcl->args[1] = *(unsigned long *)&desc;
-	} else {
-		u32 *p = (u32 *)&desc;
-
-		mcl->args[0] = maddr;
-		mcl->args[1] = maddr >> 32;
-		mcl->args[2] = *p++;
-		mcl->args[3] = *p;
-	}
-
-	trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
-}
-
-static inline void
-MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
-		 int count, int *success_count, domid_t domid)
-{
-	mcl->op = __HYPERVISOR_mmu_update;
-	mcl->args[0] = (unsigned long)req;
-	mcl->args[1] = count;
-	mcl->args[2] = (unsigned long)success_count;
-	mcl->args[3] = domid;
-
-	trace_xen_mc_entry(mcl, 4);
-}
-
-static inline void
-MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
-		int *success_count, domid_t domid)
-{
-	mcl->op = __HYPERVISOR_mmuext_op;
-	mcl->args[0] = (unsigned long)op;
-	mcl->args[1] = count;
-	mcl->args[2] = (unsigned long)success_count;
-	mcl->args[3] = domid;
-
-	trace_xen_mc_entry(mcl, 4);
-}
-
-static inline void
-MULTI_stack_switch(struct multicall_entry *mcl,
-		   unsigned long ss, unsigned long esp)
-{
-	mcl->op = __HYPERVISOR_stack_switch;
-	mcl->args[0] = ss;
-	mcl->args[1] = esp;
-
-	trace_xen_mc_entry(mcl, 2);
-}
-
 #endif /* _ASM_X86_XEN_HYPERCALL_H */
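The MULTI_*() helpers that moved under CONFIG_XEN_PV each fill one multicall_entry so several hypercalls can later be flushed with a single HYPERVISOR_multicall(). A rough, userspace-runnable model of that batching shape; the structures are simplified and the "flush" prints instead of trapping into Xen:

```c
#include <stdio.h>

struct multicall_entry {
	unsigned long op;
	unsigned long args[6];
};

#define MC_BATCH 8
static struct multicall_entry mc_batch[MC_BATCH];
static int mc_count;

/* Queue one mmu_update-style call instead of issuing it immediately
 * (no overflow handling; a real batch would flush when full). */
static void multi_mmu_update(unsigned long req, int count, unsigned long domid)
{
	struct multicall_entry *mcl = &mc_batch[mc_count++];

	mcl->op = 1;		/* stand-in for __HYPERVISOR_mmu_update */
	mcl->args[0] = req;
	mcl->args[1] = count;
	mcl->args[2] = 0;	/* success_count pointer, elided here */
	mcl->args[3] = domid;
}

static void mc_flush(void)
{
	/* the real code hands the whole array to one multicall hypercall */
	for (int i = 0; i < mc_count; i++)
		printf("op=%lu req=%#lx\n", mc_batch[i].op, mc_batch[i].args[0]);
	mc_count = 0;
}

int main(void)
{
	multi_mmu_update(0x1000, 1, 0);
	multi_mmu_update(0x2000, 1, 0);
	mc_flush();		/* two updates, one (pretend) hypercall */
	return 0;
}
```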
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index ff4b52e37e60..4957f59deb40 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,4 +62,8 @@ void xen_arch_register_cpu(int num);
 void xen_arch_unregister_cpu(int num);
 #endif
 
+#ifdef CONFIG_PVH
+void __init xen_pvh_init(struct boot_params *boot_params);
+#endif
+
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 4557f7cb0fa6..9015b888edd6 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -22,25 +22,6 @@ static inline int __init pci_xen_initial_domain(void)
 	return -1;
 }
 #endif
-#ifdef CONFIG_XEN_DOM0
-int xen_find_device_domain_owner(struct pci_dev *dev);
-int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
-int xen_unregister_device_domain_owner(struct pci_dev *dev);
-#else
-static inline int xen_find_device_domain_owner(struct pci_dev *dev)
-{
-	return -1;
-}
-static inline int xen_register_device_domain_owner(struct pci_dev *dev,
-						   uint16_t domain)
-{
-	return -1;
-}
-static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
-{
-	return -1;
-}
-#endif
 
 #if defined(CONFIG_PCI_MSI)
 #if defined(CONFIG_PCI_XEN)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ebc45360ffd4..7157c2df3bc2 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -46,6 +46,17 @@ asm (".pushsection .entry.text, \"ax\"\n"
      ".type _paravirt_nop, @function\n\t"
      ".popsection");
 
+/* stub always returning 0. */
+asm (".pushsection .entry.text, \"ax\"\n"
+     ".global paravirt_ret0\n"
+     "paravirt_ret0:\n\t"
+     "xor %" _ASM_AX ", %" _ASM_AX ";\n\t"
+     "ret\n\t"
+     ".size paravirt_ret0, . - paravirt_ret0\n\t"
+     ".type paravirt_ret0, @function\n\t"
+     ".popsection");
+
+
 void __init default_banner(void)
 {
 	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -53,7 +64,7 @@ void __init default_banner(void)
 }
 
 /* Undefined instruction for dealing with missing ops pointers. */
-static void paravirt_BUG(void)
+noinstr void paravirt_BUG(void)
 {
 	BUG();
 }
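paravirt_ret0 is written in toplevel asm rather than C so its body is exactly one xor plus ret, with no compiler-added frame. A userspace x86-64 approximation of the same construction for GCC/Clang; the demo_ret0 name is made up for this sketch:

```c
#include <stdio.h>

/* Define the stub in toplevel asm, as the kernel does for paravirt_ret0
 * (the kernel additionally pins down the section and symbol size). */
asm(".pushsection .text\n"
    ".global demo_ret0\n"
    "demo_ret0:\n\t"
    "xor %eax, %eax\n\t"	/* zero-extends into %rax */
    "ret\n\t"
    ".type demo_ret0, @function\n"
    ".popsection");

unsigned long demo_ret0(void);

int main(void)
{
	unsigned long (*save_fl)(void) = demo_ret0;

	printf("flags = %lu\n", save_fl());	/* prints 0 */
	return 0;
}
```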
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 5debe4ac6f81..12da00558631 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -23,6 +23,7 @@
 
 #include <xen/features.h>
 #include <xen/events.h>
+#include <xen/pci.h>
 #include <asm/xen/pci.h>
 #include <asm/xen/cpuid.h>
 #include <asm/apic.h>
@@ -585,78 +586,3 @@ int __init pci_xen_initial_domain(void)
 }
 #endif
 
-#ifdef CONFIG_XEN_DOM0
-
-struct xen_device_domain_owner {
-	domid_t domain;
-	struct pci_dev *dev;
-	struct list_head list;
-};
-
-static DEFINE_SPINLOCK(dev_domain_list_spinlock);
-static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
-
-static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
-{
-	struct xen_device_domain_owner *owner;
-
-	list_for_each_entry(owner, &dev_domain_list, list) {
-		if (owner->dev == dev)
-			return owner;
-	}
-	return NULL;
-}
-
-int xen_find_device_domain_owner(struct pci_dev *dev)
-{
-	struct xen_device_domain_owner *owner;
-	int domain = -ENODEV;
-
-	spin_lock(&dev_domain_list_spinlock);
-	owner = find_device(dev);
-	if (owner)
-		domain = owner->domain;
-	spin_unlock(&dev_domain_list_spinlock);
-	return domain;
-}
-EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
-
-int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
-{
-	struct xen_device_domain_owner *owner;
-
-	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
-	if (!owner)
-		return -ENODEV;
-
-	spin_lock(&dev_domain_list_spinlock);
-	if (find_device(dev)) {
-		spin_unlock(&dev_domain_list_spinlock);
-		kfree(owner);
-		return -EEXIST;
-	}
-	owner->domain = domain;
-	owner->dev = dev;
-	list_add_tail(&owner->list, &dev_domain_list);
-	spin_unlock(&dev_domain_list_spinlock);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
-
-int xen_unregister_device_domain_owner(struct pci_dev *dev)
-{
-	struct xen_device_domain_owner *owner;
-
-	spin_lock(&dev_domain_list_spinlock);
-	owner = find_device(dev);
-	if (!owner) {
-		spin_unlock(&dev_domain_list_spinlock);
-		return -ENODEV;
-	}
-	list_del(&owner->list);
-	spin_unlock(&dev_domain_list_spinlock);
-	kfree(owner);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
-#endif /* CONFIG_XEN_DOM0 */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 95d970359e17..30c6e986a6cd 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,25 +31,10 @@ EXPORT_SYMBOL_GPL(hypercall_page);
  * Pointer to the xen_vcpu_info structure or
  * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
  * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info
- * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point
- * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to
- * acknowledge pending events.
- * Also more subtly it is used by the patched version of irq enable/disable
- * e.g. xen_irq_enable_direct and xen_iret in PV mode.
- *
- * The desire to be able to do those mask/unmask operations as a single
- * instruction by using the per-cpu offset held in %gs is the real reason
- * vcpu info is in a per-cpu pointer and the original reason for this
- * hypercall.
- *
+ * but during boot it is switched to point to xen_vcpu_info.
+ * The pointer is used in __xen_evtchn_do_upcall to acknowledge pending events.
  */
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
-
-/*
- * Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info
- * hypercall. This can be used both in PV and PVHVM mode. The structure
- * overrides the default per_cpu(xen_vcpu, cpu) value.
- */
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
 /* Linux <-> Xen vCPU id mapping */
@@ -84,21 +69,6 @@ EXPORT_SYMBOL(xen_start_flags);
  */
 struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
 
-/*
- * Flag to determine whether vcpu info placement is available on all
- * VCPUs. We assume it is to start with, and then set it to zero on
- * the first failure. This is because it can succeed on some VCPUs
- * and not others, since it can involve hypervisor memory allocation,
- * or because the guest failed to guarantee all the appropriate
- * constraints on all VCPUs (ie buffer can't cross a page boundary).
- *
- * Note that any particular CPU may be using a placed vcpu structure,
- * but we can only optimise if the all are.
- *
- * 0: not available, 1: available
- */
-int xen_have_vcpu_info_placement = 1;
-
 static int xen_cpu_up_online(unsigned int cpu)
 {
 	xen_init_lock_cpu(cpu);
@@ -124,10 +94,8 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
 	return rc >= 0 ? 0 : rc;
 }
 
-static int xen_vcpu_setup_restore(int cpu)
+static void xen_vcpu_setup_restore(int cpu)
 {
-	int rc = 0;
-
 	/* Any per_cpu(xen_vcpu) is stale, so reset it */
 	xen_vcpu_info_reset(cpu);
 
@@ -136,11 +104,8 @@ static int xen_vcpu_setup_restore(int cpu)
 	 * be handled by hotplug.
 	 */
 	if (xen_pv_domain() ||
-	    (xen_hvm_domain() && cpu_online(cpu))) {
-		rc = xen_vcpu_setup(cpu);
-	}
-
-	return rc;
+	    (xen_hvm_domain() && cpu_online(cpu)))
+		xen_vcpu_setup(cpu);
 }
 
 /*
@@ -150,7 +115,7 @@ static int xen_vcpu_setup_restore(int cpu)
  */
 void xen_vcpu_restore(void)
 {
-	int cpu, rc;
+	int cpu;
 
 	for_each_possible_cpu(cpu) {
 		bool other_cpu = (cpu != smp_processor_id());
@@ -170,20 +135,9 @@ void xen_vcpu_restore(void)
 		if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
 			xen_setup_runstate_info(cpu);
 
-		rc = xen_vcpu_setup_restore(cpu);
-		if (rc)
-			pr_emerg_once("vcpu restore failed for cpu=%d err=%d. "
-					"System will hang.\n", cpu, rc);
-		/*
-		 * In case xen_vcpu_setup_restore() fails, do not bring up the
-		 * VCPU. This helps us avoid the resulting OOPS when the VCPU
-		 * accesses pvclock_vcpu_time via xen_vcpu (which is NULL.)
-		 * Note that this does not improve the situation much -- now the
-		 * VM hangs instead of OOPSing -- with the VCPUs that did not
-		 * fail, spinning in stop_machine(), waiting for the failed
-		 * VCPUs to come up.
-		 */
-		if (other_cpu && is_up && (rc == 0) &&
+		xen_vcpu_setup_restore(cpu);
+
+		if (other_cpu && is_up &&
 		    HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
 			BUG();
 	}
@@ -200,7 +154,7 @@ void xen_vcpu_info_reset(int cpu)
 	}
 }
 
-int xen_vcpu_setup(int cpu)
+void xen_vcpu_setup(int cpu)
 {
 	struct vcpu_register_vcpu_info info;
 	int err;
@@ -221,44 +175,26 @@ int xen_vcpu_setup(int cpu)
 	 */
 	if (xen_hvm_domain()) {
 		if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
-			return 0;
+			return;
 	}
 
-	if (xen_have_vcpu_info_placement) {
-		vcpup = &per_cpu(xen_vcpu_info, cpu);
-		info.mfn = arbitrary_virt_to_mfn(vcpup);
-		info.offset = offset_in_page(vcpup);
-
-		/*
-		 * Check to see if the hypervisor will put the vcpu_info
-		 * structure where we want it, which allows direct access via
-		 * a percpu-variable.
-		 * N.B. This hypercall can _only_ be called once per CPU.
-		 * Subsequent calls will error out with -EINVAL. This is due to
-		 * the fact that hypervisor has no unregister variant and this
-		 * hypercall does not allow to over-write info.mfn and
-		 * info.offset.
-		 */
-		err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info,
-					 xen_vcpu_nr(cpu), &info);
-
-		if (err) {
-			pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n",
-				     cpu, err);
-			xen_have_vcpu_info_placement = 0;
-		} else {
-			/*
-			 * This cpu is using the registered vcpu info, even if
-			 * later ones fail to.
-			 */
-			per_cpu(xen_vcpu, cpu) = vcpup;
-		}
-	}
+	vcpup = &per_cpu(xen_vcpu_info, cpu);
+	info.mfn = arbitrary_virt_to_mfn(vcpup);
+	info.offset = offset_in_page(vcpup);
 
-	if (!xen_have_vcpu_info_placement)
-		xen_vcpu_info_reset(cpu);
+	/*
+	 * N.B. This hypercall can _only_ be called once per CPU.
+	 * Subsequent calls will error out with -EINVAL. This is due to
+	 * the fact that hypervisor has no unregister variant and this
+	 * hypercall does not allow to over-write info.mfn and
+	 * info.offset.
+	 */
+	err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
+				 &info);
+	if (err)
+		panic("register_vcpu_info failed: cpu=%d err=%d\n", cpu, err);
 
-	return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0);
+	per_cpu(xen_vcpu, cpu) = vcpup;
 }
 
 void __init xen_banner(void)
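The comment kept in xen_vcpu_setup() is the key constraint: VCPUOP_register_vcpu_info is a register-once interface, which is why failure is now fatal rather than something to fall back from. A tiny userspace model of register-once semantics; the slot structure is hypothetical:

```c
#include <errno.h>
#include <stdio.h>

struct slot { void *addr; int used; };

static int register_info(struct slot *s, void *addr)
{
	if (s->used)
		return -EINVAL;	/* no unregister variant, no overwrite */
	s->addr = addr;
	s->used = 1;
	return 0;
}

int main(void)
{
	struct slot s = { 0 };
	int a, b;

	printf("%d\n", register_info(&s, &a));	/* 0: first call wins */
	printf("%d\n", register_info(&s, &b));	/* -22, i.e. -EINVAL */
	return 0;
}
```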
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index e68ea5f4ad1c..42300941ec29 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -163,9 +163,9 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
 		per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
 	else
 		per_cpu(xen_vcpu_id, cpu) = cpu;
-	rc = xen_vcpu_setup(cpu);
-	if (rc || !xen_have_vector_callback)
-		return rc;
+	xen_vcpu_setup(cpu);
+	if (!xen_have_vector_callback)
+		return 0;
 
 	if (xen_feature(XENFEAT_hvm_safe_pvclock))
 		xen_setup_timer(cpu);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4f63117f09bb..5004feb16783 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -27,7 +27,6 @@
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/page-flags.h>
-#include <linux/highmem.h>
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/edd.h>
@@ -993,31 +992,13 @@ void __init xen_setup_vcpu_info_placement(void)
 	for_each_possible_cpu(cpu) {
 		/* Set up direct vCPU id mapping for PV guests. */
 		per_cpu(xen_vcpu_id, cpu) = cpu;
-
-		/*
-		 * xen_vcpu_setup(cpu) can fail -- in which case it
-		 * falls back to the shared_info version for cpus
-		 * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS.
-		 *
-		 * xen_cpu_up_prepare_pv() handles the rest by failing
-		 * them in hotplug.
-		 */
-		(void) xen_vcpu_setup(cpu);
+		xen_vcpu_setup(cpu);
 	}
 
-	/*
-	 * xen_vcpu_setup managed to place the vcpu_info within the
-	 * percpu area for all cpus, so make use of it.
-	 */
-	if (xen_have_vcpu_info_placement) {
-		pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
-		pv_ops.irq.irq_disable =
-			__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
-		pv_ops.irq.irq_enable =
-			__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
-		pv_ops.mmu.read_cr2 =
-			__PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
-	}
+	pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+	pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+	pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
+	pv_ops.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
 }
 
 static const struct pv_info xen_info __initconst = {
@@ -1247,12 +1228,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	__supported_pte_mask &= ~_PAGE_GLOBAL;
 	__default_kernel_pte_mask &= ~_PAGE_GLOBAL;
 
-	/*
-	 * Prevent page tables from being allocated in highmem, even
-	 * if CONFIG_HIGHPTE is enabled.
-	 */
-	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
-
 	/* Get mfn list */
 	xen_build_dynamic_phys_to_machine();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 4fe387e520af..06c3c2fb4b06 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -24,60 +24,6 @@ noinstr void xen_force_evtchn_callback(void)
 	(void)HYPERVISOR_xen_version(0, NULL);
 }
 
-asmlinkage __visible noinstr unsigned long xen_save_fl(void)
-{
-	struct vcpu_info *vcpu;
-	unsigned long flags;
-
-	vcpu = this_cpu_read(xen_vcpu);
-
-	/* flag has opposite sense of mask */
-	flags = !vcpu->evtchn_upcall_mask;
-
-	/* convert to IF type flag
-	   -0 -> 0x00000000
-	   -1 -> 0xffffffff
-	*/
-	return (-flags) & X86_EFLAGS_IF;
-}
-__PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl, ".noinstr.text");
-
-asmlinkage __visible noinstr void xen_irq_disable(void)
-{
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
-	preempt_disable();
-	this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
-	preempt_enable_no_resched();
-}
-__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable, ".noinstr.text");
-
-asmlinkage __visible noinstr void xen_irq_enable(void)
-{
-	struct vcpu_info *vcpu;
-
-	/*
-	 * We may be preempted as soon as vcpu->evtchn_upcall_mask is
-	 * cleared, so disable preemption to ensure we check for
-	 * events on the VCPU we are still running on.
-	 */
-	preempt_disable();
-
-	vcpu = this_cpu_read(xen_vcpu);
-	vcpu->evtchn_upcall_mask = 0;
-
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
-
-	barrier(); /* unmask then check (avoid races) */
-	if (unlikely(vcpu->evtchn_upcall_pending))
-		xen_force_evtchn_callback();
-
-	preempt_enable();
-}
-__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable, ".noinstr.text");
-
 static void xen_safe_halt(void)
 {
 	/* Blocking includes an implicit local_irq_enable(). */
@@ -96,10 +42,10 @@ static void xen_halt(void)
 
 static const typeof(pv_ops) xen_irq_ops __initconst = {
 	.irq = {
-
-		.save_fl = PV_CALLEE_SAVE(xen_save_fl),
-		.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
-		.irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
+		/* Initial interrupt flag handling only called while interrupts off. */
+		.save_fl = __PV_IS_CALLEE_SAVE(paravirt_ret0),
+		.irq_disable = __PV_IS_CALLEE_SAVE(paravirt_nop),
+		.irq_enable = __PV_IS_CALLEE_SAVE(paravirt_BUG),
 
 		.safe_halt = xen_safe_halt,
 		.halt = xen_halt,
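The removed xen_save_fl() translated the per-vCPU event mask (1 = masked) into the x86 IF flag that save_fl callers expect; paravirt_ret0 can stand in only because the remaining early callers run with interrupts off, where 0 is the right answer. The conversion itself, extracted into a standalone sketch:

```c
#include <stdio.h>

#define X86_EFLAGS_IF 0x200UL

/* Mirrors the arithmetic of the removed xen_save_fl(). */
static unsigned long mask_to_eflags(unsigned char evtchn_upcall_mask)
{
	unsigned long flags = !evtchn_upcall_mask;	/* opposite sense */

	/* -0 -> 0x00000000 (masked), -1 -> 0xffffffff (unmasked) */
	return (-flags) & X86_EFLAGS_IF;
}

int main(void)
{
	printf("%#lx %#lx\n", mask_to_eflags(0), mask_to_eflags(1));
	/* prints: 0x200 0 */
	return 0;
}
```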
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index cdbf4822f431..00354866921b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -41,7 +41,6 @@
  *  Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  */
 #include <linux/sched/mm.h>
-#include <linux/highmem.h>
 #include <linux/debugfs.h>
 #include <linux/bug.h>
 #include <linux/vmalloc.h>
@@ -86,8 +85,10 @@
 #include "mmu.h"
 #include "debugfs.h"
 
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 /* l3 pud for userspace vsyscall mapping */
 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
+#endif
 
 /*
  * Protects atomic reservation decrease/increase against concurrent increases.
@@ -241,9 +242,11 @@ static void xen_set_pmd(pmd_t *ptr, pmd_t val)
  * Associate a virtual page frame with a given physical page frame
  * and protection flags for that frame.
  */
-void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
+void __init set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 {
-	set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
+	if (HYPERVISOR_update_va_mapping(vaddr, mfn_pte(mfn, flags),
+					 UVMF_INVLPG))
+		BUG();
 }
 
 static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
@@ -789,7 +792,9 @@ static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
 static void __init xen_after_bootmem(void)
 {
 	static_branch_enable(&xen_struct_pages_ready);
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 	SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
 	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
 
@@ -1192,6 +1197,13 @@ static void __init xen_pagetable_p2m_setup(void)
 
 static void __init xen_pagetable_init(void)
 {
+	/*
+	 * The majority of further PTE writes is to pagetables already
+	 * announced as such to Xen. Hence it is more efficient to use
+	 * hypercalls for these updates.
+	 */
+	pv_ops.mmu.set_pte = __xen_set_pte;
+
 	paging_init();
 	xen_post_allocator_init();
 
@@ -1421,10 +1433,18 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
  *
 * Many of these PTE updates are done on unpinned and writable pages
 * and doing a hypercall for these is unnecessary and expensive. At
- * this point it is not possible to tell if a page is pinned or not,
- * so always write the PTE directly and rely on Xen trapping and
+ * this point it is rarely possible to tell if a page is pinned, so
+ * mostly write the PTE directly and rely on Xen trapping and
 * emulating any updates as necessary.
 */
+static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
+{
+	if (unlikely(is_early_ioremap_ptep(ptep)))
+		__xen_set_pte(ptep, pte);
+	else
+		native_set_pte(ptep, pte);
+}
+
 __visible pte_t xen_make_pte_init(pteval_t pte)
 {
 	unsigned long pfn;
@@ -1446,11 +1466,6 @@ __visible pte_t xen_make_pte_init(pteval_t pte)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
 
-static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
-{
-	__xen_set_pte(ptep, pte);
-}
-
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1750,7 +1765,6 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
 	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
@@ -1767,6 +1781,13 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Unpin Xen-provided one */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+	/* Pin user vsyscall L3 */
+	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
+			  PFN_DOWN(__pa_symbol(level3_user_vsyscall)));
+#endif
+
 	/*
 	 * At this stage there can be no user pgd, and no page structure to
 	 * attach it to, so make sure we just set kernel pgd.
@@ -1999,6 +2020,7 @@ static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
 static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 {
 	pte_t pte;
+	unsigned long vaddr;
 
 	phys >>= PAGE_SHIFT;
 
@@ -2039,15 +2061,15 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 		break;
 	}
 
-	__native_set_fixmap(idx, pte);
+	vaddr = __fix_to_virt(idx);
+	if (HYPERVISOR_update_va_mapping(vaddr, pte, UVMF_INVLPG))
+		BUG();
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 	/* Replicate changes to map the vsyscall page into the
 	   user pagetable vsyscall mapping. */
-	if (idx == VSYSCALL_PAGE) {
-		unsigned long vaddr = __fix_to_virt(idx);
+	if (idx == VSYSCALL_PAGE)
 		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
-	}
 #endif
 }
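Taken together, the mmu_pv.c hunks make PTE writes a boot-phase decision: xen_set_pte_init() writes directly except for early-ioremap entries, and once the pagetables have been announced to Xen, xen_pagetable_init() repoints pv_ops.mmu.set_pte at the hypercall-based __xen_set_pte. A simplified userspace sketch of that init-time function-pointer switch; all names here are invented for the sketch:

```c
#include <stdio.h>

typedef unsigned long pte_t;

static void direct_set_pte(pte_t *ptep, pte_t val)
{
	*ptep = val;			/* plain store; Xen would trap and emulate */
}

static void hypercall_set_pte(pte_t *ptep, pte_t val)
{
	/* stand-in for an mmu_update/update_va_mapping hypercall */
	printf("hypercall: %p <- %#lx\n", (void *)ptep, val);
	*ptep = val;
}

static void (*set_pte_op)(pte_t *, pte_t) = direct_set_pte;

static void pagetable_init_done(void)
{
	/* Later writes mostly hit pagetables Xen already knows about,
	 * so one explicit hypercall beats a trap per write. */
	set_pte_op = hypercall_set_pte;
}

int main(void)
{
	pte_t pte = 0;

	set_pte_op(&pte, 0x1000 | 1);	/* early boot: direct write */
	pagetable_init_done();
	set_pte_op(&pte, 0x2000 | 1);	/* afterwards: via "hypercall" */
	return 0;
}
```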
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f387fc7e5250..af216feb63d9 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -306,10 +306,6 @@ static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
 		BUG();
 	}
 
-	/* Update kernel mapping, but not for highmem. */
-	if (pfn >= PFN_UP(__pa(high_memory - 1)))
-		return;
-
 	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
 					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
 		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
@@ -429,13 +425,13 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
 	}
 
 	/*
-	 * If the PFNs are currently mapped, the VA mapping also needs
-	 * to be updated to be 1:1.
+	 * If the PFNs are currently mapped, their VA mappings need to be
+	 * zapped.
 	 */
 	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
 		(void)HYPERVISOR_update_va_mapping(
 			(unsigned long)__va(pfn << PAGE_SHIFT),
-			mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+			native_make_pte(0), 0);
 
 	return remap_pfn;
 }
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c1b2f764b29a..c3e1f9a7d43a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -121,34 +121,10 @@ int xen_smp_intr_init(unsigned int cpu)
 
 void __init xen_smp_cpus_done(unsigned int max_cpus)
 {
-	int cpu, rc, count = 0;
-
 	if (xen_hvm_domain())
 		native_smp_cpus_done(max_cpus);
 	else
 		calculate_max_logical_packages();
-
-	if (xen_have_vcpu_info_placement)
-		return;
-
-	for_each_online_cpu(cpu) {
-		if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
-			continue;
-
-		rc = remove_cpu(cpu);
-
-		if (rc == 0) {
-			/*
-			 * Reset vcpu_info so this cpu cannot be onlined again.
-			 */
-			xen_vcpu_info_reset(cpu);
-			count++;
-		} else {
-			pr_warn("%s: failed to bring CPU %d down, error %d\n",
-				__func__, cpu, rc);
-		}
-	}
-	WARN(count, "%s: brought %d CPUs offline\n", __func__, count);
 }
 
 void xen_smp_send_reschedule(int cpu)
@@ -268,20 +244,16 @@ void xen_send_IPI_allbutself(int vector)
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
-	irq_enter();
 	generic_smp_call_function_interrupt();
 	inc_irq_stat(irq_call_count);
-	irq_exit();
 
 	return IRQ_HANDLED;
 }
 
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 {
-	irq_enter();
 	generic_smp_call_function_single_interrupt();
 	inc_irq_stat(irq_call_count);
-	irq_exit();
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 7ed56c6075b0..9e55bcbfcd33 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -458,10 +458,8 @@ static void xen_pv_stop_other_cpus(int wait)
 
 static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
 {
-	irq_enter();
 	irq_work_run();
 	inc_irq_stat(apic_irq_work_irqs);
-	irq_exit();
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 9e27b86a0c31..6a64496edefb 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -45,13 +45,13 @@ SYM_CODE_START(startup_xen)
 
 	/* Clear .bss */
 	xor %eax,%eax
-	mov $__bss_start, %_ASM_DI
-	mov $__bss_stop, %_ASM_CX
-	sub %_ASM_DI, %_ASM_CX
-	shr $__ASM_SEL(2, 3), %_ASM_CX
-	rep __ASM_SIZE(stos)
+	mov $__bss_start, %rdi
+	mov $__bss_stop, %rcx
+	sub %rdi, %rcx
+	shr $3, %rcx
+	rep stosq
 
-	mov %_ASM_SI, xen_start_info
+	mov %rsi, xen_start_info
 	mov initial_stack(%rip), %rsp
 
 	/* Set up %gs.
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 8bc8b72a205d..fd0fec6e92f4 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -76,9 +76,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
 bool xen_vcpu_stolen(int vcpu);
 
-extern int xen_have_vcpu_info_placement;
-
-int xen_vcpu_setup(int cpu);
+void xen_vcpu_setup(int cpu);
 void xen_vcpu_info_reset(int cpu);
 void xen_setup_vcpu_info_placement(void);
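With 32-bit support gone, the xen-head.S hunk above clears .bss with a bare rep stosq: %rcx holds the count in quadwords (hence the shr $3 on the byte count) and %rax supplies the zero pattern. A userspace illustration of the same idiom, using x86-64 GCC/Clang inline asm:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void clear_qwords(uint64_t *dst, size_t nbytes)
{
	size_t qwords = nbytes >> 3;	/* shr $3, %rcx: bytes -> qwords */

	asm volatile("rep stosq"
		     : "+D" (dst), "+c" (qwords)	/* %rdi = dest, %rcx = count */
		     : "a" (0)				/* %rax = value to store */
		     : "memory");
}

int main(void)
{
	uint64_t buf[4] = { 1, 2, 3, 4 };

	clear_qwords(buf, sizeof(buf));	/* like __bss_stop - __bss_start */
	for (int i = 0; i < 4; i++)
		assert(buf[i] == 0);
	puts("cleared");
	return 0;
}
```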