diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/entry/entry_64.S | 16 | ||||
-rw-r--r-- | arch/x86/include/asm/efi.h | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/apic/vector.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/irq_32.c | 19 | ||||
-rw-r--r-- | arch/x86/kernel/irq_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 16 | ||||
-rw-r--r-- | arch/x86/kernel/pci-dma.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 25 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 3 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 2 | ||||
-rw-r--r-- | arch/x86/pci/common.c | 1 |
15 files changed, 78 insertions, 31 deletions
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d3033183ed70..055a01de7c8d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1128,7 +1128,18 @@ END(error_exit) /* Runs on exception stack */ ENTRY(nmi) + /* + * Fix up the exception frame if we're on Xen. + * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most + * one value to the stack on native, so it may clobber the rdx + * scratch slot, but it won't clobber any of the important + * slots past it. + * + * Xen is a different story, because the Xen frame itself overlaps + * the "NMI executing" variable. + */ PARAVIRT_ADJUST_EXCEPTION_FRAME + /* * We allow breakpoints in NMIs. If a breakpoint occurs, then * the iretq it performs will take us out of NMI context. @@ -1179,9 +1190,12 @@ ENTRY(nmi) * we don't want to enable interrupts, because then we'll end * up in an awkward situation in which IRQs are on but NMIs * are off. + * + * We also must not push anything to the stack before switching + * stacks lest we corrupt the "NMI executing" variable. */ - SWAPGS + SWAPGS_UNSAFE_STACK cld movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 155162ea0e00..ab5f1d447ef9 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,16 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +/* + * CONFIG_KASAN may redefine memset to __memset. __memset function is present + * only in kernel binary. Since the EFI stub linked into a separate binary it + * doesn't have __memset(). So we should use standard memset from + * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove. + */ +#undef memcpy +#undef memset +#undef memmove + #endif /* CONFIG_X86_32 */ extern struct efi_scratch efi_scratch; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c12e845f59e6..2beee0382088 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -40,6 +40,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS @@ -711,6 +712,7 @@ struct kvm_vcpu_stat { u32 nmi_window_exits; u32 halt_exits; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; u32 request_irq_exits; u32 irq_exits; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 54390bc140dd..b8c14bb7fc8f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -333,6 +333,7 @@ /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 #define MSR_K8_TSEG_ADDR 0xc0010112 +#define MSR_K8_TSEG_MASK 0xc0010113 #define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ #define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ #define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1bbd0fe2c806..836d11b92811 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -489,10 +489,8 @@ static int apic_set_affinity(struct irq_data *irq_data, err = assign_irq_vector(irq, data, dest); if (err) { - struct irq_data *top = irq_get_irq_data(irq); - if (assign_irq_vector(irq, data, - irq_data_get_affinity_mask(top))) + irq_data_get_affinity_mask(irq_data))) pr_err("Failed to recover vector for irq %d\n", irq); return err; } diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index c80cf6699678..38da8f29a9c8 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -68,11 +68,10 @@ static inline void *current_stack(void) return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); } -static inline int -execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) +static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) { struct irq_stack *curstk, *irqstk; - u32 *isp, *prev_esp, arg1, arg2; + u32 *isp, *prev_esp, arg1; curstk = (struct irq_stack *) current_stack(); irqstk = __this_cpu_read(hardirq_stack); @@ -98,8 +97,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) asm volatile("xchgl %%ebx,%%esp \n" "call *%%edi \n" "movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (isp) - : "0" (irq), "1" (desc), "2" (isp), + : "=a" (arg1), "=b" (isp) + : "0" (desc), "1" (isp), "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 1; @@ -150,19 +149,15 @@ void do_softirq_own_stack(void) bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { - unsigned int irq; - int overflow; - - overflow = check_stack_overflow(); + int overflow = check_stack_overflow(); if (IS_ERR_OR_NULL(desc)) return false; - irq = irq_desc_get_irq(desc); - if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) { + if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) { if (unlikely(overflow)) print_stack_overflow(); - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(desc); } return true; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index ff16ccb918f2..c767cf2bc80a 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -75,6 +75,6 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) if (unlikely(IS_ERR_OR_NULL(desc))) return false; - generic_handle_irq_desc(irq_desc_get_irq(desc), desc); + generic_handle_irq_desc(desc); return true; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f68e48f5f6c2..c2130aef3f9d 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,10 +41,18 @@ #include <asm/timer.h> #include <asm/special_insns.h> -/* nop stub */ -void _paravirt_nop(void) -{ -} +/* + * nop stub, which must not clobber anything *including the stack* to + * avoid confusing the entry prologues. + */ +extern void _paravirt_nop(void); +asm (".pushsection .entry.text, \"ax\"\n" + ".global _paravirt_nop\n" + "_paravirt_nop:\n\t" + "ret\n\t" + ".size _paravirt_nop, . - _paravirt_nop\n\t" + ".type _paravirt_nop, @function\n\t" + ".popsection"); /* identity function, which can be inlined */ u32 _paravirt_ident_32(u32 x) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 84b8ef82a159..1b55de1267cf 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -131,8 +131,8 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { - *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); if (!*dev) *dev = &x86_dma_fallback_dev; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 69088a1ba509..ff606f507913 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3322,7 +3322,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) break; reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, - leaf); + iterator.level); } walk_shadow_page_lockless_end(vcpu); @@ -3614,7 +3614,7 @@ static void __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, struct rsvd_bits_validate *rsvd_check, int maxphyaddr, int level, bool nx, bool gbpages, - bool pse) + bool pse, bool amd) { u64 exb_bit_rsvd = 0; u64 gbpages_bit_rsvd = 0; @@ -3631,7 +3631,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for * leaf entries) on AMD CPUs only. */ - if (guest_cpuid_is_amd(vcpu)) + if (amd) nonleaf_bit8_rsvd = rsvd_bits(8, 8); switch (level) { @@ -3699,7 +3699,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, cpuid_maxphyaddr(vcpu), context->root_level, context->nx, guest_cpuid_has_gbpages(vcpu), - is_pse(vcpu)); + is_pse(vcpu), guest_cpuid_is_amd(vcpu)); } static void @@ -3749,13 +3749,24 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { + /* + * Passing "true" to the last argument is okay; it adds a check + * on bit 8 of the SPTEs which KVM doesn't use anyway. + */ __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, context->nx, - guest_cpuid_has_gbpages(vcpu), is_pse(vcpu)); + guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), + true); } EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); +static inline bool boot_cpu_is_amd(void) +{ + WARN_ON_ONCE(!tdp_enabled); + return shadow_x_mask == 0; +} + /* * the direct page table on host, use as much mmu features as * possible, however, kvm currently does not do execution-protection. @@ -3764,11 +3775,11 @@ static void reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - if (guest_cpuid_is_amd(vcpu)) + if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, false, - cpu_has_gbpages, true); + cpu_has_gbpages, true, true); else __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, boot_cpu_data.x86_phys_bits, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fdb8cb63a6c0..94b7d15db3fc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -202,6 +202,7 @@ module_param(npt, int, S_IRUGO); static int nested = true; module_param(nested, int, S_IRUGO); +static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -1263,7 +1264,8 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. * It also updates the guest-visible cr0 value. */ - (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + kvm_mmu_reset_context(&svm->vcpu); save->cr4 = X86_CR4_PAE; /* rdx = ?? */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d01986832afc..64076740251e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6064,6 +6064,8 @@ static __init int hardware_setup(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a60bdbccff51..991466bf8dee 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -149,6 +149,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "nmi_window", VCPU_STAT(nmi_window_exits) }, { "halt_exits", VCPU_STAT(halt_exits) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, @@ -2189,6 +2190,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: case MSR_K8_SYSCFG: + case MSR_K8_TSEG_ADDR: + case MSR_K8_TSEG_MASK: case MSR_K7_HWCR: case MSR_VM_HSAVE_PA: case MSR_K8_INT_PENDING_MSG: diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 161804de124a..a0d09f6c6533 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1015,7 +1015,7 @@ static struct clock_event_device lguest_clockevent = { * This is the Guest timer interrupt handler (hardware interrupt 0). We just * call the clockevent infrastructure and it does whatever needs doing. */ -static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) +static void lguest_time_irq(struct irq_desc *desc) { unsigned long flags; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 09d3afc0a181..dc78a4a9a466 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -166,6 +166,7 @@ void pcibios_fixup_bus(struct pci_bus *b) { struct pci_dev *dev; + pci_read_bridge_bases(b); list_for_each_entry(dev, &b->devices, bus_list) pcibios_fixup_device_resources(dev); } |