diff options
Diffstat (limited to 'arch/x86')
30 files changed, 234 insertions, 57 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6915ff2bd996..8774cb23064f 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -26,7 +26,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 -KBUILD_CFLAGS += -fno-strict-aliasing -fPIC +KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC) KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small @@ -40,6 +40,18 @@ GCOV_PROFILE := n UBSAN_SANITIZE :=n LDFLAGS := -m elf_$(UTS_MACHINE) +ifeq ($(CONFIG_RELOCATABLE),y) +# If kernel is relocatable, build compressed kernel as PIE. +ifeq ($(CONFIG_X86_32),y) +LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker) +else +# To build 64-bit compressed kernel as PIE, we disable relocation +# overflow check to avoid relocation overflow error with a new linker +# command-line option, -z noreloc-overflow. +LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \ + && echo "-z noreloc-overflow -pie --no-dynamic-linker") +endif +endif LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 8ef964ddc18e..0256064da8da 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -31,6 +31,34 @@ #include <asm/asm-offsets.h> #include <asm/bootparam.h> +/* + * The 32-bit x86 assembler in binutils 2.26 will generate R_386_GOT32X + * relocation to get the symbol address in PIC. When the compressed x86 + * kernel isn't built as PIC, the linker optimizes R_386_GOT32X + * relocations to their fixed symbol addresses. However, when the + * compressed x86 kernel is loaded at a different address, it leads + * to the following load failure: + * + * Failed to allocate space for phdrs + * + * during the decompression stage. + * + * If the compressed x86 kernel is relocatable at run-time, it should be + * compiled with -fPIE, instead of -fPIC, if possible and should be built as + * Position Independent Executable (PIE) so that linker won't optimize + * R_386_GOT32X relocation to its fixed symbol address. Older + * linkers generate R_386_32 relocations against locally defined symbols, + * _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less + * optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle + * R_386_32 relocations when relocating the kernel. To generate + * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as + * hidden: + */ + .hidden _bss + .hidden _ebss + .hidden _got + .hidden _egot + __HEAD ENTRY(startup_32) #ifdef CONFIG_EFI_STUB diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index b0c0d16ef58d..86558a199139 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -33,6 +33,14 @@ #include <asm/asm-offsets.h> #include <asm/bootparam.h> +/* + * Locally defined symbols should be marked hidden: + */ + .hidden _bss + .hidden _ebss + .hidden _got + .hidden _egot + __HEAD .code32 ENTRY(startup_32) diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index a8a0224fa0f8..081255cea1ee 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -453,10 +453,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 86a9bec18dab..bd3e8421b57c 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -115,7 +115,7 @@ static __initconst const u64 amd_hw_cache_event_ids /* * AMD Performance Monitor K7 and later. */ -static const u64 amd_perfmon_event_map[] = +static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 40625ca7a190..6011a573dd64 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -474,6 +474,7 @@ static __init int _init_perf_amd_iommu( static struct perf_amd_iommu __perf_iommu = { .pmu = { + .task_ctx_nr = perf_invalid_context, .event_init = perf_iommu_event_init, .add = perf_iommu_add, .del = perf_iommu_del, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 68fa55b4d42e..a6fd4dbcf820 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3637,8 +3637,11 @@ __init int intel_pmu_init(void) pr_cont("Knights Landing events, "); break; + case 142: /* 14nm Kabylake Mobile */ + case 158: /* 14nm Kabylake Desktop */ case 78: /* 14nm Skylake Mobile */ case 94: /* 14nm Skylake Desktop */ + case 85: /* 14nm Skylake Server */ x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 6c3b7c1780c9..1ca5d1e7d4f2 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -63,7 +63,7 @@ static enum { #define LBR_PLM (LBR_KERNEL | LBR_USER) -#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ +#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */ #define LBR_NOT_SUPP -1 /* LBR filter not supported */ #define LBR_IGN 0 /* ignored */ @@ -610,8 +610,10 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate * in suppress mode. So LBR_SELECT should be set to * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) + * But the 10th bit LBR_CALL_STACK does not operate + * in suppress mode. */ - reg->config = mask ^ x86_pmu.lbr_sel_mask; + reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK); if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 6af7cf71d6b2..09a77dbc73c9 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void) struct dev_ext_attribute *de_attrs; struct attribute **attrs; size_t size; + u64 reg; int ret; long i; + if (boot_cpu_has(X86_FEATURE_VMX)) { + /* + * Intel SDM, 36.5 "Tracing post-VMXON" says that + * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace + * post-VMXON. + */ + rdmsrl(MSR_IA32_VMX_MISC, reg); + if (reg & BIT(14)) + pt_pmu.vmx = true; + } + attrs = NULL; for (i = 0; i < PT_CPUID_LEAVES; i++) { @@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event) reg |= (event->attr.config & PT_CONFIG_MASK); + event->hw.config = reg; wrmsrl(MSR_IA32_RTIT_CTL, reg); } -static void pt_config_start(bool start) +static void pt_config_stop(struct perf_event *event) { - u64 ctl; + u64 ctl = READ_ONCE(event->hw.config); + + /* may be already stopped by a PMI */ + if (!(ctl & RTIT_CTL_TRACEEN)) + return; - rdmsrl(MSR_IA32_RTIT_CTL, ctl); - if (start) - ctl |= RTIT_CTL_TRACEEN; - else - ctl &= ~RTIT_CTL_TRACEEN; + ctl &= ~RTIT_CTL_TRACEEN; wrmsrl(MSR_IA32_RTIT_CTL, ctl); + WRITE_ONCE(event->hw.config, ctl); + /* * A wrmsr that disables trace generation serializes other PT * registers and causes all data packets to be written to memory, @@ -291,8 +306,7 @@ static void pt_config_start(bool start) * The below WMB, separating data store and aux_head store matches * the consumer's RMB that separates aux_head load and data load. */ - if (!start) - wmb(); + wmb(); } static void pt_config_buffer(void *buf, unsigned int topa_idx, @@ -942,11 +956,17 @@ void intel_pt_interrupt(void) if (!ACCESS_ONCE(pt->handle_nmi)) return; - pt_config_start(false); + /* + * If VMX is on and PT does not support it, don't touch anything. + */ + if (READ_ONCE(pt->vmx_on)) + return; if (!event) return; + pt_config_stop(event); + buf = perf_get_aux(&pt->handle); if (!buf) return; @@ -983,6 +1003,35 @@ void intel_pt_interrupt(void) } } +void intel_pt_handle_vmx(int on) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct perf_event *event; + unsigned long flags; + + /* PT plays nice with VMX, do nothing */ + if (pt_pmu.vmx) + return; + + /* + * VMXON will clear RTIT_CTL.TraceEn; we need to make + * sure to not try to set it while VMX is on. Disable + * interrupts to avoid racing with pmu callbacks; + * concurrent PMI should be handled fine. + */ + local_irq_save(flags); + WRITE_ONCE(pt->vmx_on, on); + + if (on) { + /* prevent pt_config_stop() from writing RTIT_CTL */ + event = pt->handle.event; + if (event) + event->hw.config = 0; + } + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(intel_pt_handle_vmx); + /* * PMU callbacks */ @@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode) struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt_buffer *buf = perf_get_aux(&pt->handle); + if (READ_ONCE(pt->vmx_on)) + return; + if (!buf || pt_buffer_is_full(buf, pt)) { event->hw.state = PERF_HES_STOPPED; return; @@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode) * see comment in intel_pt_interrupt(). */ ACCESS_ONCE(pt->handle_nmi) = 0; - pt_config_start(false); + + pt_config_stop(event); if (event->hw.state == PERF_HES_STOPPED) return; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index 336878a5d205..3abb5f5cccc8 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -65,6 +65,7 @@ enum pt_capabilities { struct pt_pmu { struct pmu pmu; u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; + bool vmx; }; /** @@ -107,10 +108,12 @@ struct pt_buffer { * struct pt - per-cpu pt context * @handle: perf output handle * @handle_nmi: do handle PT PMI on this cpu, there's an active event + * @vmx_on: 1 if VMX is ON on this cpu */ struct pt { struct perf_output_handle handle; int handle_nmi; + int vmx_on; }; #endif /* __INTEL_PT_H__ */ diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 70c93f9b03ac..1705c9d75e44 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -718,6 +718,7 @@ static int __init rapl_pmu_init(void) break; case 60: /* Haswell */ case 69: /* Haswell-Celeron */ + case 70: /* Haswell GT3e */ case 61: /* Broadwell */ case 71: /* Broadwell-H */ rapl_cntr_mask = RAPL_IDX_HSW; diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index f8a29d2c97b0..e6a8613fbfb0 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -4,6 +4,7 @@ #include <asm/page.h> #include <asm-generic/hugetlb.h> +#define hugepages_supported() cpu_has_pse static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 5a2ed3ed2f26..f353061bba1d 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { } static inline void perf_check_microcode(void) { } #endif +#ifdef CONFIG_CPU_SUP_INTEL + extern void intel_pt_handle_vmx(int on); +#endif + #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) extern void amd_pmu_enable_virt(void); extern void amd_pmu_disable_virt(void); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index ad59d70bcb1a..ef495511f019 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -256,7 +256,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) struct irq_desc *desc; int cpu, vector; - BUG_ON(!data->cfg.vector); + if (!data->cfg.vector) + return; vector = data->cfg.vector; for_each_cpu_and(cpu, data->domain, cpu_online_mask) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 8f4942e2bcbb..d7ce96a7daca 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -891,9 +891,7 @@ void __init uv_system_init(void) } pr_info("UV: Found %s hub\n", hub); - /* We now only need to map the MMRs on UV1 */ - if (is_uv1_hub()) - map_low_mmrs(); + map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 0a850100c594..2658e2af74ec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ]; void mce_gen_pool_process(void) { struct llist_node *head; - struct mce_evt_llist *node; + struct mce_evt_llist *node, *tmp; struct mce *mce; head = llist_del_all(&mce_event_llist); @@ -37,7 +37,7 @@ void mce_gen_pool_process(void) return; head = llist_reverse_order(head); - llist_for_each_entry(node, head, llnode) { + llist_for_each_entry_safe(node, tmp, head, llnode) { mce = &node->mce; atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4e7c6933691c..10c11b4da31d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -152,6 +152,11 @@ static struct clocksource hyperv_cs = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +static unsigned char hv_get_nmi_reason(void) +{ + return 0; +} + static void __init ms_hyperv_init_platform(void) { /* @@ -191,6 +196,13 @@ static void __init ms_hyperv_init_platform(void) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif mark_tsc_unstable("running on Hyper-V"); + + /* + * Generation 2 instances don't support reading the NMI status from + * 0x61 port. + */ + if (efi_enabled(EFI_BOOT)) + x86_platform.get_nmi_reason = hv_get_nmi_reason; } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 54cdbd2003fe..af1112980dd4 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -389,12 +389,6 @@ default_entry: /* Make changes effective */ wrmsr - /* - * And make sure that all the mappings we set up have NX set from - * the beginning. - */ - orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4) - enable_paging: /* diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 319b08a5b6ed..2367ae07eb76 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -146,6 +146,31 @@ int default_check_phys_apicid_present(int phys_apicid) struct boot_params boot_params; +/* + * Machine setup.. + */ +static struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + + #ifdef CONFIG_X86_32 /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data = { @@ -924,6 +949,13 @@ void __init setup_arch(char **cmdline_p) mpx_mm_init(&init_mm); + code_resource.start = __pa_symbol(_text); + code_resource.end = __pa_symbol(_etext)-1; + data_resource.start = __pa_symbol(_etext); + data_resource.end = __pa_symbol(_edata)-1; + bss_resource.start = __pa_symbol(__bss_start); + bss_resource.end = __pa_symbol(__bss_stop)-1; + #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); @@ -987,6 +1019,11 @@ void __init setup_arch(char **cmdline_p) x86_init.resources.probe_roms(); + /* after parse_early_param, so could debug it */ + insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &data_resource); + insert_resource(&iomem_resource, &bss_resource); + e820_add_kernel_range(); trim_bios_range(); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c index b285d4e8c68e..5da924bbf0a0 100644 --- a/arch/x86/kernel/sysfb_efi.c +++ b/arch/x86/kernel/sysfb_efi.c @@ -106,14 +106,24 @@ static int __init efifb_set_system(const struct dmi_system_id *id) continue; for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { resource_size_t start, end; + unsigned long flags; + + flags = pci_resource_flags(dev, i); + if (!(flags & IORESOURCE_MEM)) + continue; + + if (flags & IORESOURCE_UNSET) + continue; + + if (pci_resource_len(dev, i) == 0) + continue; start = pci_resource_start(dev, i); - if (start == 0) - break; end = pci_resource_end(dev, i); if (screen_info.lfb_base >= start && screen_info.lfb_base < end) { found_bar = 1; + break; } } } diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 92ae6acac8a7..6aa0f4d9eea6 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -92,7 +92,7 @@ unsigned long try_msr_calibrate_tsc(void) if (freq_desc_tables[cpu_index].msr_plat) { rdmsr(MSR_PLATFORM_INFO, lo, hi); - ratio = (lo >> 8) & 0x1f; + ratio = (lo >> 8) & 0xff; } else { rdmsr(MSR_IA32_PERF_STATUS, lo, hi); ratio = (hi >> 8) & 0x1f; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8efb839948e5..bbbaa802d13e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -534,6 +534,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, do_cpuid_1_ent(&entry[i], function, idx); if (idx == 1) { entry[i].eax &= kvm_cpuid_D_1_eax_x86_features; + cpuid_mask(&entry[i].eax, CPUID_D_1_EAX); entry[i].ebx = 0; if (entry[i].eax & (F(XSAVES)|F(XSAVEC))) entry[i].ebx = diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1ff4dbb73fb7..b6f50e8b0a39 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2823,7 +2823,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, */ if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && - PageTransCompound(pfn_to_page(pfn)) && + PageTransCompoundMap(pfn_to_page(pfn)) && !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { unsigned long mask; /* @@ -4785,7 +4785,7 @@ restart: */ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && - PageTransCompound(pfn_to_page(pfn))) { + PageTransCompoundMap(pfn_to_page(pfn))) { drop_spte(kvm, sptep); need_tlb_flush = 1; goto restart; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index b70df72e2b33..66b33b96a31b 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -173,10 +173,9 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index = (pfec >> 1) + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); bool fault = (mmu->permissions[index] >> pte_access) & 1; + u32 errcode = PFERR_PRESENT_MASK; WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK)); - pfec |= PFERR_PRESENT_MASK; - if (unlikely(mmu->pkru_mask)) { u32 pkru_bits, offset; @@ -189,15 +188,15 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ - offset = pfec - 1 + + offset = (pfec & ~1) + ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT)); pkru_bits &= mmu->pkru_mask >> offset; - pfec |= -pkru_bits & PFERR_PK_MASK; + errcode |= -pkru_bits & PFERR_PK_MASK; fault |= (pkru_bits != 0); } - return -(uint32_t)fault & pfec; + return -(u32)fault & errcode; } void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 1d971c7553c3..bc019f70e0b6 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -360,7 +360,7 @@ retry_walk: goto error; if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) { - errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; + errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ee1c8a93871c..133679d520af 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3103,6 +3103,8 @@ static __init int vmx_disabled_by_bios(void) static void kvm_cpu_vmxon(u64 addr) { + intel_pt_handle_vmx(1); + asm volatile (ASM_VMX_VMXON_RAX : : "a"(&addr), "m"(addr) : "memory", "cc"); @@ -3172,6 +3174,8 @@ static void vmclear_local_loaded_vmcss(void) static void kvm_cpu_vmxoff(void) { asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); + + intel_pt_handle_vmx(0); } static void hardware_disable(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a2c70e43bc8..9b7798c7b210 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -700,7 +700,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) return 1; } - kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) @@ -6590,8 +6589,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->prepare_guest_switch(vcpu); if (vcpu->fpu_active) kvm_load_guest_fpu(vcpu); - kvm_load_guest_xcr0(vcpu); - vcpu->mode = IN_GUEST_MODE; srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); @@ -6618,6 +6615,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } + kvm_load_guest_xcr0(vcpu); + if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); @@ -6667,6 +6666,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); + kvm_put_guest_xcr0(vcpu); + /* Interrupt is enabled by handle_external_intr() */ kvm_x86_ops->handle_external_intr(vcpu); @@ -7314,7 +7315,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) * and assume host would use all available bits. * Guest xcr0 would be loaded later. */ - kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); @@ -7323,8 +7323,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - kvm_put_guest_xcr0(vcpu); - if (!vcpu->guest_fpu_loaded) { vcpu->fpu_counter = 0; return; diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 8bea84724a7d..f65a33f505b6 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -32,8 +32,9 @@ early_param("noexec", noexec_setup); void x86_configure_nx(void) { - /* If disable_nx is set, clear NX on all new mappings going forward. */ - if (disable_nx) + if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx) + __supported_pte_mask |= _PAGE_NX; + else __supported_pte_mask &= ~_PAGE_NX; } diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index a2433817c987..6a2f5691b1ab 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -43,40 +43,40 @@ void __init efi_bgrt_init(void) return; if (bgrt_tab->header.length < sizeof(*bgrt_tab)) { - pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n", + pr_notice("Ignoring BGRT: invalid length %u (expected %zu)\n", bgrt_tab->header.length, sizeof(*bgrt_tab)); return; } if (bgrt_tab->version != 1) { - pr_err("Ignoring BGRT: invalid version %u (expected 1)\n", + pr_notice("Ignoring BGRT: invalid version %u (expected 1)\n", bgrt_tab->version); return; } if (bgrt_tab->status & 0xfe) { - pr_err("Ignoring BGRT: reserved status bits are non-zero %u\n", + pr_notice("Ignoring BGRT: reserved status bits are non-zero %u\n", bgrt_tab->status); return; } if (bgrt_tab->image_type != 0) { - pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n", + pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n", bgrt_tab->image_type); return; } if (!bgrt_tab->image_address) { - pr_err("Ignoring BGRT: null image address\n"); + pr_notice("Ignoring BGRT: null image address\n"); return; } image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB); if (!image) { - pr_err("Ignoring BGRT: failed to map image header memory\n"); + pr_notice("Ignoring BGRT: failed to map image header memory\n"); return; } memcpy(&bmp_header, image, sizeof(bmp_header)); memunmap(image); if (bmp_header.id != 0x4d42) { - pr_err("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", + pr_notice("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", bmp_header.id); return; } @@ -84,14 +84,14 @@ void __init efi_bgrt_init(void) bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); if (!bgrt_image) { - pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n", + pr_notice("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n", bgrt_image_size); return; } image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); if (!image) { - pr_err("Ignoring BGRT: failed to map image memory\n"); + pr_notice("Ignoring BGRT: failed to map image memory\n"); kfree(bgrt_image); bgrt_image = NULL; return; diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 9e2ba5c6e1dd..f42e78de1e10 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -27,6 +27,12 @@ static bool xen_pvspin = true; static void xen_qlock_kick(int cpu) { + int irq = per_cpu(lock_kicker_irq, cpu); + + /* Don't kick if the target's kicker interrupt is not initialized. */ + if (irq == -1) + return; + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); } |