diff options
author | Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> | 2021-10-01 21:38:40 +0300 |
---|---|---|
committer | Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> | 2021-10-01 21:40:21 +0300 |
commit | 9c881021a269af242594e2dfc79f1c4701404887 (patch) | |
tree | c8ec14f412d7ea35009b2dee08770082ddbb5c6e /arch/x86 | |
parent | e9479d98b87227b8b7502c4c1e778887b23799f1 (diff) | |
parent | cf06e1ab1c3ed354da5873e646f2164fea147c88 (diff) | |
download | linux-dev-5.10-intel.tar.xz |
Merge branch 'dev-5.10' into dev-5.10-inteldev-5.10-intel
Pull 5.10.67 stable from OpenBMC upstream.
Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/events/amd/ibs.c | 9 | ||||
-rw-r--r-- | arch/x86/events/amd/iommu.c | 47 | ||||
-rw-r--r-- | arch/x86/events/amd/power.c | 1 | ||||
-rw-r--r-- | arch/x86/events/intel/pt.c | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snbep.c | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/monitor.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/reboot.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 7 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 66 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 2 | ||||
-rw-r--r-- | arch/x86/xen/p2m.c | 4 |
16 files changed, 115 insertions, 64 deletions
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 40669eac9d6d..ccc9ee1971e8 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -90,6 +90,7 @@ struct perf_ibs { unsigned long offset_mask[1]; int offset_max; unsigned int fetch_count_reset_broken : 1; + unsigned int fetch_ignore_if_zero_rip : 1; struct cpu_perf_ibs __percpu *pcpu; struct attribute **format_attrs; @@ -570,6 +571,7 @@ static struct perf_ibs perf_ibs_op = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }, .msr = MSR_AMD64_IBSOPCTL, .config_mask = IBS_OP_CONFIG_MASK, @@ -672,6 +674,10 @@ fail: if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { regs.flags &= ~PERF_EFLAGS_EXACT; } else { + /* Workaround for erratum #1197 */ + if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) + goto out; + set_linear_ip(®s, ibs_data.regs[1]); regs.flags |= PERF_EFLAGS_EXACT; } @@ -769,6 +775,9 @@ static __init void perf_event_ibs_init(void) if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) perf_ibs_fetch.fetch_count_reset_broken = 1; + if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) + perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; + perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); if (ibs_caps & IBS_CAPS_OPCNT) { diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 6a98a7651621..2da6139b0977 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -18,8 +18,6 @@ #include "../perf_event.h" #include "iommu.h" -#define COUNTER_SHIFT 16 - /* iommu pmu conf masks */ #define GET_CSOURCE(x) ((x)->conf & 0xFFULL) #define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL) @@ -285,22 +283,31 @@ static void perf_iommu_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; + /* + * To account for power-gating, which prevents write to + * the counter, we need to enable the counter + * before setting up counter register. + */ + perf_iommu_enable_event(event); + if (flags & PERF_EF_RELOAD) { - u64 prev_raw_count = local64_read(&hwc->prev_count); + u64 count = 0; struct amd_iommu *iommu = perf_event_2_iommu(event); + /* + * Since the IOMMU PMU only support counting mode, + * the counter always start with value zero. + */ amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, - IOMMU_PC_COUNTER_REG, &prev_raw_count); + IOMMU_PC_COUNTER_REG, &count); } - perf_iommu_enable_event(event); perf_event_update_userpage(event); - } static void perf_iommu_read(struct perf_event *event) { - u64 count, prev, delta; + u64 count; struct hw_perf_event *hwc = &event->hw; struct amd_iommu *iommu = perf_event_2_iommu(event); @@ -311,14 +318,11 @@ static void perf_iommu_read(struct perf_event *event) /* IOMMU pc counter register is only 48 bits */ count &= GENMASK_ULL(47, 0); - prev = local64_read(&hwc->prev_count); - if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev) - return; - - /* Handle 48-bit counter overflow */ - delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); - delta >>= COUNTER_SHIFT; - local64_add(delta, &event->count); + /* + * Since the counter always start with value zero, + * simply just accumulate the count for the event. + */ + local64_add(count, &event->count); } static void perf_iommu_stop(struct perf_event *event, int flags) @@ -328,15 +332,16 @@ static void perf_iommu_stop(struct perf_event *event, int flags) if (hwc->state & PERF_HES_UPTODATE) return; + /* + * To account for power-gating, in which reading the counter would + * return zero, we need to read the register before disabling. + */ + perf_iommu_read(event); + hwc->state |= PERF_HES_UPTODATE; + perf_iommu_disable_event(event); WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; - - if (hwc->state & PERF_HES_UPTODATE) - return; - - perf_iommu_read(event); - hwc->state |= PERF_HES_UPTODATE; } static int perf_iommu_add(struct perf_event *event, int flags) diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 16a2369c586e..37d5b380516e 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -213,6 +213,7 @@ static struct pmu pmu_class = { .stop = pmu_event_stop, .read = pmu_event_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .module = THIS_MODULE, }; static int power_cpu_exit(unsigned int cpu) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index e94af4a54d0d..37129b76135a 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -62,7 +62,7 @@ static struct pt_cap_desc { PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)), PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)), PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)), - PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3), + PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7), PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000), PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff), PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000), diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 9c936d06fb61..2701f87a9a7c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -4669,7 +4669,7 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, return; pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); - addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; pci_read_config_dword(pdev, mem_offset, &pci_dword); addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index fc25c88c7ff2..9b5ff423e939 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -259,6 +259,7 @@ enum mcp_flags { MCP_TIMESTAMP = BIT(0), /* log time stamp */ MCP_UC = BIT(1), /* log uncorrected errors */ MCP_DONTLOG = BIT(2), /* only clear, don't log */ + MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */ }; bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b7a27589dfa0..056d0367864e 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -817,7 +817,10 @@ log_it: if (mca_cfg.dont_log_ce && !mce_usable_address(&m)) goto clear_it; - mce_log(&m); + if (flags & MCP_QUEUE_LOG) + mce_gen_pool_add(&m); + else + mce_log(&m); clear_it: /* @@ -1628,10 +1631,12 @@ static void __mcheck_cpu_init_generic(void) m_fl = MCP_DONTLOG; /* - * Log the machine checks left over from the previous reset. + * Log the machine checks left over from the previous reset. Log them + * only, do not start processing them. That will happen in mcheck_late_init() + * when all consumers have been registered on the notifier chain. */ bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC | m_fl, &all_banks); + machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks); cr4_set_bits(X86_CR4_MCE); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 6cc50ab07bde..65d11711cd7b 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -322,8 +322,6 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1); setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); - } else { - mark_tsc_unstable("running on Hyper-V"); } /* @@ -382,6 +380,13 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); #endif + /* + * TSC should be marked as unstable only after Hyper-V + * clocksource has been initialized. This ensures that the + * stability of the sched_clock is not altered. + */ + if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) + mark_tsc_unstable("running on Hyper-V"); } const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 3075624723b2..576f16a505e3 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -241,6 +241,12 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) case QOS_L3_MBM_LOCAL_EVENT_ID: m = &rr->d->mbm_local[rmid]; break; + default: + /* + * Code would never reach here because an invalid + * event id would fail the __rmid_read. + */ + return RMID_VAL_ERROR; } if (rr->first) { diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index b29657b76e3f..798a6f73f894 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -388,10 +388,11 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { }, { /* Handle problems with rebooting on the OptiPlex 990. */ .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", + .ident = "Dell OptiPlex 990 BIOS A0x", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), + DMI_MATCH(DMI_BIOS_VERSION, "A0"), }, }, { /* Handle problems with rebooting on Dell 300's */ diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5e25d93ec7d0..060d9a906535 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -267,12 +267,6 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception) { - /* Check if guest physical address doesn't exceed guest maximum */ - if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) { - exception->error_code |= PFERR_RSVD_MASK; - return UNMAPPED_GVA; - } - return gpa; } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e0c7910207c0..d5f24a2f3e91 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2243,12 +2243,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) ~PIN_BASED_VMX_PREEMPTION_TIMER); /* Posted interrupts setting is only taken from vmcs12. */ - if (nested_cpu_has_posted_intr(vmcs12)) { + vmx->nested.pi_pending = false; + if (nested_cpu_has_posted_intr(vmcs12)) vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; - vmx->nested.pi_pending = false; - } else { + else exec_control &= ~PIN_BASED_POSTED_INTR; - } pin_controls_set(vmx, exec_control); /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index de24d3826788..fcd8bcb7e0ea 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6396,6 +6396,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (vmx->emulation_required) + return; + if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) handle_external_interrupt_irqoff(vcpu); else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6ab42cdcb8a4..75c59ad27e9f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3116,6 +3116,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; adjust_tsc_offset_guest(vcpu, adj); + /* Before back to guest, tsc_timestamp must be adjusted + * as well, otherwise guest's percpu pvclock time could jump. + */ + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } vcpu->arch.ia32_tsc_adjust_msr = data; } @@ -7023,6 +7027,11 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); + ctxt->interruptibility = 0; + ctxt->have_exception = false; + ctxt->exception.vector = -1; + ctxt->perm_ok = false; + init_decode_cache(ctxt); vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } @@ -7338,6 +7347,37 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) return false; } +/* + * Decode to be emulated instruction. Return EMULATION_OK if success. + */ +int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, + void *insn, int insn_len) +{ + int r = EMULATION_OK; + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; + + init_emulate_ctxt(vcpu); + + /* + * We will reenter on the same instruction since we do not set + * complete_userspace_io. This does not handle watchpoints yet, + * those would be handled in the emulate_ops. + */ + if (!(emulation_type & EMULTYPE_SKIP) && + kvm_vcpu_check_breakpoint(vcpu, &r)) + return r; + + ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; + + r = x86_decode_insn(ctxt, insn, insn_len); + + trace_kvm_emulate_insn_start(vcpu); + ++vcpu->stat.insn_emulation; + + return r; +} +EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction); + int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len) { @@ -7357,32 +7397,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, */ write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; vcpu->arch.write_fault_to_shadow_pgtable = false; - kvm_clear_exception_queue(vcpu); if (!(emulation_type & EMULTYPE_NO_DECODE)) { - init_emulate_ctxt(vcpu); - - /* - * We will reenter on the same instruction since - * we do not set complete_userspace_io. This does not - * handle watchpoints yet, those would be handled in - * the emulate_ops. - */ - if (!(emulation_type & EMULTYPE_SKIP) && - kvm_vcpu_check_breakpoint(vcpu, &r)) - return r; - - ctxt->interruptibility = 0; - ctxt->have_exception = false; - ctxt->exception.vector = -1; - ctxt->perm_ok = false; - - ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; - - r = x86_decode_insn(ctxt, insn, insn_len); + kvm_clear_exception_queue(vcpu); - trace_kvm_emulate_insn_start(vcpu); - ++vcpu->stat.insn_emulation; + r = x86_decode_emulated_instruction(vcpu, emulation_type, + insn, insn_len); if (r != EMULATION_OK) { if ((emulation_type & EMULTYPE_TRAP_UD) || (emulation_type & EMULTYPE_TRAP_UD_FORCED)) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2249a7d7ca27..2bff44f1efec 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -272,6 +272,8 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); bool kvm_vector_hashing_enabled(void); void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code); +int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, + void *insn, int insn_len); int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 56e0f290fef6..e809f1446846 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -618,8 +618,8 @@ int xen_alloc_p2m_entry(unsigned long pfn) } /* Expanded the p2m? */ - if (pfn > xen_p2m_last_pfn) { - xen_p2m_last_pfn = pfn; + if (pfn >= xen_p2m_last_pfn) { + xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE); HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; } |