author     Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>  2021-10-01 21:38:40 +0300
committer  Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>  2021-10-01 21:40:21 +0300
commit     9c881021a269af242594e2dfc79f1c4701404887 (patch)
tree       c8ec14f412d7ea35009b2dee08770082ddbb5c6e /arch/x86
parent     e9479d98b87227b8b7502c4c1e778887b23799f1 (diff)
parent     cf06e1ab1c3ed354da5873e646f2164fea147c88 (diff)
download   linux-dev-5.10-intel.tar.xz
Merge branch 'dev-5.10' into dev-5.10-intel
Pull 5.10.67 stable from OpenBMC upstream.

Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/events/amd/ibs.c             |  9
-rw-r--r--  arch/x86/events/amd/iommu.c           | 47
-rw-r--r--  arch/x86/events/amd/power.c           |  1
-rw-r--r--  arch/x86/events/intel/pt.c            |  2
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c  |  2
-rw-r--r--  arch/x86/include/asm/mce.h            |  1
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c        | 11
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c        |  9
-rw-r--r--  arch/x86/kernel/cpu/resctrl/monitor.c |  6
-rw-r--r--  arch/x86/kernel/reboot.c              |  3
-rw-r--r--  arch/x86/kvm/mmu/mmu.c                |  6
-rw-r--r--  arch/x86/kvm/vmx/nested.c             |  7
-rw-r--r--  arch/x86/kvm/vmx/vmx.c                |  3
-rw-r--r--  arch/x86/kvm/x86.c                    | 66
-rw-r--r--  arch/x86/kvm/x86.h                    |  2
-rw-r--r--  arch/x86/xen/p2m.c                    |  4
16 files changed, 115 insertions(+), 64 deletions(-)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 40669eac9d6d..ccc9ee1971e8 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -90,6 +90,7 @@ struct perf_ibs {
unsigned long offset_mask[1];
int offset_max;
unsigned int fetch_count_reset_broken : 1;
+ unsigned int fetch_ignore_if_zero_rip : 1;
struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs;
@@ -570,6 +571,7 @@ static struct perf_ibs perf_ibs_op = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
},
.msr = MSR_AMD64_IBSOPCTL,
.config_mask = IBS_OP_CONFIG_MASK,
@@ -672,6 +674,10 @@ fail:
if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
regs.flags &= ~PERF_EFLAGS_EXACT;
} else {
+ /* Workaround for erratum #1197 */
+ if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1]))
+ goto out;
+
set_linear_ip(&regs, ibs_data.regs[1]);
regs.flags |= PERF_EFLAGS_EXACT;
}
@@ -769,6 +775,9 @@ static __init void perf_event_ibs_init(void)
if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
perf_ibs_fetch.fetch_count_reset_broken = 1;
+ if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
+ perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;
+
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
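The erratum #1197 handling above reduces to a simple gate: on family 0x19 parts with model below 0x10, a fetch sample whose linear address (ibs_data.regs[1]) reads back as zero is dropped instead of being reported as an exact IP. A minimal standalone sketch of that decision, using hypothetical names (ibs_init_quirks, ibs_fetch_sample_usable, sample_rip) rather than the driver's own:

#include <stdbool.h>
#include <stdint.h>

/* Stand-in for the quirk bit the driver sets once at init time. */
static bool fetch_ignore_if_zero_rip;

static void ibs_init_quirks(unsigned int family, unsigned int model)
{
	/* Erratum #1197: these parts can report a zero fetch RIP. */
	if (family == 0x19 && model < 0x10)
		fetch_ignore_if_zero_rip = true;
}

/* True if the fetch sample may be used, false if it must be discarded. */
static bool ibs_fetch_sample_usable(uint64_t sample_rip)
{
	return !(fetch_ignore_if_zero_rip && sample_rip == 0);
}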
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 6a98a7651621..2da6139b0977 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -18,8 +18,6 @@
#include "../perf_event.h"
#include "iommu.h"
-#define COUNTER_SHIFT 16
-
/* iommu pmu conf masks */
#define GET_CSOURCE(x) ((x)->conf & 0xFFULL)
#define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL)
@@ -285,22 +283,31 @@ static void perf_iommu_start(struct perf_event *event, int flags)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
+ /*
+ * To account for power-gating, which prevents write to
+ * the counter, we need to enable the counter
+ * before setting up counter register.
+ */
+ perf_iommu_enable_event(event);
+
if (flags & PERF_EF_RELOAD) {
- u64 prev_raw_count = local64_read(&hwc->prev_count);
+ u64 count = 0;
struct amd_iommu *iommu = perf_event_2_iommu(event);
+ /*
+ * Since the IOMMU PMU only support counting mode,
+ * the counter always start with value zero.
+ */
amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
- IOMMU_PC_COUNTER_REG, &prev_raw_count);
+ IOMMU_PC_COUNTER_REG, &count);
}
- perf_iommu_enable_event(event);
perf_event_update_userpage(event);
-
}
static void perf_iommu_read(struct perf_event *event)
{
- u64 count, prev, delta;
+ u64 count;
struct hw_perf_event *hwc = &event->hw;
struct amd_iommu *iommu = perf_event_2_iommu(event);
@@ -311,14 +318,11 @@ static void perf_iommu_read(struct perf_event *event)
/* IOMMU pc counter register is only 48 bits */
count &= GENMASK_ULL(47, 0);
- prev = local64_read(&hwc->prev_count);
- if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev)
- return;
-
- /* Handle 48-bit counter overflow */
- delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
- delta >>= COUNTER_SHIFT;
- local64_add(delta, &event->count);
+ /*
+ * Since the counter always start with value zero,
+ * simply just accumulate the count for the event.
+ */
+ local64_add(count, &event->count);
}
static void perf_iommu_stop(struct perf_event *event, int flags)
@@ -328,15 +332,16 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
if (hwc->state & PERF_HES_UPTODATE)
return;
+ /*
+ * To account for power-gating, in which reading the counter would
+ * return zero, we need to read the register before disabling.
+ */
+ perf_iommu_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+
perf_iommu_disable_event(event);
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
-
- if (hwc->state & PERF_HES_UPTODATE)
- return;
-
- perf_iommu_read(event);
- hwc->state |= PERF_HES_UPTODATE;
}
static int perf_iommu_add(struct perf_event *event, int flags)
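The simplified read path above works because the counter is now always programmed to zero on PERF_EF_RELOAD, so the raw 48-bit value read back is itself the delta. The retired code needed the usual shift trick to tolerate a wrap of the 48-bit counter: shifting both snapshots up by COUNTER_SHIFT makes the subtraction wrap at 64 bits, and shifting back down recovers the true delta. A small sketch of that arithmetic, for reference:

#include <stdint.h>

#define COUNTER_SHIFT 16	/* 64 - 48 bits of counter width */

/* Delta between two 48-bit counter snapshots, tolerating a single wrap. */
static uint64_t delta_48bit(uint64_t prev, uint64_t count)
{
	return ((count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT)) >> COUNTER_SHIFT;
}

/* delta_48bit(0xFFFFFFFFFFFE, 0x2) == 4: the counter wrapped from
 * 2^48 - 2 to 2, and the shifts keep the subtraction well-defined,
 * whereas a plain count - prev would produce a huge bogus value. */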
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index 16a2369c586e..37d5b380516e 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -213,6 +213,7 @@ static struct pmu pmu_class = {
.stop = pmu_event_stop,
.read = pmu_event_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .module = THIS_MODULE,
};
static int power_cpu_exit(unsigned int cpu)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index e94af4a54d0d..37129b76135a 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -62,7 +62,7 @@ static struct pt_cap_desc {
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)),
PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
- PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
+ PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7),
PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff),
PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
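The num_address_ranges field occupies bits 2:0 of CPUID.(EAX=0x14, ECX=1):EAX, so a 2-bit mask silently drops the top bit of the field. Capability values are read back as (register & mask) shifted down by the mask's lowest set bit; a hedged sketch of that pattern (pt_cap_extract is a hypothetical stand-in for the driver's helper):

#include <stdint.h>

static uint32_t pt_cap_extract(uint32_t reg, uint32_t mask)
{
	return (reg & mask) >> __builtin_ctz(mask);
}

/* With EAX[2:0] = 4 address ranges:
 *   pt_cap_extract(4, 0x3) == 0   -- old mask loses bit 2 entirely
 *   pt_cap_extract(4, 0x7) == 4   -- full 3-bit field preserved */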
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 9c936d06fb61..2701f87a9a7c 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -4669,7 +4669,7 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
return;
pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword);
- addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23;
+ addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23;
pci_read_config_dword(pdev, mem_offset, &pci_dword);
addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12;
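The cast matters because pci_dword is a 32-bit value: without it, the mask and the << 23 are evaluated in 32-bit arithmetic, so any base address needing more than 32 bits after the shift is silently truncated. Casting to resource_size_t (64-bit whenever physical addresses are 64-bit) first makes the shift happen in the wide type. A self-contained illustration of the truncation, with an arbitrary example field value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t pci_dword = 0x400;			/* example masked base field */
	uint64_t narrow = pci_dword << 23;		/* 32-bit shift: high bits wrap away */
	uint64_t wide = (uint64_t)pci_dword << 23;	/* 64-bit shift: bits preserved */

	printf("narrow=%#llx wide=%#llx\n",
	       (unsigned long long)narrow, (unsigned long long)wide);
	return 0;	/* prints narrow=0 wide=0x200000000 */
}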
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index fc25c88c7ff2..9b5ff423e939 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -259,6 +259,7 @@ enum mcp_flags {
MCP_TIMESTAMP = BIT(0), /* log time stamp */
MCP_UC = BIT(1), /* log uncorrected errors */
MCP_DONTLOG = BIT(2), /* only clear, don't log */
+ MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */
};
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b7a27589dfa0..056d0367864e 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -817,7 +817,10 @@ log_it:
if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
goto clear_it;
- mce_log(&m);
+ if (flags & MCP_QUEUE_LOG)
+ mce_gen_pool_add(&m);
+ else
+ mce_log(&m);
clear_it:
/*
@@ -1628,10 +1631,12 @@ static void __mcheck_cpu_init_generic(void)
m_fl = MCP_DONTLOG;
/*
- * Log the machine checks left over from the previous reset.
+ * Log the machine checks left over from the previous reset. Log them
+ * only, do not start processing them. That will happen in mcheck_late_init()
+ * when all consumers have been registered on the notifier chain.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC | m_fl, &all_banks);
+ machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
cr4_set_bits(X86_CR4_MCE);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 6cc50ab07bde..65d11711cd7b 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -322,8 +322,6 @@ static void __init ms_hyperv_init_platform(void)
if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
- } else {
- mark_tsc_unstable("running on Hyper-V");
}
/*
@@ -382,6 +380,13 @@ static void __init ms_hyperv_init_platform(void)
/* Register Hyper-V specific clocksource */
hv_init_clocksource();
#endif
+ /*
+ * TSC should be marked as unstable only after Hyper-V
+ * clocksource has been initialized. This ensures that the
+ * stability of the sched_clock is not altered.
+ */
+ if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
+ mark_tsc_unstable("running on Hyper-V");
}
const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 3075624723b2..576f16a505e3 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -241,6 +241,12 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
case QOS_L3_MBM_LOCAL_EVENT_ID:
m = &rr->d->mbm_local[rmid];
break;
+ default:
+ /*
+ * Code would never reach here because an invalid
+ * event id would fail the __rmid_read.
+ */
+ return RMID_VAL_ERROR;
}
if (rr->first) {
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index b29657b76e3f..798a6f73f894 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -388,10 +388,11 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
},
{ /* Handle problems with rebooting on the OptiPlex 990. */
.callback = set_pci_reboot,
- .ident = "Dell OptiPlex 990",
+ .ident = "Dell OptiPlex 990 BIOS A0x",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
+ DMI_MATCH(DMI_BIOS_VERSION, "A0"),
},
},
{ /* Handle problems with rebooting on Dell 300's */
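The added DMI_BIOS_VERSION entry narrows the quirk to early OptiPlex 990 firmware. DMI_MATCH is a substring match (DMI_EXACT_MATCH exists for full-string comparison), so the pattern "A0" covers BIOS versions such as A01 through A09 while later releases fall through to the default reboot method. A toy sketch of that matching semantic, assuming substring behaviour as in the kernel's DMI scanning code:

#include <stdbool.h>
#include <string.h>

static bool dmi_field_matches(const char *field, const char *pattern)
{
	return field && strstr(field, pattern) != NULL;
}

/* dmi_field_matches("A05", "A0") -> true  (quirk applies)
 * dmi_field_matches("A12", "A0") -> false (default reboot path) */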
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5e25d93ec7d0..060d9a906535 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -267,12 +267,6 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception)
{
- /* Check if guest physical address doesn't exceed guest maximum */
- if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) {
- exception->error_code |= PFERR_RSVD_MASK;
- return UNMAPPED_GVA;
- }
-
return gpa;
}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e0c7910207c0..d5f24a2f3e91 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2243,12 +2243,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
~PIN_BASED_VMX_PREEMPTION_TIMER);
/* Posted interrupts setting is only taken from vmcs12. */
- if (nested_cpu_has_posted_intr(vmcs12)) {
+ vmx->nested.pi_pending = false;
+ if (nested_cpu_has_posted_intr(vmcs12))
vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
- vmx->nested.pi_pending = false;
- } else {
+ else
exec_control &= ~PIN_BASED_POSTED_INTR;
- }
pin_controls_set(vmx, exec_control);
/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index de24d3826788..fcd8bcb7e0ea 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6396,6 +6396,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (vmx->emulation_required)
+ return;
+
if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
handle_external_interrupt_irqoff(vcpu);
else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6ab42cdcb8a4..75c59ad27e9f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3116,6 +3116,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated) {
s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
adjust_tsc_offset_guest(vcpu, adj);
+ /* Before back to guest, tsc_timestamp must be adjusted
+ * as well, otherwise guest's percpu pvclock time could jump.
+ */
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
}
vcpu->arch.ia32_tsc_adjust_msr = data;
}
@@ -7023,6 +7027,11 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
+ ctxt->interruptibility = 0;
+ ctxt->have_exception = false;
+ ctxt->exception.vector = -1;
+ ctxt->perm_ok = false;
+
init_decode_cache(ctxt);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
}
@@ -7338,6 +7347,37 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
return false;
}
+/*
+ * Decode to be emulated instruction. Return EMULATION_OK if success.
+ */
+int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
+ void *insn, int insn_len)
+{
+ int r = EMULATION_OK;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+
+ init_emulate_ctxt(vcpu);
+
+ /*
+ * We will reenter on the same instruction since we do not set
+ * complete_userspace_io. This does not handle watchpoints yet,
+ * those would be handled in the emulate_ops.
+ */
+ if (!(emulation_type & EMULTYPE_SKIP) &&
+ kvm_vcpu_check_breakpoint(vcpu, &r))
+ return r;
+
+ ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
+
+ r = x86_decode_insn(ctxt, insn, insn_len);
+
+ trace_kvm_emulate_insn_start(vcpu);
+ ++vcpu->stat.insn_emulation;
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction);
+
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len)
{
@@ -7357,32 +7397,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
*/
write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
vcpu->arch.write_fault_to_shadow_pgtable = false;
- kvm_clear_exception_queue(vcpu);
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
- init_emulate_ctxt(vcpu);
-
- /*
- * We will reenter on the same instruction since
- * we do not set complete_userspace_io. This does not
- * handle watchpoints yet, those would be handled in
- * the emulate_ops.
- */
- if (!(emulation_type & EMULTYPE_SKIP) &&
- kvm_vcpu_check_breakpoint(vcpu, &r))
- return r;
-
- ctxt->interruptibility = 0;
- ctxt->have_exception = false;
- ctxt->exception.vector = -1;
- ctxt->perm_ok = false;
-
- ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
-
- r = x86_decode_insn(ctxt, insn, insn_len);
+ kvm_clear_exception_queue(vcpu);
- trace_kvm_emulate_insn_start(vcpu);
- ++vcpu->stat.insn_emulation;
+ r = x86_decode_emulated_instruction(vcpu, emulation_type,
+ insn, insn_len);
if (r != EMULATION_OK) {
if ((emulation_type & EMULTYPE_TRAP_UD) ||
(emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2249a7d7ca27..2bff44f1efec 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -272,6 +272,8 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
bool kvm_vector_hashing_enabled(void);
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code);
+int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
+ void *insn, int insn_len);
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
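The new prototype exposes the decode half of the emulator on its own, so a caller can decode the current instruction and inspect the result without committing to full emulation. A purely hypothetical call shape, not taken from this series:

/* Decode only; returns true if the instruction decodes cleanly. */
static bool insn_decodes_cleanly(struct kvm_vcpu *vcpu, void *insn, int insn_len)
{
	return x86_decode_emulated_instruction(vcpu, 0, insn, insn_len) ==
	       EMULATION_OK;
}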
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 56e0f290fef6..e809f1446846 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -618,8 +618,8 @@ int xen_alloc_p2m_entry(unsigned long pfn)
}
/* Expanded the p2m? */
- if (pfn > xen_p2m_last_pfn) {
- xen_p2m_last_pfn = pfn;
+ if (pfn >= xen_p2m_last_pfn) {
+ xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE);
HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
}
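The p2m fix rounds the recorded last PFN up to the end of the p2m page that was just allocated, rather than to the single pfn that triggered the allocation. ALIGN(x, a) rounds up to the next multiple of a power-of-two boundary; assuming 4 KiB pages with 8-byte entries (P2M_PER_PAGE == 512), a quick worked example:

#include <stdint.h>

/* Same rounding behaviour as the kernel's ALIGN() for power-of-two 'a'. */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

/* For pfn = 0x12345: the old code recorded 0x12345 as the last pfn,
 * while ALIGN_UP(0x12345 + 1, 512) == 0x12400, i.e. the boundary of the
 * freshly allocated p2m page, so max_pfn now covers the whole page. */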