From 421caf5e2ba931f8ed84c4edc03e2cf484aa5e8b Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 11 Jun 2020 10:32:38 +0800 Subject: x86/mce/inject: Fix a wrong assignment of i_mce.status [ Upstream commit 5d7f7d1d5e01c22894dee7c9c9266500478dca99 ] The original code is a nop as i_mce.status is or'ed with part of itself, fix it. Fixes: a1300e505297 ("x86/ras/mce_amd_inj: Trigger deferred and thresholding errors interrupts") Signed-off-by: Zhenzhong Duan Signed-off-by: Borislav Petkov Acked-by: Yazen Ghannam Link: https://lkml.kernel.org/r/20200611023238.3830-1-zhenzhong.duan@gmail.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mce/inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 0593b192eb8f..7843ab3fde09 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -511,7 +511,7 @@ static void do_inject(void) */ if (inj_type == DFR_INT_INJ) { i_mce.status |= MCI_STATUS_DEFERRED; - i_mce.status |= (i_mce.status & ~MCI_STATUS_UC); + i_mce.status &= ~MCI_STATUS_UC; } /* -- cgit v1.2.3 From 2b611e8adc574f4cb735c4e56a1489328f6516ae Mon Sep 17 00:00:00 2001 From: Giovanni Gherdovich Date: Sun, 31 May 2020 20:24:51 +0200 Subject: x86, sched: check for counters overflow in frequency invariant accounting [ Upstream commit e2b0d619b400ae326f954a018a1d65d736c237c5 ] The product mcnt * arch_max_freq_ratio can overflows u64. For context, a large value for arch_max_freq_ratio would be 5000, corresponding to a turbo_freq/base_freq ratio of 5 (normally it's more like 1500-2000). A large increment frequency for the MPERF counter would be 5GHz (the base clock of all CPUs on the market today is less than that). With these figures, a CPU would need to go without a scheduler tick for around 8 days for the u64 overflow to happen. It is unlikely, but the check is warranted. Under similar conditions, the difference acnt of two consecutive APERF readings can overflow as well. In these circumstances is appropriate to disable frequency invariant accounting: the feature relies on measures of the clock frequency done at every scheduler tick, which need to be "fresh" to be at all meaningful. A note on i386: prior to version 5.1, the GCC compiler didn't have the builtin function __builtin_mul_overflow. In these GCC versions the macro check_mul_overflow needs __udivdi3() to do (u64)a/b, which the kernel doesn't provide. For this reason this change fails to build on i386 if GCC<5.1, and we protect the entire frequency invariant code behind CONFIG_X86_64 (special thanks to "kbuild test robot" ). Fixes: 1567c3e3467c ("x86, sched: Add support for frequency invariance") Signed-off-by: Giovanni Gherdovich Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rafael J. Wysocki Link: https://lkml.kernel.org/r/20200531182453.15254-2-ggherdovich@suse.cz Signed-off-by: Sasha Levin --- arch/x86/include/asm/topology.h | 2 +- arch/x86/kernel/smpboot.c | 33 ++++++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 79d8d5496330..f4234575f3fd 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -193,7 +193,7 @@ static inline void sched_clear_itmt_support(void) } #endif /* CONFIG_SCHED_MC_PRIO */ -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_X86_64) #include DECLARE_STATIC_KEY_FALSE(arch_scale_freq_key); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ffbd9a3d78d8..18d292fc466c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -1777,6 +1778,7 @@ void native_play_dead(void) #endif +#ifdef CONFIG_X86_64 /* * APERF/MPERF frequency ratio computation. * @@ -2048,11 +2050,19 @@ static void init_freq_invariance(bool secondary) } } +static void disable_freq_invariance_workfn(struct work_struct *work) +{ + static_branch_disable(&arch_scale_freq_key); +} + +static DECLARE_WORK(disable_freq_invariance_work, + disable_freq_invariance_workfn); + DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; void arch_scale_freq_tick(void) { - u64 freq_scale; + u64 freq_scale = SCHED_CAPACITY_SCALE; u64 aperf, mperf; u64 acnt, mcnt; @@ -2064,19 +2074,32 @@ void arch_scale_freq_tick(void) acnt = aperf - this_cpu_read(arch_prev_aperf); mcnt = mperf - this_cpu_read(arch_prev_mperf); - if (!mcnt) - return; this_cpu_write(arch_prev_aperf, aperf); this_cpu_write(arch_prev_mperf, mperf); - acnt <<= 2*SCHED_CAPACITY_SHIFT; - mcnt *= arch_max_freq_ratio; + if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) + goto error; + + if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) + goto error; freq_scale = div64_u64(acnt, mcnt); + if (!freq_scale) + goto error; if (freq_scale > SCHED_CAPACITY_SCALE) freq_scale = SCHED_CAPACITY_SCALE; this_cpu_write(arch_freq_scale, freq_scale); + return; + +error: + pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); + schedule_work(&disable_freq_invariance_work); +} +#else +static inline void init_freq_invariance(bool secondary) +{ } +#endif /* CONFIG_X86_64 */ -- cgit v1.2.3 From 6ddc271a3a55a0077f9d703523ead1700b867419 Mon Sep 17 00:00:00 2001 From: Giovanni Gherdovich Date: Sun, 31 May 2020 20:24:52 +0200 Subject: x86, sched: Bail out of frequency invariance if turbo frequency is unknown [ Upstream commit 51beea8862a3095559862df39554f05042e1195b ] There may be CPUs that support turbo boost but don't declare any turbo ratio, i.e. their MSR_TURBO_RATIO_LIMIT is all zeroes. In that condition scale-invariant calculations can't be performed. Fixes: 1567c3e3467c ("x86, sched: Add support for frequency invariance") Suggested-by: Ricardo Neri Signed-off-by: Giovanni Gherdovich Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rafael J. Wysocki Tested-by: Ricardo Neri Link: https://lkml.kernel.org/r/20200531182453.15254-3-ggherdovich@suse.cz Signed-off-by: Sasha Levin --- arch/x86/kernel/smpboot.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 18d292fc466c..20e1cea262e4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2002,9 +2002,11 @@ out: /* * Some hypervisors advertise X86_FEATURE_APERFMPERF * but then fill all MSR's with zeroes. + * Some CPUs have turbo boost but don't declare any turbo ratio + * in MSR_TURBO_RATIO_LIMIT. */ - if (!base_freq) { - pr_debug("Couldn't determine cpu base frequency, necessary for scale-invariant accounting.\n"); + if (!base_freq || !turbo_freq) { + pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); return false; } -- cgit v1.2.3 From f08ce56b56fe3ad18708ff0b5784ddb93e47ddb2 Mon Sep 17 00:00:00 2001 From: Giovanni Gherdovich Date: Sun, 31 May 2020 20:24:53 +0200 Subject: x86, sched: Bail out of frequency invariance if turbo_freq/base_freq gives 0 [ Upstream commit f4291df103315a696f0b8c4f45ca8ae773c17441 ] Be defensive against the case where the processor reports a base_freq larger than turbo_freq (the ratio would be zero). Fixes: 1567c3e3467c ("x86, sched: Add support for frequency invariance") Signed-off-by: Giovanni Gherdovich Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rafael J. Wysocki Link: https://lkml.kernel.org/r/20200531182453.15254-4-ggherdovich@suse.cz Signed-off-by: Sasha Levin --- arch/x86/kernel/smpboot.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 20e1cea262e4..518ac6bf752e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1977,6 +1977,7 @@ static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) static bool intel_set_max_freq_ratio(void) { u64 base_freq, turbo_freq; + u64 turbo_ratio; if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) goto out; @@ -2010,9 +2011,15 @@ out: return false; } - arch_turbo_freq_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, - base_freq); + turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); + if (!turbo_ratio) { + pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); + return false; + } + + arch_turbo_freq_ratio = turbo_ratio; arch_set_max_freq_ratio(turbo_disabled()); + return true; } -- cgit v1.2.3 From 335e8039fdddd2c8b9257393ea71ff5b64fcedac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Aug 2020 11:16:17 -0700 Subject: x86/fsgsbase/64: Fix NULL deref in 86_fsgsbase_read_task [ Upstream commit 8ab49526b53d3172d1d8dd03a75c7d1f5bd21239 ] syzbot found its way in 86_fsgsbase_read_task() and triggered this oops: KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 0 PID: 6866 Comm: syz-executor262 Not tainted 5.8.0-syzkaller #0 RIP: 0010:x86_fsgsbase_read_task+0x16d/0x310 arch/x86/kernel/process_64.c:393 Call Trace: putreg32+0x3ab/0x530 arch/x86/kernel/ptrace.c:876 genregs32_set arch/x86/kernel/ptrace.c:1026 [inline] genregs32_set+0xa4/0x100 arch/x86/kernel/ptrace.c:1006 copy_regset_from_user include/linux/regset.h:326 [inline] ia32_arch_ptrace arch/x86/kernel/ptrace.c:1061 [inline] compat_arch_ptrace+0x36c/0xd90 arch/x86/kernel/ptrace.c:1198 __do_compat_sys_ptrace kernel/ptrace.c:1420 [inline] __se_compat_sys_ptrace kernel/ptrace.c:1389 [inline] __ia32_compat_sys_ptrace+0x220/0x2f0 kernel/ptrace.c:1389 do_syscall_32_irqs_on arch/x86/entry/common.c:84 [inline] __do_fast_syscall_32+0x57/0x80 arch/x86/entry/common.c:126 do_fast_syscall_32+0x2f/0x70 arch/x86/entry/common.c:149 entry_SYSENTER_compat_after_hwframe+0x4d/0x5c This can happen if ptrace() or sigreturn() pokes an LDT selector into FS or GS for a task with no LDT and something tries to read the base before a return to usermode notices the bad selector and fixes it. The fix is to make sure ldt pointer is not NULL. Fixes: 07e1d88adaae ("x86/fsgsbase/64: Fix ptrace() to read the FS/GS base accurately") Co-developed-by: Jann Horn Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Andy Lutomirski Cc: Chang S. Bae Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Markus T Metzger Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Rik van Riel Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/x86/kernel/process_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9a97415b2139..3ebc70bd01e8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -314,7 +314,7 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task, */ mutex_lock(&task->mm->context.lock); ldt = task->mm->context.ldt; - if (unlikely(idx >= ldt->nr_entries)) + if (unlikely(!ldt || idx >= ldt->nr_entries)) base = 0; else base = get_desc_base(ldt->entries + idx); -- cgit v1.2.3 From 6573d3c1f4024429b46d3e98020ae869e9219810 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Tue, 21 Jul 2020 14:26:09 -0600 Subject: irqdomain/treewide: Free firmware node after domain removal commit ec0160891e387f4771f953b888b1fe951398e5d9 upstream. Commit 711419e504eb ("irqdomain: Add the missing assignment of domain->fwnode for named fwnode") unintentionally caused a dangling pointer page fault issue on firmware nodes that were freed after IRQ domain allocation. Commit e3beca48a45b fixed that dangling pointer issue by only freeing the firmware node after an IRQ domain allocation failure. That fix no longer frees the firmware node immediately, but leaves the firmware node allocated after the domain is removed. The firmware node must be kept around through irq_domain_remove, but should be freed it afterwards. Add the missing free operations after domain removal where where appropriate. Fixes: e3beca48a45b ("irqdomain/treewide: Keep firmware node unconditionally allocated") Signed-off-by: Jon Derrick Signed-off-by: Thomas Gleixner Reviewed-by: Andy Shevchenko Acked-by: Bjorn Helgaas # drivers/pci Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1595363169-7157-1-git-send-email-jonathan.derrick@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/mips/pci/pci-xtalk-bridge.c | 3 +++ arch/x86/kernel/apic/io_apic.c | 5 +++++ drivers/iommu/intel/irq_remapping.c | 8 ++++++++ drivers/mfd/ioc3.c | 6 ++++++ drivers/pci/controller/vmd.c | 3 +++ 5 files changed, 25 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 5958217861b8..9b3cc775c55e 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -728,6 +728,7 @@ err_free_resource: pci_free_resource_list(&host->windows); err_remove_domain: irq_domain_remove(domain); + irq_domain_free_fwnode(fn); return err; } @@ -735,8 +736,10 @@ static int bridge_remove(struct platform_device *pdev) { struct pci_bus *bus = platform_get_drvdata(pdev); struct bridge_controller *bc = BRIDGE_CONTROLLER(bus); + struct fwnode_handle *fn = bc->domain->fwnode; irq_domain_remove(bc->domain); + irq_domain_free_fwnode(fn); pci_lock_rescan_remove(); pci_stop_root_bus(bus); pci_remove_root_bus(bus); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 81ffcfbfaef2..21325a4a78b9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2335,8 +2335,13 @@ static int mp_irqdomain_create(int ioapic) static void ioapic_destroy_irqdomain(int idx) { + struct ioapic_domain_cfg *cfg = &ioapics[idx].irqdomain_cfg; + struct fwnode_handle *fn = ioapics[idx].irqdomain->fwnode; + if (ioapics[idx].irqdomain) { irq_domain_remove(ioapics[idx].irqdomain); + if (!cfg->dev) + irq_domain_free_fwnode(fn); ioapics[idx].irqdomain = NULL; } } diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 9564d23d094f..aa096b333a99 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -628,13 +628,21 @@ out_free_table: static void intel_teardown_irq_remapping(struct intel_iommu *iommu) { + struct fwnode_handle *fn; + if (iommu && iommu->ir_table) { if (iommu->ir_msi_domain) { + fn = iommu->ir_msi_domain->fwnode; + irq_domain_remove(iommu->ir_msi_domain); + irq_domain_free_fwnode(fn); iommu->ir_msi_domain = NULL; } if (iommu->ir_domain) { + fn = iommu->ir_domain->fwnode; + irq_domain_remove(iommu->ir_domain); + irq_domain_free_fwnode(fn); iommu->ir_domain = NULL; } free_pages((unsigned long)iommu->ir_table->base, diff --git a/drivers/mfd/ioc3.c b/drivers/mfd/ioc3.c index 74cee7cb0afc..d939ccc46509 100644 --- a/drivers/mfd/ioc3.c +++ b/drivers/mfd/ioc3.c @@ -616,7 +616,10 @@ static int ioc3_mfd_probe(struct pci_dev *pdev, /* Remove all already added MFD devices */ mfd_remove_devices(&ipd->pdev->dev); if (ipd->domain) { + struct fwnode_handle *fn = ipd->domain->fwnode; + irq_domain_remove(ipd->domain); + irq_domain_free_fwnode(fn); free_irq(ipd->domain_irq, (void *)ipd); } pci_iounmap(pdev, regs); @@ -643,7 +646,10 @@ static void ioc3_mfd_remove(struct pci_dev *pdev) /* Release resources */ mfd_remove_devices(&ipd->pdev->dev); if (ipd->domain) { + struct fwnode_handle *fn = ipd->domain->fwnode; + irq_domain_remove(ipd->domain); + irq_domain_free_fwnode(fn); free_irq(ipd->domain_irq, (void *)ipd); } pci_iounmap(pdev, ipd->regs); diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 9a64cf90c291..ebec0a6e77ed 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -560,6 +560,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) if (!vmd->bus) { pci_free_resource_list(&resources); irq_domain_remove(vmd->irq_domain); + irq_domain_free_fwnode(fn); return -ENODEV; } @@ -673,6 +674,7 @@ static void vmd_cleanup_srcu(struct vmd_dev *vmd) static void vmd_remove(struct pci_dev *dev) { struct vmd_dev *vmd = pci_get_drvdata(dev); + struct fwnode_handle *fn = vmd->irq_domain->fwnode; sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); pci_stop_root_bus(vmd->bus); @@ -680,6 +682,7 @@ static void vmd_remove(struct pci_dev *dev) vmd_cleanup_srcu(vmd); vmd_detach_resources(vmd); irq_domain_remove(vmd->irq_domain); + irq_domain_free_fwnode(fn); } #ifdef CONFIG_PM_SLEEP -- cgit v1.2.3 From 2ca0b460bbcb0f6ed10fdda297f049aae2c95ba8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 24 Jul 2020 22:44:41 +0200 Subject: genirq/affinity: Make affinity setting if activated opt-in commit f0c7baca180046824e07fc5f1326e83a8fd150c7 upstream. John reported that on a RK3288 system the perf per CPU interrupts are all affine to CPU0 and provided the analysis: "It looks like what happens is that because the interrupts are not per-CPU in the hardware, armpmu_request_irq() calls irq_force_affinity() while the interrupt is deactivated and then request_irq() with IRQF_PERCPU | IRQF_NOBALANCING. Now when irq_startup() runs with IRQ_STARTUP_NORMAL, it calls irq_setup_affinity() which returns early because IRQF_PERCPU and IRQF_NOBALANCING are set, leaving the interrupt on its original CPU." This was broken by the recent commit which blocked interrupt affinity setting in hardware before activation of the interrupt. While this works in general, it does not work for this particular case. As contrary to the initial analysis not all interrupt chip drivers implement an activate callback, the safe cure is to make the deferred interrupt affinity setting at activation time opt-in. Implement the necessary core logic and make the two irqchip implementations for which this is required opt-in. In hindsight this would have been the right thing to do, but ... Fixes: baedb87d1b53 ("genirq/affinity: Handle affinity setting on inactive interrupts correctly") Reported-by: John Keeping Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/87blk4tzgm.fsf@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 4 ++++ drivers/irqchip/irq-gic-v3-its.c | 5 ++++- include/linux/irq.h | 13 +++++++++++++ kernel/irq/manage.c | 6 +++++- 4 files changed, 26 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 7649da2478d8..dae32d948bf2 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -560,6 +560,10 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, * as that can corrupt the affinity move state. */ irqd_set_handle_enforce_irqctx(irqd); + + /* Don't invoke affinity setter on deactivated interrupts */ + irqd_set_affinity_on_activate(irqd); + /* * Legacy vectors are already assigned when the IOAPIC * takes them over. They stay on the same vector. This is diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index da44bfa48bc2..4c3c60964424 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3523,6 +3523,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, msi_alloc_info_t *info = args; struct its_device *its_dev = info->scratchpad[0].ptr; struct its_node *its = its_dev->its; + struct irq_data *irqd; irq_hw_number_t hwirq; int err; int i; @@ -3542,7 +3543,9 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, &its_irq_chip, its_dev); - irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i))); + irqd = irq_get_irq_data(virq + i); + irqd_set_single_target(irqd); + irqd_set_affinity_on_activate(irqd); pr_debug("ID:%d pID:%d vID:%d\n", (int)(hwirq + i - its_dev->event_map.lpi_base), (int)(hwirq + i), virq + i); diff --git a/include/linux/irq.h b/include/linux/irq.h index 8d5bc2c237d7..1b7f4dfee35b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -213,6 +213,8 @@ struct irq_data { * required * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked * from actual interrupt context. + * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call + * irq_chip::irq_set_affinity() when deactivated. */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -237,6 +239,7 @@ enum { IRQD_CAN_RESERVE = (1 << 26), IRQD_MSI_NOMASK_QUIRK = (1 << 27), IRQD_HANDLE_ENFORCE_IRQCTX = (1 << 28), + IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -421,6 +424,16 @@ static inline bool irqd_msi_nomask_quirk(struct irq_data *d) return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; } +static inline void irqd_set_affinity_on_activate(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; +} + +static inline bool irqd_affinity_on_activate(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2a9fec53e159..48c38e09c673 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -320,12 +320,16 @@ static bool irq_set_affinity_deactivated(struct irq_data *data, struct irq_desc *desc = irq_data_to_desc(data); /* + * Handle irq chips which can handle affinity only in activated + * state correctly + * * If the interrupt is not yet activated, just store the affinity * mask and do not call the chip driver at all. On activation the * driver has to make sure anyway that the interrupt is in a * useable state so startup works. */ - if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data)) + if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || + irqd_is_activated(data) || !irqd_affinity_on_activate(data)) return false; cpumask_copy(desc->irq_common_data.affinity, mask); -- cgit v1.2.3 From bff77b49db3ea98fabfbe8efbb89fc54afabb84b Mon Sep 17 00:00:00 2001 From: Dilip Kota Date: Mon, 3 Aug 2020 15:56:36 +0800 Subject: x86/tsr: Fix tsc frequency enumeration bug on Lightning Mountain SoC [ Upstream commit 7d98585860d845e36ee612832a5ff021f201dbaf ] Frequency descriptor of Lightning Mountain SoC doesn't have all the frequency entries so resulting in the below failure causing a kernel hang: Error MSR_FSB_FREQ index 15 is unknown tsc: Fast TSC calibration failed So, add all the frequency entries in the Lightning Mountain SoC frequency descriptor. Fixes: 0cc5359d8fd45 ("x86/cpu: Update init data for new Airmont CPU model") Fixes: 812c2d7506fd ("x86/tsc_msr: Use named struct initializers") Signed-off-by: Dilip Kota Signed-off-by: Ingo Molnar Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/211c643ae217604b46cbec43a2c0423946dc7d2d.1596440057.git.eswara.kota@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/tsc_msr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 4fec6f3a1858..a654a9b4b77c 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -133,10 +133,15 @@ static const struct freq_desc freq_desc_ann = { .mask = 0x0f, }; -/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */ +/* + * 24 MHz crystal? : 24 * 13 / 4 = 78 MHz + * Frequency step for Lightning Mountain SoC is fixed to 78 MHz, + * so all the frequency entries are 78000. + */ static const struct freq_desc freq_desc_lgm = { .use_msr_plat = true, - .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 }, + .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000, + 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 }, .mask = 0x0f, }; -- cgit v1.2.3 From 8dfe0e66582c402d122c87d630a0a439bfa1706b Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 16 Jul 2020 12:23:59 -0700 Subject: x86/bugs/multihit: Fix mitigation reporting when VMX is not in use [ Upstream commit f29dfa53cc8ae6ad93bae619bcc0bf45cab344f7 ] On systems that have virtualization disabled or unsupported, sysfs mitigation for X86_BUG_ITLB_MULTIHIT is reported incorrectly as: $ cat /sys/devices/system/cpu/vulnerabilities/itlb_multihit KVM: Vulnerable System is not vulnerable to DoS attack from a rogue guest when virtualization is disabled or unsupported in the hardware. Change the mitigation reporting for these cases. Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation") Reported-by: Nelson Dsouza Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Reviewed-by: Tony Luck Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/0ba029932a816179b9d14a30db38f0f11ef1f166.1594925782.git.pawan.kumar.gupta@linux.intel.com Signed-off-by: Sasha Levin --- Documentation/admin-guide/hw-vuln/multihit.rst | 4 ++++ arch/x86/kernel/cpu/bugs.c | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst index ba9988d8bce5..140e4cec38c3 100644 --- a/Documentation/admin-guide/hw-vuln/multihit.rst +++ b/Documentation/admin-guide/hw-vuln/multihit.rst @@ -80,6 +80,10 @@ The possible values in this file are: - The processor is not vulnerable. * - KVM: Mitigation: Split huge pages - Software changes mitigate this issue. + * - KVM: Mitigation: VMX unsupported + - KVM is not vulnerable because Virtual Machine Extensions (VMX) is not supported. + * - KVM: Mitigation: VMX disabled + - KVM is not vulnerable because Virtual Machine Extensions (VMX) is disabled. * - KVM: Vulnerable - The processor is vulnerable, but no mitigation enabled diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0b71970d2d3d..b0802d45abd3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "cpu.h" @@ -1556,7 +1557,12 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t itlb_multihit_show_state(char *buf) { - if (itlb_multihit_kvm_mitigation) + if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || + !boot_cpu_has(X86_FEATURE_VMX)) + return sprintf(buf, "KVM: Mitigation: VMX unsupported\n"); + else if (!(cr4_read_shadow() & X86_CR4_VMXE)) + return sprintf(buf, "KVM: Mitigation: VMX disabled\n"); + else if (itlb_multihit_kvm_mitigation) return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); else return sprintf(buf, "KVM: Vulnerable\n"); -- cgit v1.2.3