diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/crypto/Kconfig | 11 | ||||
-rw-r--r-- | arch/x86/entry/entry.S | 2 | ||||
-rw-r--r-- | arch/x86/events/core.c | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/intel-family.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 30 | ||||
-rw-r--r-- | arch/x86/kernel/i8253.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/svm/avic.c | 66 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/posted_intr.c | 28 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 20 | ||||
-rw-r--r-- | arch/x86/lib/x86-opcode-map.txt | 4 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 6 | ||||
-rw-r--r-- | arch/x86/pci/xen.c | 8 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pvh.c | 19 |
14 files changed, 120 insertions, 82 deletions
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 7b1bebed879d..46b53ab06165 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -3,10 +3,12 @@ menu "Accelerated Cryptographic Algorithms for CPU (x86)" config CRYPTO_CURVE25519_X86 - tristate "Public key crypto: Curve25519 (ADX)" + tristate depends on X86 && 64BIT + select CRYPTO_KPP select CRYPTO_LIB_CURVE25519_GENERIC select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + default CRYPTO_LIB_CURVE25519_INTERNAL help Curve25519 algorithm @@ -348,11 +350,12 @@ config CRYPTO_ARIA_GFNI_AVX512_X86_64 Processes 64 blocks in parallel. config CRYPTO_CHACHA20_X86_64 - tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (SSSE3/AVX2/AVX-512VL)" + tristate depends on X86 && 64BIT select CRYPTO_SKCIPHER select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA + default CRYPTO_LIB_CHACHA_INTERNAL help Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms @@ -417,10 +420,12 @@ config CRYPTO_POLYVAL_CLMUL_NI - CLMUL-NI (carry-less multiplication new instructions) config CRYPTO_POLY1305_X86_64 - tristate "Hash functions: Poly1305 (SSE2/AVX2)" + tristate depends on X86 && 64BIT + select CRYPTO_HASH select CRYPTO_LIB_POLY1305_GENERIC select CRYPTO_ARCH_HAVE_LIB_POLY1305 + default CRYPTO_LIB_POLY1305_INTERNAL help Poly1305 authenticator algorithm (RFC7539) diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index b7ea3e8e9ecc..58e3124ee2b4 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -18,7 +18,7 @@ SYM_FUNC_START(entry_ibpb) movl $MSR_IA32_PRED_CMD, %ecx - movl $PRED_CMD_IBPB, %eax + movl _ASM_RIP(x86_pred_cmd), %eax xorl %edx, %edx wrmsr diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 0d33c85da453..d737d53d03aa 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (!event->attr.freq && x86_pmu.limit_period) { + if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 913fd3a7bac6..64fa42175a15 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -449,6 +449,7 @@ #define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 1a42f829667a..62d8b9448dc5 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -115,6 +115,8 @@ #define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) #define INTEL_GRANITERAPIDS_D IFM(6, 0xAE) +#define INTEL_BARTLETTLAKE IFM(6, 0xD7) /* Raptor Cove */ + /* "Hybrid" Processors (P-Core/E-Core) */ #define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5fba44a4f988..46bddb5bb15f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1578,7 +1578,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) rrsba_disabled = true; } -static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) +static void __init spectre_v2_select_rsb_mitigation(enum spectre_v2_mitigation mode) { /* * Similar to context switches, there are two types of RSB attacks @@ -1602,27 +1602,30 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ */ switch (mode) { case SPECTRE_V2_NONE: - return; + break; - case SPECTRE_V2_EIBRS_LFENCE: case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_LFENCE: + case SPECTRE_V2_EIBRS_RETPOLINE: if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { - setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); } - return; + break; - case SPECTRE_V2_EIBRS_RETPOLINE: case SPECTRE_V2_RETPOLINE: case SPECTRE_V2_LFENCE: case SPECTRE_V2_IBRS: + pr_info("Spectre v2 / SpectreRSB: Filling RSB on context switch and VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); - pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); - return; - } + break; - pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); - dump_stack(); + default: + pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation\n"); + dump_stack(); + break; + } } /* @@ -1854,10 +1857,7 @@ static void __init spectre_v2_select_mitigation(void) * * FIXME: Is this pointless for retbleed-affected AMD? */ - setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - - spectre_v2_determine_rsb_fill_type_at_vmexit(mode); + spectre_v2_select_rsb_mitigation(mode); /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 80e262bb627f..cb9852ad6098 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -46,7 +46,8 @@ bool __init pit_timer_init(void) * VMMs otherwise steal CPU time just to pointlessly waggle * the (masked) IRQ. */ - clockevent_i8253_disable(); + scoped_guard(irq) + clockevent_i8253_disable(); return false; } clockevent_i8253_init(true); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 4b74ea91f4e6..63dea8ecd7ef 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -820,7 +820,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * Allocating new amd_iommu_pi_data, which will get * add to the per-vcpu ir_list. */ - ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); + ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT); if (!ir) { ret = -ENOMEM; goto out; @@ -896,6 +896,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; int idx, ret = 0; if (!kvm_arch_has_assigned_device(kvm) || @@ -933,6 +934,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_vcpu_apicv_active(&svm->vcpu)) { struct amd_iommu_pi_data pi; + enable_remapped_mode = false; + /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); @@ -951,33 +954,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, */ if (!ret && pi.is_guest_mode) svm_ir_list_add(svm, &pi); - } else { - /* Use legacy mode in IRTE */ - struct amd_iommu_pi_data pi; - - /** - * Here, pi is used to: - * - Tell IOMMU to use legacy mode for this interrupt. - * - Retrieve ga_tag of prior interrupt remapping data. - */ - pi.prev_ga_tag = 0; - pi.is_guest_mode = false; - ret = irq_set_vcpu_affinity(host_irq, &pi); - - /** - * Check if the posted interrupt was previously - * setup with the guest_mode by checking if the ga_tag - * was cached. If so, we need to clean up the per-vcpu - * ir_list. - */ - if (!ret && pi.prev_ga_tag) { - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu_by_id(kvm, id); - if (vcpu) - svm_ir_list_del(to_svm(vcpu), &pi); - } } if (!ret && svm) { @@ -993,6 +969,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } ret = 0; + if (enable_remapped_mode) { + /* Use legacy mode in IRTE */ + struct amd_iommu_pi_data pi; + + /** + * Here, pi is used to: + * - Tell IOMMU to use legacy mode for this interrupt. + * - Retrieve ga_tag of prior interrupt remapping data. + */ + pi.prev_ga_tag = 0; + pi.is_guest_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &pi); + + /** + * Check if the posted interrupt was previously + * setup with the guest_mode by checking if the ga_tag + * was cached. If so, we need to clean up the per-vcpu + * ir_list. + */ + if (!ret && pi.prev_ga_tag) { + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, id); + if (vcpu) + svm_ir_list_del(to_svm(vcpu), &pi); + } + } out: srcu_read_unlock(&kvm->irq_srcu, idx); return ret; @@ -1199,6 +1203,12 @@ bool avic_hardware_setup(void) return false; } + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) && + !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) { + pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n"); + return false; + } + if (boot_cpu_has(X86_FEATURE_AVIC)) { pr_info("AVIC enabled\n"); } else if (force_avic) { diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index ec08fa3caf43..6b803324a981 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -274,6 +274,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; struct kvm_lapic_irq irq; struct kvm_vcpu *vcpu; struct vcpu_data vcpu_info; @@ -312,21 +313,8 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_set_msi_irq(kvm, e, &irq); if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. - */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - + !kvm_irq_is_postable(&irq)) continue; - } vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; @@ -334,11 +322,12 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); + if (!set) + continue; + enable_remapped_mode = false; + + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); if (ret < 0) { printk(KERN_INFO "%s: failed to update PI IRTE\n", __func__); @@ -346,6 +335,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } } + if (enable_remapped_mode) + ret = irq_set_vcpu_affinity(host_irq, NULL); + ret = 0; out: srcu_read_unlock(&kvm->irq_srcu, idx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1a4ca471d63d..7a5367b14518 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13555,15 +13555,22 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; int ret; - irqfd->producer = prod; kvm_arch_start_assignment(irqfd->kvm); + + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = prod; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 1); if (ret) kvm_arch_end_assignment(irqfd->kvm); + spin_unlock_irq(&kvm->irqfds.lock); + + return ret; } @@ -13573,9 +13580,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, int ret; struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; WARN_ON(irqfd->producer != prod); - irqfd->producer = NULL; /* * When producer of consumer is unregistered, we change back to @@ -13583,12 +13590,18 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, * when the irq is masked/disabled or the consumer side (KVM * int this case doesn't want to receive the interrupts. */ + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = NULL; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0); if (ret) printk(KERN_INFO "irq bypass consumer (token %p) unregistration" " fails: %d\n", irqfd->consumer.token, ret); + spin_unlock_irq(&kvm->irqfds.lock); + + kvm_arch_end_assignment(irqfd->kvm); } @@ -13601,7 +13614,8 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return !!memcmp(&old->msi, &new->msi, sizeof(new->msi)); diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688..f5dd84eb55dc 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 00ffa74d0dd0..27d81cb049ff 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -389,9 +389,9 @@ static void cond_mitigation(struct task_struct *next) prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec); /* - * Avoid user/user BTB poisoning by flushing the branch predictor - * when switching between processes. This stops one process from - * doing Spectre-v2 attacks on another. + * Avoid user->user BTB/RSB poisoning by flushing them when switching + * between processes. This stops one process from doing Spectre-v2 + * attacks on another. * * Both, the conditional and the always IBPB mode use the mm * pointer to avoid the IBPB when switching between tasks of the diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 0f2fe524f60d..b8755cde2419 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -436,7 +436,8 @@ static struct msi_domain_ops xen_pci_msi_domain_ops = { }; static struct msi_domain_info xen_pci_msi_domain_info = { - .flags = MSI_FLAG_PCI_MSIX | MSI_FLAG_FREE_MSI_DESCS | MSI_FLAG_DEV_SYSFS, + .flags = MSI_FLAG_PCI_MSIX | MSI_FLAG_FREE_MSI_DESCS | + MSI_FLAG_DEV_SYSFS | MSI_FLAG_NO_MASK, .ops = &xen_pci_msi_domain_ops, }; @@ -484,11 +485,6 @@ static __init void xen_setup_pci_msi(void) * in allocating the native domain and never use it. */ x86_init.irqs.create_pci_msi_domain = xen_create_pci_msi_domain; - /* - * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely - * controlled by the hypervisor. - */ - pci_msi_ignore_mask = 1; } #else /* CONFIG_PCI_MSI */ diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 0e3d930bcb89..9d25d9373945 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/acpi.h> +#include <linux/cpufreq.h> +#include <linux/cpuidle.h> #include <linux/export.h> #include <linux/mm.h> @@ -123,8 +125,23 @@ static void __init pvh_arch_setup(void) { pvh_reserve_extra_memory(); - if (xen_initial_domain()) + if (xen_initial_domain()) { xen_add_preferred_consoles(); + + /* + * Disable usage of CPU idle and frequency drivers: when + * running as hardware domain the exposed native ACPI tables + * causes idle and/or frequency drivers to attach and + * malfunction. It's Xen the entity that controls the idle and + * frequency states. + * + * For unprivileged domains the exposed ACPI tables are + * fabricated and don't contain such data. + */ + disable_cpuidle(); + disable_cpufreq(); + WARN_ON(xen_set_default_idle()); + } } void __init xen_pvh_init(struct boot_params *boot_params) |